159 files changed, 68962 insertions, 0 deletions
diff --git a/libs/ode-0.16.1/ode/src/Makefile.am b/libs/ode-0.16.1/ode/src/Makefile.am
new file mode 100644
index 0000000..609044b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/Makefile.am
@@ -0,0 +1,201 @@
+SUBDIRS = joints
+# Preprocessor flags for all objects: headers from the source and build trees, plus -D__ODE__.
+AM_CPPFLAGS = -I$(top_srcdir)/include \
+        -I$(top_builddir)/include \
+        -D__ODE__
+
+
+# The libtool library produced by this directory.
+lib_LTLIBRARIES = libode.la
+# Link flags are configure substitutions; libjoints.la comes from the joints subdirectory (see SUBDIRS).
+libode_la_LDFLAGS = @EXTRA_LIBTOOL_LDFLAGS@ @ODE_VERSION_INFO@
+libode_la_LIBADD = joints/libjoints.la
+
+# Unconditional sources and headers of libode; optional features append to this list further down.
+# Please keep the filenames sorted. NOTE(review): a few entries (e.g. nextafterf.c, timer.cpp) are out of order.
+libode_la_SOURCES =     nextafterf.c \
+                        array.cpp array.h \
+                        box.cpp \
+                        capsule.cpp \
+                        collision_cylinder_box.cpp \
+                        collision_cylinder_plane.cpp \
+                        collision_cylinder_sphere.cpp \
+                        collision_kernel.cpp collision_kernel.h \
+                        collision_quadtreespace.cpp \
+                        collision_sapspace.cpp \
+                        collision_space.cpp \
+                        collision_space_internal.h \
+                        collision_std.h \
+                        collision_transform.cpp collision_transform.h \
+                        collision_trimesh_colliders.h \
+                        collision_trimesh_disabled.cpp \
+                        collision_trimesh_internal.h \
+                        collision_trimesh_opcode.h \
+                        collision_trimesh_gimpact.h \
+                        collision_util.cpp collision_util.h \
+                        common.h \
+                        convex.cpp \
+                        coop_matrix_types.h \
+                        cylinder.cpp \
+                        default_threading.cpp default_threading.h \
+                        error.cpp error.h \
+                        export-dif.cpp \
+                        fastdot.cpp fastdot_impl.h \
+                        fastldltfactor.cpp fastldltfactor_impl.h \
+                        fastldltsolve.cpp fastldltsolve_impl.h \
+                        fastlsolve.cpp fastlsolve_impl.h \
+                        fastltsolve.cpp fastltsolve_impl.h \
+                        fastvecscale.cpp fastvecscale_impl.h \
+                        heightfield.cpp heightfield.h \
+                        lcp.cpp lcp.h \
+                        mass.cpp \
+                        mat.cpp mat.h \
+                        matrix.cpp matrix.h \
+                        memory.cpp \
+                        misc.cpp \
+                        objects.cpp objects.h \
+                        obstack.cpp obstack.h \
+                        ode.cpp \
+                        odeinit.cpp \
+                        odemath.cpp odemath.h \
+                        odeou.h \
+                        odetls.h \
+                        plane.cpp \
+                        quickstep.cpp quickstep.h \
+                        ray.cpp \
+                        resource_control.cpp resource_control.h \
+                        rotation.cpp \
+                        simple_cooperative.cpp simple_cooperative.h \
+                        sphere.cpp \
+                        step.cpp step.h \
+                        timer.cpp \
+                        threaded_solver_ldlt.h \
+                        threading_atomics_provs.h \
+                        threading_base.cpp threading_base.h \
+                        threading_fake_sync.h \
+                        threading_impl.cpp threading_impl.h \
+                        threading_impl_posix.h \
+                        threading_impl_templates.h \
+                        threading_impl_win.h \
+                        threading_pool_posix.cpp \
+                        threading_pool_win.cpp \
+                        threadingutils.h \
+                        typedefs.h \
+                        util.cpp util.h
+
+
+###################################
+#       O U    S T U F F
+###################################
+# With ENABLE_OU: add the ou headers to the include path, link libou.la and build odetls.cpp/odeou.cpp.
+# odetls.h and odeou.h are also in the unconditional libode_la_SOURCES list, so repeating them is redundant.
+if ENABLE_OU
+
+AM_CPPFLAGS += -I$(top_srcdir)/ou/include
+libode_la_LIBADD += $(top_builddir)/ou/src/ou/libou.la
+libode_la_SOURCES +=    odetls.cpp odetls.h \
+                        odeou.cpp odeou.h
+
+endif
+
+
+###################################
+#   G I M P A C T    S T U F F
+###################################
+# With GIMPACT: define dTRIMESH_ENABLED/dTRIMESH_GIMPACT, link libGIMPACT.la and build the trimesh colliders.
+# NOTE(review): most of these sources are also listed under OPCODE; presumably the two are mutually exclusive - confirm in configure.ac.
+if GIMPACT
+AM_CPPFLAGS += -DdTRIMESH_ENABLED -DdTRIMESH_GIMPACT -I$(top_srcdir)/GIMPACT/include
+
+libode_la_LIBADD += $(top_builddir)/GIMPACT/src/libGIMPACT.la
+libode_la_SOURCES +=    collision_trimesh_gimpact.cpp \
+                        collision_trimesh_internal.cpp collision_trimesh_internal_impl.h \
+                        gimpact_contact_export_helper.cpp gimpact_contact_export_helper.h \
+                        gimpact_gim_contact_accessor.h \
+                        gimpact_plane_contact_accessor.h \
+                        collision_trimesh_trimesh.cpp \
+                        collision_trimesh_sphere.cpp \
+                        collision_trimesh_ray.cpp \
+                        collision_trimesh_box.cpp \
+                        collision_trimesh_ccylinder.cpp \
+                        collision_trimesh_internal.h \
+                        collision_cylinder_trimesh.cpp \
+                        collision_trimesh_plane.cpp \
+                        collision_convex_trimesh.cpp
+endif
+
+
+
+#################################
+#   O P C O D E    S T U F F
+#################################
+# With OPCODE: define dTRIMESH_ENABLED/dTRIMESH_OPCODE, link libOPCODE.la and libIce.la, and build the trimesh colliders.
+# NOTE(review): collision_trimesh_internal.h is already in the unconditional libode_la_SOURCES list.
+if OPCODE
+AM_CPPFLAGS += -I$(top_srcdir)/OPCODE -I$(top_srcdir)/OPCODE/Ice -DdTRIMESH_ENABLED -DdTRIMESH_OPCODE
+libode_la_LIBADD += $(top_builddir)/OPCODE/libOPCODE.la \
+                    $(top_builddir)/OPCODE/Ice/libIce.la
+
+libode_la_SOURCES+=     collision_trimesh_opcode.cpp \
+                        collision_trimesh_internal.cpp collision_trimesh_internal_impl.h \
+                        collision_trimesh_trimesh.cpp \
+                        collision_trimesh_trimesh_old.cpp \
+                        collision_trimesh_sphere.cpp \
+                        collision_trimesh_ray.cpp \
+                        collision_trimesh_box.cpp \
+                        collision_trimesh_ccylinder.cpp \
+                        collision_trimesh_internal.h \
+                        collision_cylinder_trimesh.cpp \
+                        collision_trimesh_plane.cpp \
+                        collision_convex_trimesh.cpp
+endif
+
+# With LIBCCD: define dLIBCCD_ENABLED, add libccd/src/custom to the include path and build collision_libccd.cpp.
+if LIBCCD
+
+AM_CPPFLAGS += -DdLIBCCD_ENABLED
+AM_CPPFLAGS += -I$(top_srcdir)/libccd/src/custom
+# LIBCCD_INTERNAL uses the in-tree libccd/src/libccd.la; otherwise CCD_CFLAGS/CCD_LIBS supply the flags.
+if LIBCCD_INTERNAL
+AM_CPPFLAGS += -I$(top_srcdir)/libccd/src -I$(top_builddir)/libccd/src 
+libode_la_LIBADD += $(top_builddir)/libccd/src/libccd.la
+AM_CPPFLAGS += -DdLIBCCD_INTERNAL
+else
+AM_CPPFLAGS += $(CCD_CFLAGS)
+libode_la_LIBADD += $(CCD_LIBS)
+AM_CPPFLAGS += -DdLIBCCD_SYSTEM
+endif
+
+
+libode_la_SOURCES += collision_libccd.cpp collision_libccd.h
+# Each conditional below adds the -DdLIBCCD_* define of the same name.
+if LIBCCD_BOX_CYL
+AM_CPPFLAGS += -DdLIBCCD_BOX_CYL
+endif
+
+if LIBCCD_CYL_CYL
+AM_CPPFLAGS += -DdLIBCCD_CYL_CYL
+endif
+
+if LIBCCD_CAP_CYL
+AM_CPPFLAGS += -DdLIBCCD_CAP_CYL
+endif
+
+if LIBCCD_CONVEX_BOX
+AM_CPPFLAGS += -DdLIBCCD_CONVEX_BOX
+endif
+if LIBCCD_CONVEX_CAP
+AM_CPPFLAGS += -DdLIBCCD_CONVEX_CAP
+endif
+if LIBCCD_CONVEX_CYL
+AM_CPPFLAGS += -DdLIBCCD_CONVEX_CYL
+endif
+if LIBCCD_CONVEX_SPHERE
+AM_CPPFLAGS += -DdLIBCCD_CONVEX_SPHERE
+endif
+if LIBCCD_CONVEX_CONVEX
+AM_CPPFLAGS += -DdLIBCCD_CONVEX_CONVEX
+endif
+
+
+endif
diff --git a/libs/ode-0.16.1/ode/src/Makefile.in b/libs/ode-0.16.1/ode/src/Makefile.in
new file mode 100644
index 0000000..330d599
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/Makefile.in
@@ -0,0 +1,1100 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+
+###################################
+#       O U    S T U F F
+###################################
+@ENABLE_OU_TRUE@am__append_1 = -I$(top_srcdir)/ou/include
+@ENABLE_OU_TRUE@am__append_2 = $(top_builddir)/ou/src/ou/libou.la
+@ENABLE_OU_TRUE@am__append_3 = odetls.cpp odetls.h \
+@ENABLE_OU_TRUE@                        odeou.cpp odeou.h
+
+
+###################################
+#   G I M P A C T    S T U F F
+###################################
+@GIMPACT_TRUE@am__append_4 = -DdTRIMESH_ENABLED -DdTRIMESH_GIMPACT -I$(top_srcdir)/GIMPACT/include
+@GIMPACT_TRUE@am__append_5 = $(top_builddir)/GIMPACT/src/libGIMPACT.la
+@GIMPACT_TRUE@am__append_6 = collision_trimesh_gimpact.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_internal.cpp collision_trimesh_internal_impl.h \
+@GIMPACT_TRUE@                        gimpact_contact_export_helper.cpp gimpact_contact_export_helper.h \
+@GIMPACT_TRUE@                        gimpact_gim_contact_accessor.h \
+@GIMPACT_TRUE@                        gimpact_plane_contact_accessor.h \
+@GIMPACT_TRUE@                        collision_trimesh_trimesh.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_sphere.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_ray.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_box.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_ccylinder.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_internal.h \
+@GIMPACT_TRUE@                        collision_cylinder_trimesh.cpp \
+@GIMPACT_TRUE@                        collision_trimesh_plane.cpp \
+@GIMPACT_TRUE@                        collision_convex_trimesh.cpp
+
+
+#################################
+#   O P C O D E    S T U F F
+#################################
+@OPCODE_TRUE@am__append_7 = -I$(top_srcdir)/OPCODE -I$(top_srcdir)/OPCODE/Ice -DdTRIMESH_ENABLED -DdTRIMESH_OPCODE
+@OPCODE_TRUE@am__append_8 = $(top_builddir)/OPCODE/libOPCODE.la \
+@OPCODE_TRUE@                    $(top_builddir)/OPCODE/Ice/libIce.la
+
+@OPCODE_TRUE@am__append_9 = collision_trimesh_opcode.cpp \
+@OPCODE_TRUE@                        collision_trimesh_internal.cpp collision_trimesh_internal_impl.h \
+@OPCODE_TRUE@                        collision_trimesh_trimesh.cpp \
+@OPCODE_TRUE@                        collision_trimesh_trimesh_old.cpp \
+@OPCODE_TRUE@                        collision_trimesh_sphere.cpp \
+@OPCODE_TRUE@                        collision_trimesh_ray.cpp \
+@OPCODE_TRUE@                        collision_trimesh_box.cpp \
+@OPCODE_TRUE@                        collision_trimesh_ccylinder.cpp \
+@OPCODE_TRUE@                        collision_trimesh_internal.h \
+@OPCODE_TRUE@                        collision_cylinder_trimesh.cpp \
+@OPCODE_TRUE@                        collision_trimesh_plane.cpp \
+@OPCODE_TRUE@                        collision_convex_trimesh.cpp
+
+@LIBCCD_TRUE@am__append_10 = -DdLIBCCD_ENABLED \
+@LIBCCD_TRUE@	-I$(top_srcdir)/libccd/src/custom
+@LIBCCD_INTERNAL_TRUE@@LIBCCD_TRUE@am__append_11 =  \
+@LIBCCD_INTERNAL_TRUE@@LIBCCD_TRUE@	-I$(top_srcdir)/libccd/src \
+@LIBCCD_INTERNAL_TRUE@@LIBCCD_TRUE@	-I$(top_builddir)/libccd/src \
+@LIBCCD_INTERNAL_TRUE@@LIBCCD_TRUE@	-DdLIBCCD_INTERNAL
+@LIBCCD_INTERNAL_TRUE@@LIBCCD_TRUE@am__append_12 = $(top_builddir)/libccd/src/libccd.la
+@LIBCCD_INTERNAL_FALSE@@LIBCCD_TRUE@am__append_13 = $(CCD_CFLAGS) \
+@LIBCCD_INTERNAL_FALSE@@LIBCCD_TRUE@	-DdLIBCCD_SYSTEM
+@LIBCCD_INTERNAL_FALSE@@LIBCCD_TRUE@am__append_14 = $(CCD_LIBS)
+@LIBCCD_TRUE@am__append_15 = collision_libccd.cpp collision_libccd.h
+@LIBCCD_BOX_CYL_TRUE@@LIBCCD_TRUE@am__append_16 = -DdLIBCCD_BOX_CYL
+@LIBCCD_CYL_CYL_TRUE@@LIBCCD_TRUE@am__append_17 = -DdLIBCCD_CYL_CYL
+@LIBCCD_CAP_CYL_TRUE@@LIBCCD_TRUE@am__append_18 = -DdLIBCCD_CAP_CYL
+@LIBCCD_CONVEX_BOX_TRUE@@LIBCCD_TRUE@am__append_19 = -DdLIBCCD_CONVEX_BOX
+@LIBCCD_CONVEX_CAP_TRUE@@LIBCCD_TRUE@am__append_20 = -DdLIBCCD_CONVEX_CAP
+@LIBCCD_CONVEX_CYL_TRUE@@LIBCCD_TRUE@am__append_21 = -DdLIBCCD_CONVEX_CYL
+@LIBCCD_CONVEX_SPHERE_TRUE@@LIBCCD_TRUE@am__append_22 = -DdLIBCCD_CONVEX_SPHERE
+@LIBCCD_CONVEX_CONVEX_TRUE@@LIBCCD_TRUE@am__append_23 = -DdLIBCCD_CONVEX_CONVEX
+subdir = ode/src
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
+	$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+	$(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+@LIBCCD_INTERNAL_FALSE@@LIBCCD_TRUE@am__DEPENDENCIES_2 =  \
+@LIBCCD_INTERNAL_FALSE@@LIBCCD_TRUE@	$(am__DEPENDENCIES_1)
+libode_la_DEPENDENCIES = joints/libjoints.la $(am__append_2) \
+	$(am__append_5) $(am__append_8) $(am__append_12) \
+	$(am__DEPENDENCIES_2)
+am__libode_la_SOURCES_DIST = nextafterf.c array.cpp array.h box.cpp \
+	capsule.cpp collision_cylinder_box.cpp \
+	collision_cylinder_plane.cpp collision_cylinder_sphere.cpp \
+	collision_kernel.cpp collision_kernel.h \
+	collision_quadtreespace.cpp collision_sapspace.cpp \
+	collision_space.cpp collision_space_internal.h collision_std.h \
+	collision_transform.cpp collision_transform.h \
+	collision_trimesh_colliders.h collision_trimesh_disabled.cpp \
+	collision_trimesh_internal.h collision_trimesh_opcode.h \
+	collision_trimesh_gimpact.h collision_util.cpp \
+	collision_util.h common.h convex.cpp coop_matrix_types.h \
+	cylinder.cpp default_threading.cpp default_threading.h \
+	error.cpp error.h export-dif.cpp fastdot.cpp fastdot_impl.h \
+	fastldltfactor.cpp fastldltfactor_impl.h fastldltsolve.cpp \
+	fastldltsolve_impl.h fastlsolve.cpp fastlsolve_impl.h \
+	fastltsolve.cpp fastltsolve_impl.h fastvecscale.cpp \
+	fastvecscale_impl.h heightfield.cpp heightfield.h lcp.cpp \
+	lcp.h mass.cpp mat.cpp mat.h matrix.cpp matrix.h memory.cpp \
+	misc.cpp objects.cpp objects.h obstack.cpp obstack.h ode.cpp \
+	odeinit.cpp odemath.cpp odemath.h odeou.h odetls.h plane.cpp \
+	quickstep.cpp quickstep.h ray.cpp resource_control.cpp \
+	resource_control.h rotation.cpp simple_cooperative.cpp \
+	simple_cooperative.h sphere.cpp step.cpp step.h timer.cpp \
+	threaded_solver_ldlt.h threading_atomics_provs.h \
+	threading_base.cpp threading_base.h threading_fake_sync.h \
+	threading_impl.cpp threading_impl.h threading_impl_posix.h \
+	threading_impl_templates.h threading_impl_win.h \
+	threading_pool_posix.cpp threading_pool_win.cpp \
+	threadingutils.h typedefs.h util.cpp util.h odetls.cpp \
+	odeou.cpp collision_trimesh_gimpact.cpp \
+	collision_trimesh_internal.cpp \
+	collision_trimesh_internal_impl.h \
+	gimpact_contact_export_helper.cpp \
+	gimpact_contact_export_helper.h gimpact_gim_contact_accessor.h \
+	gimpact_plane_contact_accessor.h collision_trimesh_trimesh.cpp \
+	collision_trimesh_sphere.cpp collision_trimesh_ray.cpp \
+	collision_trimesh_box.cpp collision_trimesh_ccylinder.cpp \
+	collision_cylinder_trimesh.cpp collision_trimesh_plane.cpp \
+	collision_convex_trimesh.cpp collision_trimesh_opcode.cpp \
+	collision_trimesh_trimesh_old.cpp collision_libccd.cpp \
+	collision_libccd.h
+@ENABLE_OU_TRUE@am__objects_1 = odetls.lo odeou.lo
+@GIMPACT_TRUE@am__objects_2 = collision_trimesh_gimpact.lo \
+@GIMPACT_TRUE@	collision_trimesh_internal.lo \
+@GIMPACT_TRUE@	gimpact_contact_export_helper.lo \
+@GIMPACT_TRUE@	collision_trimesh_trimesh.lo \
+@GIMPACT_TRUE@	collision_trimesh_sphere.lo \
+@GIMPACT_TRUE@	collision_trimesh_ray.lo \
+@GIMPACT_TRUE@	collision_trimesh_box.lo \
+@GIMPACT_TRUE@	collision_trimesh_ccylinder.lo \
+@GIMPACT_TRUE@	collision_cylinder_trimesh.lo \
+@GIMPACT_TRUE@	collision_trimesh_plane.lo \
+@GIMPACT_TRUE@	collision_convex_trimesh.lo
+@OPCODE_TRUE@am__objects_3 = collision_trimesh_opcode.lo \
+@OPCODE_TRUE@	collision_trimesh_internal.lo \
+@OPCODE_TRUE@	collision_trimesh_trimesh.lo \
+@OPCODE_TRUE@	collision_trimesh_trimesh_old.lo \
+@OPCODE_TRUE@	collision_trimesh_sphere.lo \
+@OPCODE_TRUE@	collision_trimesh_ray.lo collision_trimesh_box.lo \
+@OPCODE_TRUE@	collision_trimesh_ccylinder.lo \
+@OPCODE_TRUE@	collision_cylinder_trimesh.lo \
+@OPCODE_TRUE@	collision_trimesh_plane.lo \
+@OPCODE_TRUE@	collision_convex_trimesh.lo
+@LIBCCD_TRUE@am__objects_4 = collision_libccd.lo
+am_libode_la_OBJECTS = nextafterf.lo array.lo box.lo capsule.lo \
+	collision_cylinder_box.lo collision_cylinder_plane.lo \
+	collision_cylinder_sphere.lo collision_kernel.lo \
+	collision_quadtreespace.lo collision_sapspace.lo \
+	collision_space.lo collision_transform.lo \
+	collision_trimesh_disabled.lo collision_util.lo convex.lo \
+	cylinder.lo default_threading.lo error.lo export-dif.lo \
+	fastdot.lo fastldltfactor.lo fastldltsolve.lo fastlsolve.lo \
+	fastltsolve.lo fastvecscale.lo heightfield.lo lcp.lo mass.lo \
+	mat.lo matrix.lo memory.lo misc.lo objects.lo obstack.lo \
+	ode.lo odeinit.lo odemath.lo plane.lo quickstep.lo ray.lo \
+	resource_control.lo rotation.lo simple_cooperative.lo \
+	sphere.lo step.lo timer.lo threading_base.lo threading_impl.lo \
+	threading_pool_posix.lo threading_pool_win.lo util.lo \
+	$(am__objects_1) $(am__objects_2) $(am__objects_3) \
+	$(am__objects_4)
+libode_la_OBJECTS = $(am_libode_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+libode_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+	$(CXXFLAGS) $(libode_la_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo "  CXX     " $@;
+am__v_CXX_1 = 
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo "  CXXLD   " $@;
+am__v_CXXLD_1 = 
+SOURCES = $(libode_la_SOURCES)
+DIST_SOURCES = $(am__libode_la_SOURCES_DIST)
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+	ctags-recursive dvi-recursive html-recursive info-recursive \
+	install-data-recursive install-dvi-recursive \
+	install-exec-recursive install-html-recursive \
+	install-info-recursive install-pdf-recursive \
+	install-ps-recursive install-recursive installcheck-recursive \
+	installdirs-recursive pdf-recursive ps-recursive \
+	tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive	\
+  distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+  $(RECURSIVE_TARGETS) \
+  $(RECURSIVE_CLEAN_TARGETS) \
+  $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+	distdir
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
+	$(LISP)config.h.in
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique.  This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config.h.in \
+	$(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+  dir0=`pwd`; \
+  sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+  sed_rest='s,^[^/]*/*,,'; \
+  sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+  sed_butlast='s,/*[^/]*$$,,'; \
+  while test -n "$$dir1"; do \
+    first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+    if test "$$first" != "."; then \
+      if test "$$first" = ".."; then \
+        dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+        dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+      else \
+        first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+        if test "$$first2" = "$$first"; then \
+          dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+        else \
+          dir2="../$$dir2"; \
+        fi; \
+        dir0="$$dir0"/"$$first"; \
+      fi; \
+    fi; \
+    dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+  done; \
+  reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CCD_CFLAGS = @CCD_CFLAGS@
+CCD_LIBS = @CCD_LIBS@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOXYGEN = @DOXYGEN@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXTRA_LIBTOOL_LDFLAGS = @EXTRA_LIBTOOL_LDFLAGS@
+FGREP = @FGREP@
+GL_LIBS = @GL_LIBS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSTDCXX = @LIBSTDCXX@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ODE_PRECISION = @ODE_PRECISION@
+ODE_VERSION = @ODE_VERSION@
+ODE_VERSION_INFO = @ODE_VERSION_INFO@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+X11_CFLAGS = @X11_CFLAGS@
+X11_LIBS = @X11_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_ct_WINDRES = @ac_ct_WINDRES@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+SUBDIRS = joints
+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include \
+	-D__ODE__ $(am__append_1) $(am__append_4) $(am__append_7) \
+	$(am__append_10) $(am__append_11) $(am__append_13) \
+	$(am__append_16) $(am__append_17) $(am__append_18) \
+	$(am__append_19) $(am__append_20) $(am__append_21) \
+	$(am__append_22) $(am__append_23)
+lib_LTLIBRARIES = libode.la
+libode_la_LDFLAGS = @EXTRA_LIBTOOL_LDFLAGS@ @ODE_VERSION_INFO@
+libode_la_LIBADD = joints/libjoints.la $(am__append_2) $(am__append_5) \
+	$(am__append_8) $(am__append_12) $(am__append_14)
+
+# please, let's keep the filenames sorted
+libode_la_SOURCES = nextafterf.c array.cpp array.h box.cpp capsule.cpp \
+	collision_cylinder_box.cpp collision_cylinder_plane.cpp \
+	collision_cylinder_sphere.cpp collision_kernel.cpp \
+	collision_kernel.h collision_quadtreespace.cpp \
+	collision_sapspace.cpp collision_space.cpp \
+	collision_space_internal.h collision_std.h \
+	collision_transform.cpp collision_transform.h \
+	collision_trimesh_colliders.h collision_trimesh_disabled.cpp \
+	collision_trimesh_internal.h collision_trimesh_opcode.h \
+	collision_trimesh_gimpact.h collision_util.cpp \
+	collision_util.h common.h convex.cpp coop_matrix_types.h \
+	cylinder.cpp default_threading.cpp default_threading.h \
+	error.cpp error.h export-dif.cpp fastdot.cpp fastdot_impl.h \
+	fastldltfactor.cpp fastldltfactor_impl.h fastldltsolve.cpp \
+	fastldltsolve_impl.h fastlsolve.cpp fastlsolve_impl.h \
+	fastltsolve.cpp fastltsolve_impl.h fastvecscale.cpp \
+	fastvecscale_impl.h heightfield.cpp heightfield.h lcp.cpp \
+	lcp.h mass.cpp mat.cpp mat.h matrix.cpp matrix.h memory.cpp \
+	misc.cpp objects.cpp objects.h obstack.cpp obstack.h ode.cpp \
+	odeinit.cpp odemath.cpp odemath.h odeou.h odetls.h plane.cpp \
+	quickstep.cpp quickstep.h ray.cpp resource_control.cpp \
+	resource_control.h rotation.cpp simple_cooperative.cpp \
+	simple_cooperative.h sphere.cpp step.cpp step.h timer.cpp \
+	threaded_solver_ldlt.h threading_atomics_provs.h \
+	threading_base.cpp threading_base.h threading_fake_sync.h \
+	threading_impl.cpp threading_impl.h threading_impl_posix.h \
+	threading_impl_templates.h threading_impl_win.h \
+	threading_pool_posix.cpp threading_pool_win.cpp \
+	threadingutils.h typedefs.h util.cpp util.h $(am__append_3) \
+	$(am__append_6) $(am__append_9) $(am__append_15)
+all: config.h
+	$(MAKE) $(AM_MAKEFLAGS) all-recursive
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .lo .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign ode/src/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign ode/src/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+config.h: stamp-h1
+	@test -f $@ || rm -f stamp-h1
+	@test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1
+
+stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status
+	@rm -f stamp-h1
+	cd $(top_builddir) && $(SHELL) ./config.status ode/src/config.h
+$(srcdir)/config.h.in:  $(am__configure_deps) 
+	($(am__cd) $(top_srcdir) && $(AUTOHEADER))
+	rm -f stamp-h1
+	touch $@
+
+distclean-hdr:
+	-rm -f config.h stamp-h1
+
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+
+libode.la: $(libode_la_OBJECTS) $(libode_la_DEPENDENCIES) $(EXTRA_libode_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(libode_la_LINK) -rpath $(libdir) $(libode_la_OBJECTS) $(libode_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/array.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/box.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/capsule.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_convex_trimesh.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_cylinder_box.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_cylinder_plane.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_cylinder_sphere.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_cylinder_trimesh.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_kernel.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_libccd.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_quadtreespace.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_sapspace.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_space.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_transform.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_box.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_ccylinder.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_disabled.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_gimpact.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_internal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_opcode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_plane.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_ray.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_sphere.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_trimesh.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_trimesh_trimesh_old.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/collision_util.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/convex.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cylinder.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/default_threading.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/error.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/export-dif.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastdot.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastldltfactor.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastldltsolve.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastlsolve.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastltsolve.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fastvecscale.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gimpact_contact_export_helper.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/heightfield.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lcp.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mass.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mat.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matrix.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/misc.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nextafterf.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/objects.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/obstack.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ode.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/odeinit.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/odemath.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/odeou.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/odetls.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plane.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/quickstep.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ray.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/resource_control.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rotation.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/simple_cooperative.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sphere.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/step.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/threading_base.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/threading_impl.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/threading_pool_posix.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/threading_pool_win.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/timer.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+.cpp.o:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cpp.lo:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+#     (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+	@fail=; \
+	if $(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	target=`echo $@ | sed s/-recursive//`; \
+	case "$@" in \
+	  distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+	  *) list='$(SUBDIRS)' ;; \
+	esac; \
+	for subdir in $$list; do \
+	  echo "Making $$target in $$subdir"; \
+	  if test "$$subdir" = "."; then \
+	    dot_seen=yes; \
+	    local_target="$$target-am"; \
+	  else \
+	    local_target="$$target"; \
+	  fi; \
+	  ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+	  || eval $$failcom; \
+	done; \
+	if test "$$dot_seen" = "no"; then \
+	  $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+	fi; test -z "$$fail"
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+	  include_option=--etags-include; \
+	  empty_fix=.; \
+	else \
+	  include_option=--include; \
+	  empty_fix=; \
+	fi; \
+	list='$(SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    test ! -f $$subdir/TAGS || \
+	      set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+	  fi; \
+	done; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+	@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+	  if test "$$subdir" = .; then :; else \
+	    $(am__make_dryrun) \
+	      || test -d "$(distdir)/$$subdir" \
+	      || $(MKDIR_P) "$(distdir)/$$subdir" \
+	      || exit 1; \
+	    dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+	    $(am__relativize); \
+	    new_distdir=$$reldir; \
+	    dir1=$$subdir; dir2="$(top_distdir)"; \
+	    $(am__relativize); \
+	    new_top_distdir=$$reldir; \
+	    echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+	    echo "     am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+	    ($(am__cd) $$subdir && \
+	      $(MAKE) $(AM_MAKEFLAGS) \
+	        top_distdir="$$new_top_distdir" \
+	        distdir="$$new_distdir" \
+		am__remove_distdir=: \
+		am__skip_length_check=: \
+		am__skip_mode_fix=: \
+	        distdir) \
+	      || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-recursive
+all-am: Makefile $(LTLIBRARIES) config.h
+installdirs: installdirs-recursive
+installdirs-am:
+	for dir in "$(DESTDIR)$(libdir)"; do \
+	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+	done
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-recursive
+
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
+	mostlyclean-am
+
+distclean: distclean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-hdr distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am: install-libLTLIBRARIES
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-libLTLIBRARIES
+
+.MAKE: $(am__recursive_targets) all install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+	check-am clean clean-generic clean-libLTLIBRARIES \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-compile distclean-generic distclean-hdr \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-libLTLIBRARIES install-man install-pdf \
+	install-pdf-am install-ps install-ps-am install-strip \
+	installcheck installcheck-am installdirs installdirs-am \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
+	uninstall-libLTLIBRARIES
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/libs/ode-0.16.1/ode/src/array.cpp b/libs/ode-0.16.1/ode/src/array.cpp
new file mode 100644
index 0000000..4d63925
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/array.cpp
@@ -0,0 +1,81 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/memory.h>
+#include <ode/error.h>
+#include "config.h"
+#include "array.h"
+
+
+static inline int roundUpToPowerOfTwo (int x)	// smallest power of two >= x (1 when x <= 1)
+{
+    int i = 1;
+    while (i < x && i < (1 << 30)) i <<= 1;	// stop at 2^30: one more shift would overflow a 32-bit int
+    return (i < x) ? x : i;	// x > 2^30 cannot be rounded up; hand it back unchanged
+}
+
+
+void dArrayBase::_freeAll (int sizeofT)
+{
+    // nothing is released when no buffer exists, or when the buffer is the
+    // storage directly behind this object set up by constructLocalArray()
+    const bool ownsHeapBuffer = (_data != 0) && (_data != this+1);
+    if (ownsHeapBuffer) dFree (_data,_anum * sizeofT);
+}
+
+
+void dArrayBase::_setSize (int newsize, int sizeofT)
+{
+    if (newsize < 0) return;	// negative sizes are ignored
+    // shrinking, or growing within the current allocation, only moves _size
+    if (newsize <= _anum) { _size = newsize; return; }
+    if (_data == this+1) {
+        // this is a no-no, because constructLocalArray() was called
+        dDebug (0,"setSize() out of space in LOCAL array");
+    }
+    const int grownCapacity = roundUpToPowerOfTwo (newsize);
+    _data = _data ? dRealloc (_data, _anum*sizeofT, grownCapacity*sizeofT)
+                  : dAlloc (grownCapacity*sizeofT);
+    _anum = grownCapacity;
+    _size = newsize;
+}
+
+
+void * dArrayBase::operator new (size_t nbytes)
+{
+    return dAlloc (nbytes);	// pairs with the dFree() in operator delete
+}
+
+
+void dArrayBase::operator delete (void *block, size_t nbytes)
+{
+    dFree (block,nbytes);	// dFree() is passed the size of the block it releases
+}
+
+
+void dArrayBase::constructLocalArray (int __anum)
+{
+    _data = this+1;	// element storage starts directly behind this object
+    _anum = __anum;	// capacity in elements, as supplied by the caller
+    _size = 0;
+}
diff --git a/libs/ode-0.16.1/ode/src/array.h b/libs/ode-0.16.1/ode/src/array.h
new file mode 100644
index 0000000..7ce9e48
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/array.h
@@ -0,0 +1,135 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* this comes from the `reuse' library. copy any changes back to the source.
+ *
+ * Variable sized array template. The array is always stored in a contiguous
+ * chunk. The array can be resized. A size increase will cause more memory
+ * to be allocated, and may result in relocation of the array memory.
+ * A size decrease has no effect on the memory allocation.
+ *
+ * Array elements with constructors or destructors are not supported!
+ * But if you must have such elements, here's what to know/do:
+ *   - Bitwise copy is used when copying whole arrays.
+ *   - When copying individual items (via push(), insert() etc) the `='
+ *     (equals) operator is used. Thus you should define this operator to do
+ *     a bitwise copy. You should probably also define the copy constructor.
+ */
+
+
+#ifndef _ODE_ARRAY_H_
+#define _ODE_ARRAY_H_
+
+#include <ode/odeconfig.h>
+
+
+// this base class has no constructors or destructor, for your convenience.
+
+class dArrayBase {
+protected:
+    int _size;		// number of elements in use in `_data'
+    int _anum;		// allocated number of elements in `_data'
+    void *_data;		// array data
+
+    void _freeAll (int sizeofT);	// frees _data unless it is a local array; sizeofT = bytes per element
+    void _setSize (int newsize, int sizeofT);
+    // set the array size to `newsize', allocating more memory if necessary.
+    // if newsize>_anum and is a power of two then this is guaranteed to
+    // set _size and _anum to newsize.
+
+public:
+    // not: dArrayBase () { _size=0; _anum=0; _data=0; }
+
+    int size() const { return _size; }
+    int allocatedSize() const { return _anum; }
+    void * operator new (size_t size);	// allocates the object with dAlloc()
+    void operator delete (void *ptr, size_t size);	// releases the object with dFree()
+
+    void constructor() { _size=0; _anum=0; _data=0; }
+    // if this structure is allocated with malloc() instead of new, you can
+    // call this to set it up.
+
+    void constructLocalArray (int __anum);
+    // this helper function allows non-reallocating arrays to be constructed
+    // on the stack (or in the heap if necessary). this is something of a
+    // kludge and should be used with extreme care. this function acts like
+    // a constructor - it is called on uninitialized memory that will hold the
+    // Array structure and the data. __anum is the number of elements that
+    // are allocated. the memory MUST be allocated with size:
+    //   sizeof(dArrayBase) + __anum*sizeof(T)
+    // arrays allocated this way will never try to reallocate or free the
+    // memory - that's your job.
+};
+
+
+template <class T> class dArray : public dArrayBase {
+public:
+    // make this array a bitwise copy of x. memcpy() is undefined for overlapping
+    // ranges and null pointers, so it is skipped for a self-copy or an empty x.
+    void equals (const dArray<T> &x) {
+        setSize (x.size());
+        if (this != &x && x._size > 0) memcpy (_data,x._data,x._size * sizeof(T));
+    }
+
+    dArray () { constructor(); }
+    dArray (const dArray<T> &x) { constructor(); equals (x); }
+    ~dArray () { _freeAll(sizeof(T)); }
+    void setSize (int newsize) { _setSize (newsize,sizeof(T)); }
+    T *data() const { return (T*) _data; }
+    T & operator[] (int i) const { return ((T*)_data)[i]; }	// no bounds check
+    void operator = (const dArray<T> &x) { equals (x); }
+
+    void push (const T item) {	// append a bitwise copy of item at the end
+        if (_size < _anum) _size++; else _setSize (_size+1,sizeof(T));
+        memcpy (&(((T*)_data)[_size-1]), &item, sizeof(T));
+    }
+
+    void swap (dArray<T> &x) {	// exchange contents with x; no element is copied
+        int tmpSize=_size; _size=x._size; x._size=tmpSize;
+        int tmpAnum=_anum; _anum=x._anum; x._anum=tmpAnum;
+        void *tmpData=_data; _data=x._data; x._data=tmpData;
+    }
+
+    // insert the item at the position `i'. if i<0 then add the item to the
+    // start, if i >= size then add the item to the end of the array.
+    void insert (int i, const T item) {
+        if (_size < _anum) _size++; else _setSize (_size+1,sizeof(T));
+        if (i >= (_size-1)) i = _size-1;	// add to end
+        else {
+            if (i < 0) i=0;			// add to start
+            int n = _size-1-i;
+            if (n>0) memmove (((T*)_data) + i+1, ((T*)_data) + i, n*sizeof(T));
+        }
+        ((T*)_data)[i] = item;
+    }
+
+    void remove (int i) {	// drop element i and close the gap; out-of-range i is ignored
+        if (i >= 0 && i < _size) {	// passing this test guarantees size>0
+            int n = _size-1-i;
+            if (n>0) memmove (((T*)_data) + i, ((T*)_data) + i+1, n*sizeof(T));
+            _size--;
+        }
+    }
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/box.cpp b/libs/ode-0.16.1/ode/src/box.cpp
new file mode 100644
index 0000000..cfedb01
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/box.cpp
@@ -0,0 +1,878 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+  standard ODE geometry primitives: public API and pairwise collision functions.
+
+  the rule is that only the low level primitive collision functions should set
+  dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// box public API
+
+// Construct a box geom with side lengths (lx,ly,lz); `space' is handed to the dxGeom base.
+dxBox::dxBox (dSpaceID space, dReal lx, dReal ly, dReal lz) : dxGeom (space,1)
+{
+    dAASSERT (lx >= 0 && ly >= 0 && lz >= 0);
+    type = dBoxClass;
+    const dReal lengths[3] = { lx, ly, lz };
+    for (int axis = 0; axis < 3; axis++) side[axis] = lengths[axis];
+    updateZeroSizedFlag(lx == 0 || ly == 0 || lz == 0);	// flag the box when any side is zero
+}
+
+
+// Recompute aabb[] (min,max per world axis) from the box's final pose: each
+// half-extent is half the summed absolute projections of the three sides.
+void dxBox::computeAABB()
+{
+    const dMatrix3& rot = final_posr->R;
+    const dVector3& center = final_posr->pos;
+
+    for (int axis = 0; axis < 3; axis++) {
+        // row `axis' of the rotation (rows are 4 entries apart)
+        const dReal *row = rot + axis*4;
+        const dReal halfExtent = REAL(0.5) * (dFabs (row[0] * side[0]) +
+            dFabs (row[1] * side[1]) + dFabs (row[2] * side[2]));
+
+        // aabb holds (min,max) pairs: x at [0],[1], y at [2],[3], z at [4],[5]
+        aabb[axis*2] = center[axis] - halfExtent;
+        aabb[axis*2+1] = center[axis] + halfExtent;
+    }
+}
+
+// Public factory: allocates a dxBox with side lengths (lx,ly,lz) via `new' and returns it as a dGeomID.
+dGeomID dCreateBox (dSpaceID space, dReal lx, dReal ly, dReal lz)
+{
+    return new dxBox (space,lx,ly,lz);
+}
+
+// Set the side lengths of box geom `g' to (lx,ly,lz), refresh its zero-size flag and call dGeomMoved().
+void dGeomBoxSetLengths (dGeomID g, dReal lx, dReal ly, dReal lz)
+{
+    dUASSERT (g && g->type == dBoxClass,"argument not a box");
+    dAASSERT (lx >= 0 && ly >= 0 && lz >= 0);
+    dxBox *box = (dxBox*) g;
+    const dReal lengths[3] = { lx, ly, lz };
+    for (int axis = 0; axis < 3; axis++) box->side[axis] = lengths[axis];
+    const bool anyZero = (lx == 0) || (ly == 0) || (lz == 0);
+    box->updateZeroSizedFlag(anyZero);
+    dGeomMoved (g);
+}
+
+// Copy the three side lengths of box geom `g' into result[0..2].
+void dGeomBoxGetLengths (dGeomID g, dVector3 result)
+{
+    dUASSERT (g && g->type == dBoxClass,"argument not a box");
+    dxBox *box = (dxBox*) g;
+    for (int axis = 0; axis < 3; axis++) {
+        result[axis] = box->side[axis];
+    }
+}
+
+// Signed depth of point (x,y,z) with respect to box geom `g': >= 0 inside, <= 0 outside.
+dReal dGeomBoxPointDepth (dGeomID g, dReal x, dReal y, dReal z)
+{
+    dUASSERT (g && g->type == dBoxClass,"argument not a box");
+    g->recomputePosr();
+    dxBox *b = (dxBox*) g;
+
+    // Set p = (x,y,z) relative to box center
+    //
+    // This will be (0,0,0) if the point is at the box center
+
+    dVector3 p,q;
+
+    p[0] = x - b->final_posr->pos[0];
+    p[1] = y - b->final_posr->pos[1];
+    p[2] = z - b->final_posr->pos[2];
+
+    // Rotate p into box's coordinate frame, so we can
+    // treat the OBB as an AABB
+
+    dMultiply1_331 (q,b->final_posr->R,p);
+
+    // Record distance from point to each successive box side, and see
+    // if the point is inside all six sides
+
+    dReal dist[6];
+    int   i;
+
+    bool inside = true;
+
+    for (i=0; i < 3; i++) {
+        dReal side = b->side[i] * REAL(0.5);	// half side length along box axis i
+
+        dist[i  ] = side - q[i];	// signed distance to the +i face
+        dist[i+3] = side + q[i];	// signed distance to the -i face
+
+        if ((dist[i] < 0) || (dist[i+3] < 0)) {
+            inside = false;
+        }
+    }
+
+    // If point is inside the box, the depth is the smallest positive distance
+    // to any side
+
+    if (inside) {
+        dReal smallest_dist = dInfinity;	// seed above any finite distance (the old (unsigned)-1 seed capped results at ~4.29e9)
+
+        for (i=0; i < 6; i++) {
+            if (dist[i] < smallest_dist) smallest_dist = dist[i];
+        }
+
+        return smallest_dist;
+    }
+
+    // Otherwise, if point is outside the box, the depth is the negated largest
+    // entry of dist[].  This is only an approximation to the 'proper'
+    // solution: the magnitude is not the true distance to the box surface.
+
+    dReal largest_dist = 0;
+
+    for (i=0; i < 6; i++) {
+        if (dist[i] > largest_dist) largest_dist = dist[i];
+    }
+
+    return -largest_dist;
+}
+
+//****************************************************************************
+// box-box collision utility
+
+
+// find all the intersection points between the 2D rectangle with vertices
+// at (+/-h[0],+/-h[1]) and the 2D quadrilateral with vertices (p[0],p[1]),
+// (p[2],p[3]),(p[4],p[5]),(p[6],p[7]). the quadrilateral is chopped against
+// each of the four rectangle edges in turn; what is left is the result.
+// the intersection points are returned as x,y pairs in the 'ret' array.
+// the number of intersection points is returned by the function (this will
+// be in the range 0 to 8). 'h' and 'p' are only read; the intermediate
+// polygons alternate between 'ret' and a local buffer.
+static int intersectRectQuad (dReal h[2], dReal p[8], dReal ret[16])
+{
+    // q (and r) contain nq (and nr) coordinate points for the current (and
+    // chopped) polygons
+    int nq=4,nr;
+    dReal buffer[16];
+    dReal *q = p;
+    dReal *r = ret;
+    for (int dir=0; dir <= 1; dir++) {
+        // direction notation: xy[0] = x axis, xy[1] = y axis
+        for (int sign=-1; sign <= 1; sign += 2) {
+            // chop q along the line xy[dir] = sign*h[dir]
+            dReal *pq = q;
+            dReal *pr = r;
+            nr = 0;
+            for (int i=nq; i > 0; i--) {
+                // go through all points in q and all lines between adjacent points
+                if (sign*pq[dir] < h[dir]) {
+                    // this point is inside the chopping line
+                    pr[0] = pq[0];
+                    pr[1] = pq[1];
+                    pr += 2;
+                    nr++;
+                    if (nr & 8) {	// 8 points written: stop, the result is capped at 8
+                        q = r;
+                        goto done;
+                    }
+                }
+                dReal *nextq = (i > 1) ? pq+2 : q;	// next vertex, wrapping to the first one after the last
+                if ((sign*pq[dir] < h[dir]) ^ (sign*nextq[dir] < h[dir])) {
+                    // this line crosses the chopping line
+                    pr[1-dir] = pq[1-dir] + (nextq[1-dir]-pq[1-dir]) /
+                        (nextq[dir]-pq[dir]) * (sign*h[dir]-pq[dir]);
+                    pr[dir] = sign*h[dir];
+                    pr += 2;
+                    nr++;
+                    if (nr & 8) {	// 8 points written: stop, the result is capped at 8
+                        q = r;
+                        goto done;
+                    }
+                }
+                pq += 2;
+            }
+            q = r;	// the chopped polygon becomes the input of the next pass
+            r = (q==ret) ? buffer : ret;	// write the next pass into whichever array q is not using
+            nq = nr;
+        }
+    }
+done:
+    if (q != ret) memcpy (ret,q,nr*2*sizeof(dReal));	// final polygon ended up in buffer: copy it out
+    return nr;
+}
+
+
+// given n points in the plane (array p, of size 2*n), generate m points that
+// best represent the whole set. the definition of 'best' here is not
+// predetermined - the idea is to select points that give good box-box
+// collision detection behavior. the chosen point indexes are returned in the
+// array iret (of size m). 'i0' is always the first entry in the array.
+// n must be in the range [1..8]. m must be in the range [1..n]. i0 must be
+// in the range [0..n-1].
+// NOTE(review): for n >= 3, dRecip() is applied to 3*(a+q) with no check that a+q is non-zero - confirm callers.
+void cullPoints (int n, dReal p[], int m, int i0, int iret[])
+{
+    // compute the centroid of the polygon in cx,cy
+    int i,j;
+    dReal a,cx,cy,q;
+    if (n==1) {
+        cx = p[0];
+        cy = p[1];
+    }
+    else if (n==2) {
+        cx = REAL(0.5)*(p[0] + p[2]);
+        cy = REAL(0.5)*(p[1] + p[3]);
+    }
+    else {
+        a = 0;
+        cx = 0;
+        cy = 0;
+        for (i=0; i<(n-1); i++) {
+            q = p[i*2]*p[i*2+3] - p[i*2+2]*p[i*2+1];	// 2D cross product of vertex i and vertex i+1
+            a += q;
+            cx += q*(p[i*2]+p[i*2+2]);
+            cy += q*(p[i*2+1]+p[i*2+3]);
+        }
+        q = p[n*2-2]*p[1] - p[0]*p[n*2-1];	// closing term: last vertex with vertex 0
+        a = dRecip(REAL(3.0)*(a+q));
+        cx = a*(cx + q*(p[n*2-2]+p[0]));
+        cy = a*(cy + q*(p[n*2-1]+p[1]));
+    }
+
+    // compute the angle of each point w.r.t. the centroid
+    dReal A[8];
+    for (i=0; i<n; i++) A[i] = dAtan2(p[i*2+1]-cy,p[i*2]-cx);
+
+    // search for points that have angles closest to A[i0] + i*(2*pi/m).
+    int avail[8];	// avail[i] is 1 while point i has not been chosen yet
+    for (i=0; i<n; i++) avail[i] = 1;
+    avail[i0] = 0;
+    iret[0] = i0;
+    iret++;
+    for (j=1; j<m; j++) {
+        a = (dReal)(dReal(j)*(2*M_PI/m) + A[i0]);	// target angle for the j-th output point
+        if (a > M_PI) a -= (dReal)(2*M_PI);
+        dReal maxdiff=1e9,diff;	// maxdiff tracks the smallest angular difference found so far
+#ifndef dNODEBUG
+        *iret = i0;			// iret is not allowed to keep this value
+#endif
+        for (i=0; i<n; i++) {
+            if (avail[i]) {
+                diff = dFabs (A[i]-a);
+                if (diff > M_PI) diff = (dReal) (2*M_PI - diff);	// measure the difference the short way round
+                if (diff < maxdiff) {
+                    maxdiff = diff;
+                    *iret = i;
+                }
+            }
+        }
+#ifndef dNODEBUG
+        dIASSERT (*iret != i0);	// ensure iret got set
+#endif
+        avail[*iret] = 0;
+        iret++;
+    }
+}
+
+
+// given two boxes (p1,R1,side1) and (p2,R2,side2), collide them together and
+// generate contact points. this returns 0 if there is no contact otherwise
+// it returns the number of contacts generated.
+// `normal' returns the contact normal.
+// `depth' returns the maximum penetration depth along that normal.
+// `return_code' returns a number indicating the type of contact that was
+// detected:
+//        1,2,3 = box 2 intersects with a face of box 1
+//        4,5,6 = box 1 intersects with a face of box 2
+//        7..15 = edge-edge contact
+// `flags' holds the maximum number of contacts to generate in its NUMC_MASK
+// bits (i.e. the size of the `contact' array) and may also carry the
+// CONTACTS_UNIMPORTANT bit, which stops the search early.
+// `contact' and `skip' are the contact array information provided to the
+// collision functions. this function only fills in the position and depth
+// fields. `return_code' is only written when at least one contact is returned.
+
+int dBoxBox (const dVector3 p1, const dMatrix3 R1,
+             const dVector3 side1, const dVector3 p2,
+             const dMatrix3 R2, const dVector3 side2,
+             dVector3 normal, dReal *depth, int *return_code,
+             int flags, dContactGeom *contact, int skip)
+{
+    const dReal fudge_factor = REAL(1.05);
+    dVector3 p,pp,normalC={0,0,0};
+    const dReal *normalR = 0;
+    dReal A[3],B[3],R11,R12,R13,R21,R22,R23,R31,R32,R33,
+        Q11,Q12,Q13,Q21,Q22,Q23,Q31,Q32,Q33,s,s2,l,expr1_val;
+    int i,j,invert_normal,code;
+
+    // get vector from centers of box 1 to box 2, relative to box 1
+    p[0] = p2[0] - p1[0];
+    p[1] = p2[1] - p1[1];
+    p[2] = p2[2] - p1[2];
+    dMultiply1_331 (pp,R1,p);		// get pp = p relative to body 1
+
+    // get side lengths / 2
+    A[0] = side1[0]*REAL(0.5);
+    A[1] = side1[1]*REAL(0.5);
+    A[2] = side1[2]*REAL(0.5);
+    B[0] = side2[0]*REAL(0.5);
+    B[1] = side2[1]*REAL(0.5);
+    B[2] = side2[2]*REAL(0.5);
+
+    // Rij is R1'*R2, i.e. the relative rotation between R1 and R2
+    R11 = dCalcVectorDot3_44(R1+0,R2+0); R12 = dCalcVectorDot3_44(R1+0,R2+1); R13 = dCalcVectorDot3_44(R1+0,R2+2);
+    R21 = dCalcVectorDot3_44(R1+1,R2+0); R22 = dCalcVectorDot3_44(R1+1,R2+1); R23 = dCalcVectorDot3_44(R1+1,R2+2);
+    R31 = dCalcVectorDot3_44(R1+2,R2+0); R32 = dCalcVectorDot3_44(R1+2,R2+1); R33 = dCalcVectorDot3_44(R1+2,R2+2);
+
+    // Qij = |Rij|, used for the projected box extents below
+    Q11 = dFabs(R11); Q12 = dFabs(R12); Q13 = dFabs(R13);
+    Q21 = dFabs(R21); Q22 = dFabs(R22); Q23 = dFabs(R23);
+    Q31 = dFabs(R31); Q32 = dFabs(R32); Q33 = dFabs(R33);
+
+    // for all 15 possible separating axes:
+    //   * see if the axis separates the boxes. if so, return 0.
+    //   * find the depth of the penetration along the separating axis (s2)
+    //   * if this is the largest depth so far, record it.
+    // the normal vector will be set to the separating axis with the smallest
+    // depth. note: normalR is set to point to a column of R1 or R2 if that is
+    // the smallest depth normal so far. otherwise normalR is 0 and normalC is
+    // set to a vector relative to body 1. invert_normal is 1 if the sign of
+    // the normal should be flipped.
+
+    do {
+#define TST(expr1,expr2,norm,cc) \
+    expr1_val = (expr1); /* Avoid duplicate evaluation of expr1 */ \
+    s2 = dFabs(expr1_val) - (expr2); \
+    if (s2 > 0) return 0; \
+    if (s2 > s) { \
+    s = s2; \
+    normalR = norm; \
+    invert_normal = ((expr1_val) < 0); \
+    code = (cc); \
+    if (flags & CONTACTS_UNIMPORTANT) break; \
+    }
+
+        s = -dInfinity;
+        invert_normal = 0;
+        code = 0;
+
+        // separating axis = u1,u2,u3
+        TST (pp[0],(A[0] + B[0]*Q11 + B[1]*Q12 + B[2]*Q13),R1+0,1);
+        TST (pp[1],(A[1] + B[0]*Q21 + B[1]*Q22 + B[2]*Q23),R1+1,2);
+        TST (pp[2],(A[2] + B[0]*Q31 + B[1]*Q32 + B[2]*Q33),R1+2,3);
+
+        // separating axis = v1,v2,v3
+        TST (dCalcVectorDot3_41(R2+0,p),(A[0]*Q11 + A[1]*Q21 + A[2]*Q31 + B[0]),R2+0,4);
+        TST (dCalcVectorDot3_41(R2+1,p),(A[0]*Q12 + A[1]*Q22 + A[2]*Q32 + B[1]),R2+1,5);
+        TST (dCalcVectorDot3_41(R2+2,p),(A[0]*Q13 + A[1]*Q23 + A[2]*Q33 + B[2]),R2+2,6);
+
+        // note: cross product axes need to be scaled when s is computed.
+        // normal (n1,n2,n3) is relative to box 1.
+#undef TST
+#define TST(expr1,expr2,n1,n2,n3,cc) \
+    expr1_val = (expr1); /* Avoid duplicate evaluation of expr1 */ \
+    s2 = dFabs(expr1_val) - (expr2); \
+    if (s2 > 0) return 0; \
+    l = dSqrt ((n1)*(n1) + (n2)*(n2) + (n3)*(n3)); \
+    if (l > 0) { \
+    s2 /= l; \
+    if (s2*fudge_factor > s) { \
+    s = s2; \
+    normalR = 0; \
+    normalC[0] = (n1)/l; normalC[1] = (n2)/l; normalC[2] = (n3)/l; \
+    invert_normal = ((expr1_val) < 0); \
+    code = (cc); \
+    if (flags & CONTACTS_UNIMPORTANT) break; \
+    } \
+    }
+
+        // We only need to check 3 edges per box 
+        // since parallel edges are equivalent.
+
+        // separating axis = u1 x (v1,v2,v3)
+        TST(pp[2]*R21-pp[1]*R31,(A[1]*Q31+A[2]*Q21+B[1]*Q13+B[2]*Q12),0,-R31,R21,7);
+        TST(pp[2]*R22-pp[1]*R32,(A[1]*Q32+A[2]*Q22+B[0]*Q13+B[2]*Q11),0,-R32,R22,8);
+        TST(pp[2]*R23-pp[1]*R33,(A[1]*Q33+A[2]*Q23+B[0]*Q12+B[1]*Q11),0,-R33,R23,9);
+
+        // separating axis = u2 x (v1,v2,v3)
+        TST(pp[0]*R31-pp[2]*R11,(A[0]*Q31+A[2]*Q11+B[1]*Q23+B[2]*Q22),R31,0,-R11,10);
+        TST(pp[0]*R32-pp[2]*R12,(A[0]*Q32+A[2]*Q12+B[0]*Q23+B[2]*Q21),R32,0,-R12,11);
+        TST(pp[0]*R33-pp[2]*R13,(A[0]*Q33+A[2]*Q13+B[0]*Q22+B[1]*Q21),R33,0,-R13,12);
+
+        // separating axis = u3 x (v1,v2,v3)
+        TST(pp[1]*R11-pp[0]*R21,(A[0]*Q21+A[1]*Q11+B[1]*Q33+B[2]*Q32),-R21,R11,0,13);
+        TST(pp[1]*R12-pp[0]*R22,(A[0]*Q22+A[1]*Q12+B[0]*Q33+B[2]*Q31),-R22,R12,0,14);
+        TST(pp[1]*R13-pp[0]*R23,(A[0]*Q23+A[1]*Q13+B[0]*Q32+B[1]*Q31),-R23,R13,0,15);
+#undef TST
+    } while (0);
+
+    if (!code) return 0;
+
+    // if we get to this point, the boxes interpenetrate. compute the normal
+    // in global coordinates.
+    if (normalR) {
+        normal[0] = normalR[0];
+        normal[1] = normalR[4];
+        normal[2] = normalR[8];
+    }
+    else {
+        dMultiply0_331 (normal,R1,normalC);
+    }
+    if (invert_normal) {
+        normal[0] = -normal[0];
+        normal[1] = -normal[1];
+        normal[2] = -normal[2];
+    }
+    *depth = -s;
+
+    // compute contact point(s)
+
+    if (code > 6) {
+        // An edge from box 1 touches an edge from box 2.
+        // find a point pa on the intersecting edge of box 1
+        dVector3 pa;
+        dReal sign;
+        // Copy p1 into pa
+        for (i=0; i<3; i++) pa[i] = p1[i]; // why no memcpy?
+        // Move pa from the center of box 1 to the corner of box 1 that lies furthest along the normal
+        for (j=0; j<3; j++) {
+            sign = (dCalcVectorDot3_14(normal,R1+j) > 0) ? REAL(1.0) : REAL(-1.0);
+            for (i=0; i<3; i++) pa[i] += sign * A[j] * R1[i*4+j];
+        }
+
+        // find a point pb on the intersecting edge of box 2
+        dVector3 pb;
+        // Copy p2 into pb
+        for (i=0; i<3; i++) pb[i] = p2[i]; // why no memcpy?
+        // Move pb from the center of box 2 to the corner of box 2 that lies furthest against the normal
+        for (j=0; j<3; j++) {
+            sign = (dCalcVectorDot3_14(normal,R2+j) > 0) ? REAL(-1.0) : REAL(1.0);
+            for (i=0; i<3; i++) pb[i] += sign * B[j] * R2[i*4+j];
+        }
+
+        dReal alpha,beta;
+        dVector3 ua,ub;
+        // Get direction of first edge
+        for (i=0; i<3; i++) ua[i] = R1[((code)-7)/3 + i*4];
+        // Get direction of second edge
+        for (i=0; i<3; i++) ub[i] = R2[((code)-7)%3 + i*4];
+        // Get closest points between edges (one at each)
+        dLineClosestApproach (pa,ua,pb,ub,&alpha,&beta);    
+        for (i=0; i<3; i++) pa[i] += ua[i]*alpha;
+        for (i=0; i<3; i++) pb[i] += ub[i]*beta;
+        // Set the contact point as halfway between the 2 closest points
+        for (i=0; i<3; i++) contact[0].pos[i] = REAL(0.5)*(pa[i]+pb[i]);
+        contact[0].depth = *depth;
+        *return_code = code;
+        return 1;
+    }
+
+    // okay, we have a face-something intersection (because the separating
+    // axis is perpendicular to a face). define face 'a' to be the reference
+    // face (i.e. the normal vector is perpendicular to this) and face 'b' to be
+    // the incident face (the closest face of the other box).
+    // Note: Unmodified parameter values are being used here
+    const dReal *Ra,*Rb,*pa,*pb,*Sa,*Sb;
+    if (code <= 3) { // One of the faces of box 1 is the reference face
+        Ra = R1; // Rotation of 'a'
+        Rb = R2; // Rotation of 'b'
+        pa = p1; // Center (location) of 'a'
+        pb = p2; // Center (location) of 'b'
+        Sa = A;  // Half side lengths of 'a'
+        Sb = B;  // Half side lengths of 'b'
+    }
+    else { // One of the faces of box 2 is the reference face
+        Ra = R2; // Rotation of 'a'
+        Rb = R1; // Rotation of 'b'
+        pa = p2; // Center (location) of 'a'
+        pb = p1; // Center (location) of 'b'
+        Sa = B;  // Half side lengths of 'a'
+        Sb = A;  // Half side lengths of 'b'
+    }
+
+    // nr = normal vector of reference face dotted with axes of incident box.
+    // anr = absolute values of nr.
+    /*
+    The normal is flipped if necessary so it always points outward from box 'a',
+    box 'b' is thus always the incident box
+    */
+    dVector3 normal2,nr,anr;
+    if (code <= 3) {
+        normal2[0] = normal[0];
+        normal2[1] = normal[1];
+        normal2[2] = normal[2];
+    }
+    else {
+        normal2[0] = -normal[0];
+        normal2[1] = -normal[1];
+        normal2[2] = -normal[2];
+    }
+    // Rotate normal2 in incident box opposite direction
+    dMultiply1_331 (nr,Rb,normal2);
+    anr[0] = dFabs (nr[0]);
+    anr[1] = dFabs (nr[1]);
+    anr[2] = dFabs (nr[2]);
+
+    // find the largest component of anr: this corresponds to the normal
+    // for the incident face. the other axis numbers of the incident face
+    // are stored in a1,a2.
+    int lanr,a1,a2;
+    if (anr[1] > anr[0]) {
+        if (anr[1] > anr[2]) {
+            a1 = 0;
+            lanr = 1;
+            a2 = 2;
+        }
+        else {
+            a1 = 0;
+            a2 = 1;
+            lanr = 2;
+        }
+    }
+    else {
+        if (anr[0] > anr[2]) {
+            lanr = 0;
+            a1 = 1;
+            a2 = 2;
+        }
+        else {
+            a1 = 0;
+            a2 = 1;
+            lanr = 2;
+        }
+    }
+
+    // compute center point of incident face, in reference-face coordinates
+    dVector3 center;
+    if (nr[lanr] < 0) {
+        for (i=0; i<3; i++) center[i] = pb[i] - pa[i] + Sb[lanr] * Rb[i*4+lanr];
+    }
+    else {
+        for (i=0; i<3; i++) center[i] = pb[i] - pa[i] - Sb[lanr] * Rb[i*4+lanr];
+    }
+
+    // find the normal and non-normal axis numbers of the reference box
+    int codeN,code1,code2;
+    if (code <= 3) codeN = code-1; else codeN = code-4;
+    if (codeN==0) {
+        code1 = 1;
+        code2 = 2;
+    }
+    else if (codeN==1) {
+        code1 = 0;
+        code2 = 2;
+    }
+    else {
+        code1 = 0;
+        code2 = 1;
+    }
+
+    // find the four corners of the incident face, in reference-face coordinates
+    dReal quad[8];	// 2D coordinate of incident face (x,y pairs)
+    dReal c1,c2,m11,m12,m21,m22;
+    c1 = dCalcVectorDot3_14 (center,Ra+code1);
+    c2 = dCalcVectorDot3_14 (center,Ra+code2);
+    // optimize this? - we have already computed this data above, but it is not
+    // stored in an easy-to-index format. for now it's quicker just to recompute
+    // the four dot products.
+    m11 = dCalcVectorDot3_44 (Ra+code1,Rb+a1);
+    m12 = dCalcVectorDot3_44 (Ra+code1,Rb+a2);
+    m21 = dCalcVectorDot3_44 (Ra+code2,Rb+a1);
+    m22 = dCalcVectorDot3_44 (Ra+code2,Rb+a2);
+    {
+        dReal k1 = m11*Sb[a1];
+        dReal k2 = m21*Sb[a1];
+        dReal k3 = m12*Sb[a2];
+        dReal k4 = m22*Sb[a2];
+        quad[0] = c1 - k1 - k3;
+        quad[1] = c2 - k2 - k4;
+        quad[2] = c1 - k1 + k3;
+        quad[3] = c2 - k2 + k4;
+        quad[4] = c1 + k1 + k3;
+        quad[5] = c2 + k2 + k4;
+        quad[6] = c1 + k1 - k3;
+        quad[7] = c2 + k2 - k4;
+    }
+
+    // find the size of the reference face
+    dReal rect[2];
+    rect[0] = Sa[code1];
+    rect[1] = Sa[code2];
+
+    // intersect the incident and reference faces
+    dReal ret[16];
+    int n = intersectRectQuad (rect,quad,ret);
+    if (n < 1) return 0;		// this should never happen
+
+    // convert the intersection points into reference-face coordinates,
+    // and compute the contact position and depth for each point. only keep
+    // those points that have a positive (penetrating) depth. delete points in
+    // the 'ret' array as necessary so that 'point' and 'ret' correspond.
+    dReal point[3*8];		// penetrating contact points
+    dReal dep[8];			// depths for those points
+    dReal det1 = dRecip(m11*m22 - m12*m21);
+    m11 *= det1;
+    m12 *= det1;
+    m21 *= det1;
+    m22 *= det1;
+    int cnum = 0;			// number of penetrating contact points found
+    for (j=0; j < n; j++) {
+        dReal k1 =  m22*(ret[j*2]-c1) - m12*(ret[j*2+1]-c2);
+        dReal k2 = -m21*(ret[j*2]-c1) + m11*(ret[j*2+1]-c2);
+        for (i=0; i<3; i++) point[cnum*3+i] =
+            center[i] + k1*Rb[i*4+a1] + k2*Rb[i*4+a2];
+        dep[cnum] = Sa[codeN] - dCalcVectorDot3(normal2,point+cnum*3);
+        if (dep[cnum] >= 0) {
+            ret[cnum*2] = ret[j*2];
+            ret[cnum*2+1] = ret[j*2+1];
+            cnum++;
+            if ((cnum | CONTACTS_UNIMPORTANT) == (flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                break;
+            }
+        }
+    }
+    if (cnum < 1) { 
+        return 0;	// this should not happen, yet does at times (demo_plane2d single precision).
+    }
+
+    // we can't generate more contacts than we actually have
+    int maxc = flags & NUMC_MASK;
+    if (maxc > cnum) maxc = cnum;
+    if (maxc < 1) maxc = 1;	// Even though max count must not be zero this check is kept for backward compatibility as this is a public function
+
+    if (cnum <= maxc) {
+        // we have less contacts than we need, so we use them all
+        for (j=0; j < cnum; j++) {
+            dContactGeom *con = CONTACT(contact,skip*j);
+            for (i=0; i<3; i++) con->pos[i] = point[j*3+i] + pa[i];
+            con->depth = dep[j];
+        }
+    }
+    else {
+        dIASSERT(!(flags & CONTACTS_UNIMPORTANT)); // cnum should be generated not greater than maxc so that "then" clause is executed
+        // we have more contacts than are wanted, some of them must be culled.
+        // find the deepest point, it is always the first contact.
+        int i1 = 0;
+        dReal maxdepth = dep[0];
+        for (i=1; i<cnum; i++) {
+            if (dep[i] > maxdepth) {
+                maxdepth = dep[i];
+                i1 = i;
+            }
+        }
+
+        int iret[8];
+        cullPoints (cnum,ret,maxc,i1,iret);
+
+        for (j=0; j < maxc; j++) {
+            dContactGeom *con = CONTACT(contact,skip*j);
+            for (i=0; i<3; i++) con->pos[i] = point[iret[j]*3+i] + pa[i];
+            con->depth = dep[iret[j]];
+        }
+        cnum = maxc;
+    }
+
+    *return_code = code;
+    return cnum;
+}
+
+
+// Box-box collider entry point: runs dBoxBox() on the two geoms and fills in the
+// normal, geom and side fields of every contact it produced.
+int dCollideBoxBox (dxGeom *o1, dxGeom *o2, int flags,
+                    dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dBoxClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxBox *boxA = (dxBox*) o1;
+    dxBox *boxB = (dxBox*) o2;
+    dVector3 boxNormal;
+    dReal maxDepth;
+    int contactType;
+    const int found = dBoxBox (
+        o1->final_posr->pos,o1->final_posr->R,boxA->side,
+        o2->final_posr->pos,o2->final_posr->R,boxB->side,
+        boxNormal,&maxDepth,&contactType,flags,contact,skip);
+    // dBoxBox() only fills in pos and depth; the normal is stored negated here
+    for (int k = 0; k < found; k++) {
+        dContactGeom *c = CONTACT(contact,k*skip);
+        for (int axis = 0; axis < 3; axis++) c->normal[axis] = -boxNormal[axis];
+        c->g1 = o1; c->g2 = o2;
+        c->side1 = -1; c->side2 = -1;
+    }
+    return found;
+}
+
+// Box-plane collider: o1 must be a box and o2 a plane; writes at most min(flags & NUMC_MASK, 4) contacts and returns the count.
+int dCollideBoxPlane (dxGeom *o1, dxGeom *o2,
+                      int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dBoxClass);
+    dIASSERT (o2->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxBox *box = (dxBox*) o1;
+    dxPlane *plane = (dxPlane*) o2;
+
+    contact->g1 = o1;	// these four fields are written again for every contact in the loop at the end
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    int ret = 0;
+
+    //@@@ problem: using 4-vector (plane->p) as 3-vector (normal).
+    const dReal *R = o1->final_posr->R;		// rotation of box
+    const dReal *n = plane->p;		// normal vector
+
+    // project sides lengths along normal vector, get absolute values
+    dReal Q1 = dCalcVectorDot3_14(n,R+0);
+    dReal Q2 = dCalcVectorDot3_14(n,R+1);
+    dReal Q3 = dCalcVectorDot3_14(n,R+2);
+    dReal A1 = box->side[0] * Q1;
+    dReal A2 = box->side[1] * Q2;
+    dReal A3 = box->side[2] * Q3;
+    dReal B1 = dFabs(A1);
+    dReal B2 = dFabs(A2);
+    dReal B3 = dFabs(A3);
+
+    // early exit test
+    dReal depth = plane->p[3] + REAL(0.5)*(B1+B2+B3) - dCalcVectorDot3(n,o1->final_posr->pos);
+    if (depth < 0) return 0;	// no penetration
+
+    // find number of contacts requested
+    int maxc = flags & NUMC_MASK;
+    // if (maxc < 1) maxc = 1; // an assertion is made on entry
+    if (maxc > 4) maxc = 4;	// not more than 4 contacts per box allowed
+
+    // find deepest point
+    dVector3 p;
+    p[0] = o1->final_posr->pos[0];
+    p[1] = o1->final_posr->pos[1];
+    p[2] = o1->final_posr->pos[2];
+#define FOO(i,op) \
+    p[0] op REAL(0.5)*box->side[i] * R[0+i]; \
+    p[1] op REAL(0.5)*box->side[i] * R[4+i]; \
+    p[2] op REAL(0.5)*box->side[i] * R[8+i];
+#define BAR(i,iinc) if (A ## iinc > 0) { FOO(i,-=) } else { FOO(i,+=) }
+    BAR(0,1);
+    BAR(1,2);
+    BAR(2,3);
+#undef FOO
+#undef BAR
+
+    // the deepest point is the first contact point
+    contact->pos[0] = p[0];
+    contact->pos[1] = p[1];
+    contact->pos[2] = p[2];
+    contact->depth = depth;
+    ret = 1;		// ret is number of contact points found so far
+    if (maxc == 1) goto done;
+
+    // get the second and third contact points by starting from `p' and going
+    // along the two sides with the smallest projected length.
+
+#define FOO(i,j,op) \
+    CONTACT(contact,i*skip)->pos[0] = p[0] op box->side[j] * R[0+j]; \
+    CONTACT(contact,i*skip)->pos[1] = p[1] op box->side[j] * R[4+j]; \
+    CONTACT(contact,i*skip)->pos[2] = p[2] op box->side[j] * R[8+j];
+#define BAR(ctact,side,sideinc) \
+    if (depth - B ## sideinc < 0) goto done; \
+    if (A ## sideinc > 0) { FOO(ctact,side,+); } else { FOO(ctact,side,-); } \
+    CONTACT(contact,ctact*skip)->depth = depth - B ## sideinc; \
+    ret++;
+
+    if (B1 < B2) {
+        if (B3 < B1) goto use_side_3; else {
+            BAR(1,0,1);	// use side 1
+            if (maxc == 2) goto done;
+            if (B2 < B3) goto contact2_2; else goto contact2_3;
+        }
+    }
+    else {
+        if (B3 < B2) {
+use_side_3:	// use side 3
+            BAR(1,2,3);
+            if (maxc == 2) goto done;
+            if (B1 < B2) goto contact2_1; else goto contact2_2;
+        }
+        else {
+            BAR(1,1,2);	// use side 2
+            if (maxc == 2) goto done;
+            if (B1 < B3) goto contact2_1; else goto contact2_3;
+        }
+    }
+
+contact2_1: BAR(2,0,1); goto done;
+contact2_2: BAR(2,1,2); goto done;
+contact2_3: BAR(2,2,3); goto done;
+#undef FOO
+#undef BAR
+
+done:
+
+    if (maxc == 4 && ret == 3) { // If user requested 4 contacts, and the first 3 were created...
+        // Combine contacts 2 and 3 (vectorial sum) and get the fourth one
+        // Result: if a box face is completely inside a plane, contacts are created for all the 4 vertices
+        dReal d4 = CONTACT(contact,1*skip)->depth + CONTACT(contact,2*skip)->depth - depth;  // depth is the depth for first contact
+        if (d4 > 0) {
+            CONTACT(contact,3*skip)->pos[0] = CONTACT(contact,1*skip)->pos[0] + CONTACT(contact,2*skip)->pos[0] - p[0]; // p is the position of first contact
+            CONTACT(contact,3*skip)->pos[1] = CONTACT(contact,1*skip)->pos[1] + CONTACT(contact,2*skip)->pos[1] - p[1];
+            CONTACT(contact,3*skip)->pos[2] = CONTACT(contact,1*skip)->pos[2] + CONTACT(contact,2*skip)->pos[2] - p[2];
+            CONTACT(contact,3*skip)->depth  = d4;
+            ret++;
+        }
+    }
+
+    // every contact gets the same geoms, side markers and the plane normal
+    for (int i=0; i<ret; i++) {
+        dContactGeom *currContact = CONTACT(contact,i*skip);
+        currContact->g1 = o1;
+        currContact->g2 = o2;
+        currContact->side1 = -1;
+        currContact->side2 = -1;
+        currContact->normal[0] = n[0];
+        currContact->normal[1] = n[1];
+        currContact->normal[2] = n[2];
+    }
+    return ret;
+}
diff --git a/libs/ode-0.16.1/ode/src/capsule.cpp b/libs/ode-0.16.1/ode/src/capsule.cpp
new file mode 100644
index 0000000..80e24ac
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/capsule.cpp
@@ -0,0 +1,416 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+standard ODE geometry primitives: public API and pairwise collision functions.
+
+the rule is that only the low level primitive collision functions should set
+dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// capped cylinder public API
+// Builds a capsule geom, handing `space` on to dxGeom; _radius and _length must be non-negative.
+dxCapsule::dxCapsule (dSpaceID space, dReal _radius, dReal _length) : dxGeom (space,1)
+{
+    dAASSERT (_radius >= 0 && _length >= 0);
+    lz = _length;
+    radius = _radius;
+    type = dCapsuleClass;
+    const bool hasZeroRadius = (_radius == 0);  // a zero length capsule is not a zero sized capsule
+    updateZeroSizedFlag(hasZeroRadius);
+}
+
+// Recomputes aabb[] (min/max pairs for x, y and z) from the capsule's current pose.
+void dxCapsule::computeAABB()
+{
+    const dReal *rot = final_posr->R;
+    const dReal *center = final_posr->pos;
+
+    // rot[2], rot[6] and rot[10] are the world-space components of the capsule axis.
+    // Along each world axis the box reaches half the projected axis length plus the
+    // radius to either side of the center.
+    for (int axis = 0; axis < 3; axis++)
+    {
+        const dReal reach = dFabs(rot[axis*4+2] * lz) * REAL(0.5) + radius;
+        aabb[axis*2]   = center[axis] - reach;
+        aabb[axis*2+1] = center[axis] + reach;
+    }
+}
+
+// Allocates a new dxCapsule, forwarding space, radius and length to its constructor.
+dGeomID dCreateCapsule (dSpaceID space, dReal radius, dReal length)
+{
+    return new dxCapsule (space,radius,length);
+}
+
+// Replaces a capsule's radius and length, refreshes its zero-sized flag and calls dGeomMoved().
+void dGeomCapsuleSetParams (dGeomID g, dReal radius, dReal length)
+{
+    dUASSERT (g && g->type == dCapsuleClass,"argument not a ccylinder");
+    dAASSERT (radius >= 0 && length >= 0);
+    dxCapsule *capsule = static_cast<dxCapsule*> (g);
+    capsule->lz = length;
+    capsule->radius = radius;
+    capsule->updateZeroSizedFlag(radius == 0);  // a zero length capsule is not a zero sized capsule
+    dGeomMoved (g);
+}
+
+// Copies the capsule's radius and length into *radius and *length (in that order).
+void dGeomCapsuleGetParams (dGeomID g, dReal *radius, dReal *length)
+{
+    dUASSERT (g && g->type == dCapsuleClass,"argument not a ccylinder");
+    const dxCapsule *capsule = static_cast<const dxCapsule*> (g);
+    *radius = capsule->radius;
+    *length = capsule->lz;
+}
+
+// Depth of point (x,y,z) in the capsule: radius minus distance to the axis segment (positive inside).
+dReal dGeomCapsulePointDepth (dGeomID g, dReal x, dReal y, dReal z)
+{
+    dUASSERT (g && g->type == dCapsuleClass,"argument not a ccylinder");
+    g->recomputePosr();
+    const dxCapsule *capsule = static_cast<const dxCapsule*> (g);
+    const dReal *rot = g->final_posr->R;
+    const dReal *center = g->final_posr->pos;
+    // offset of the query point from the capsule center, projected onto the capsule axis
+    dVector3 v;
+    v[0] = x - center[0];
+    v[1] = y - center[1];
+    v[2] = z - center[2];
+    dReal t = dCalcVectorDot3_14(v,rot+2);
+    // clamp the projection to the axis segment [-lz/2, lz/2]
+    const dReal halfLen = capsule->lz*REAL(0.5);
+    if (t < -halfLen) t = -halfLen;
+    else if (t > halfLen) t = halfLen;
+    v[0] = center[0] + t*rot[0*4+2];   // v is reused for the closest point on the segment
+    v[1] = center[1] + t*rot[1*4+2];
+    v[2] = center[2] + t*rot[2*4+2];
+    const dReal dx = x-v[0], dy = y-v[1], dz = z-v[2];
+    return capsule->radius - dSqrt (dx*dx + dy*dy + dz*dz);
+}
+
+
+// Capsule-sphere collider: treats the capsule as a sphere centered on the axis point nearest o2.
+int dCollideCapsuleSphere (dxGeom *o1, dxGeom *o2, int flags,
+                           dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dCapsuleClass);
+    dIASSERT (o2->type == dSphereClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    const dxCapsule *capsule = static_cast<const dxCapsule*> (o1);
+    const dxSphere *ball = static_cast<const dxSphere*> (o2);
+    const dReal *capPos = o1->final_posr->pos;
+    const dReal *capRot = o1->final_posr->R;
+    dReal *ballPos = o2->final_posr->pos;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    // signed distance along the capsule axis to the point closest to the sphere center,
+    // limited to the extent of the axis segment
+    const dReal halfLen = capsule->lz * REAL(0.5);
+    dReal t = capRot[2]  * (ballPos[0] - capPos[0]) +
+              capRot[6]  * (ballPos[1] - capPos[1]) +
+              capRot[10] * (ballPos[2] - capPos[2]);
+    if (t > halfLen) t = halfLen;
+    if (t < -halfLen) t = -halfLen;
+    // collide a sphere of the capsule's radius placed at that point with the real sphere
+    dVector3 nearest;
+    for (int i = 0; i < 3; i++) nearest[i] = capPos[i] + t * capRot[i*4+2];
+    return dCollideSpheres (nearest,capsule->radius,ballPos,ball->radius,contact);
+}
+
+// Capsule-box collider: one contact from the closest points of capsule axis and box, or a box-box test when they touch.
+int dCollideCapsuleBox (dxGeom *o1, dxGeom *o2, int flags,
+                        dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dCapsuleClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxCapsule *cyl = (dxCapsule*) o1;
+    dxBox *box = (dxBox*) o2;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    // p1,p2 = end points of the capsule axis segment (center +/- half length along the axis)
+    dVector3 p1,p2;
+    dReal clen = cyl->lz * REAL(0.5);
+    p1[0] = o1->final_posr->pos[0] + clen * o1->final_posr->R[2];
+    p1[1] = o1->final_posr->pos[1] + clen * o1->final_posr->R[6];
+    p1[2] = o1->final_posr->pos[2] + clen * o1->final_posr->R[10];
+    p2[0] = o1->final_posr->pos[0] - clen * o1->final_posr->R[2];
+    p2[1] = o1->final_posr->pos[1] - clen * o1->final_posr->R[6];
+    p2[2] = o1->final_posr->pos[2] - clen * o1->final_posr->R[10];
+    dReal radius = cyl->radius;
+
+    // shortcuts (pointers, not copies) to the box center, rotation matrix, and side array
+    dReal *c = o2->final_posr->pos;
+    dReal *R = o2->final_posr->R;
+    const dReal *side = box->side;
+
+    // get the closest point between the cylinder axis and the box
+    dVector3 pl,pb;
+    dClosestLineBoxPoints (p1,p2,c,R,side,pl,pb);
+
+    // if the capsule is penetrated further than its radius,
+    //  pl and pb are equal (up to mindist) and give no usable contact normal;
+    // in that case use the normal reported by a box-box test instead
+#ifdef dSINGLE
+    dReal mindist = REAL(1e-6);
+#else
+    dReal mindist = REAL(1e-15);
+#endif
+    if (dCalcPointsDistance3(pl, pb)<mindist) {
+        // stand-in box for the capsule: 2*radius wide, lz + 2*radius long, in the capsule's frame
+        dVector3 normal;
+        dReal depth;
+        int code;
+        // WARNING! rad2 is declared as #define in Microsoft headers (as well as psh2, chx2, grp2, frm2, rct2, ico2, stc2, lst2, cmb2, edt2, scr2). Avoid abbreviations!
+        /* dReal rad2 = radius*REAL(2.0); */ dReal radiusMul2 = radius * REAL(2.0);
+        const dVector3 capboxside = {radiusMul2, radiusMul2, cyl->lz + radiusMul2};
+        int num = dBoxBox (c, R, side, 
+            o1->final_posr->pos, o1->final_posr->R, capboxside,
+            normal, &depth, &code, flags, contact, skip);
+
+        for (int i=0; i<num; i++) {
+            dContactGeom *currContact = CONTACT(contact,i*skip);
+            currContact->normal[0] = normal[0];
+            currContact->normal[1] = normal[1];
+            currContact->normal[2] = normal[2];
+            currContact->g1 = o1;
+            currContact->g2 = o2;
+            currContact->side1 = -1;
+            currContact->side2 = -1;
+        }
+        return num;
+    } else {
+        // regular case: collide a sphere of the capsule radius at pl with the point pb (radius 0)
+        return dCollideSpheres (pl,radius,pb,0,contact);
+    }
+}
+
+// Capsule-capsule collider: up to two contacts for (nearly) parallel axes, otherwise one at the closest points.
+int dCollideCapsuleCapsule (dxGeom *o1, dxGeom *o2,
+                            int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dCapsuleClass);
+    dIASSERT (o2->type == dCapsuleClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    int i;
+    const dReal tolerance = REAL(1e-5);  // axes count as parallel when 1 - (axis1.axis2)^2 is below this
+
+    dxCapsule *cyl1 = (dxCapsule*) o1;
+    dxCapsule *cyl2 = (dxCapsule*) o2;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    // copy out some variables, for convenience
+    dReal lz1 = cyl1->lz * REAL(0.5);
+    dReal lz2 = cyl2->lz * REAL(0.5);
+    dReal *pos1 = o1->final_posr->pos;
+    dReal *pos2 = o2->final_posr->pos;
+    dReal axis1[3],axis2[3];
+    axis1[0] = o1->final_posr->R[2];
+    axis1[1] = o1->final_posr->R[6];
+    axis1[2] = o1->final_posr->R[10];
+    axis2[0] = o2->final_posr->R[2];
+    axis2[1] = o2->final_posr->R[6];
+    axis2[2] = o2->final_posr->R[10];
+
+    // if the cylinder axes are close to parallel, we'll try to detect up to
+    // two contact points along the body of the cylinder. if we can't find any
+    // points then we'll fall back to the closest-points algorithm. note that
+    // we are not treating this special case for reasons of degeneracy, but
+    // because we want two contact points in some situations. the closest-points
+    // algorithm is robust in all cases, but it can return only one contact.
+
+    dVector3 sphere1,sphere2;
+    dReal a1a2 = dCalcVectorDot3 (axis1,axis2);
+    dReal det = REAL(1.0)-a1a2*a1a2;
+    if (det < tolerance) {
+        // the cylinder axes are (almost) parallel, so we will generate up to two
+        // contacts. alpha1 and alpha2 (line position parameters) are related by:
+        //       alpha2 =   alpha1 + (pos1-pos2)'*axis1   (if axis1==axis2)
+        //    or alpha2 = -(alpha1 + (pos1-pos2)'*axis1)  (if axis1==-axis2)
+        // first compute where the two cylinders overlap in alpha1 space:
+        if (a1a2 < 0) {
+            axis2[0] = -axis2[0];
+            axis2[1] = -axis2[1];
+            axis2[2] = -axis2[2];
+        }
+        dReal q[3];
+        for (i=0; i<3; i++) q[i] = pos1[i]-pos2[i];
+        dReal k = dCalcVectorDot3 (axis1,q);
+        dReal a1lo = -lz1;
+        dReal a1hi = lz1;
+        dReal a2lo = -lz2 - k;
+        dReal a2hi = lz2 - k;
+        dReal lo = (a1lo > a2lo) ? a1lo : a2lo;
+        dReal hi = (a1hi < a2hi) ? a1hi : a2hi;
+        if (lo <= hi) {
+            int num_contacts = flags & NUMC_MASK;
+            if (num_contacts >= 2 && lo < hi) {
+                // generate up to two contacts. if one of those contacts is
+                // not made, fall back on the one-contact strategy.
+                for (i=0; i<3; i++) sphere1[i] = pos1[i] + lo*axis1[i];
+                for (i=0; i<3; i++) sphere2[i] = pos2[i] + (lo+k)*axis2[i];
+                int n1 = dCollideSpheres (sphere1,cyl1->radius,
+                    sphere2,cyl2->radius,contact);
+                if (n1) {
+                    for (i=0; i<3; i++) sphere1[i] = pos1[i] + hi*axis1[i];
+                    for (i=0; i<3; i++) sphere2[i] = pos2[i] + (hi+k)*axis2[i];
+                    dContactGeom *c2 = CONTACT(contact,skip);
+                    int n2 = dCollideSpheres (sphere1,cyl1->radius,
+                        sphere2,cyl2->radius, c2);
+                    if (n2) {
+                        c2->g1 = o1;
+                        c2->g2 = o2;
+                        c2->side1 = -1;
+                        c2->side2 = -1;
+                        return 2;
+                    }
+                }
+            }
+
+            // just one contact to generate, so put it in the middle of
+            // the range
+            dReal alpha1 = (lo + hi) * REAL(0.5);
+            dReal alpha2 = alpha1 + k;
+            for (i=0; i<3; i++) sphere1[i] = pos1[i] + alpha1*axis1[i];
+            for (i=0; i<3; i++) sphere2[i] = pos2[i] + alpha2*axis2[i];
+            return dCollideSpheres (sphere1,cyl1->radius,
+                sphere2,cyl2->radius,contact);
+        }
+    }
+
+    // use the closest point algorithm (also reached when parallel axes do not overlap, i.e. lo > hi)
+    dVector3 a1,a2,b1,b2;
+    a1[0] = o1->final_posr->pos[0] + axis1[0]*lz1;
+    a1[1] = o1->final_posr->pos[1] + axis1[1]*lz1;
+    a1[2] = o1->final_posr->pos[2] + axis1[2]*lz1;
+    a2[0] = o1->final_posr->pos[0] - axis1[0]*lz1;
+    a2[1] = o1->final_posr->pos[1] - axis1[1]*lz1;
+    a2[2] = o1->final_posr->pos[2] - axis1[2]*lz1;
+    b1[0] = o2->final_posr->pos[0] + axis2[0]*lz2;
+    b1[1] = o2->final_posr->pos[1] + axis2[1]*lz2;
+    b1[2] = o2->final_posr->pos[2] + axis2[2]*lz2;
+    b2[0] = o2->final_posr->pos[0] - axis2[0]*lz2;
+    b2[1] = o2->final_posr->pos[1] - axis2[1]*lz2;
+    b2[2] = o2->final_posr->pos[2] - axis2[2]*lz2;
+
+    dClosestLineSegmentPoints (a1,a2,b1,b2,sphere1,sphere2);
+    return dCollideSpheres (sphere1,cyl1->radius,sphere2,cyl2->radius,contact);
+}
+
+// Capsule-plane collider: tests the end-cap sphere nearer the plane first, then (if allowed) the other.
+int dCollideCapsulePlane (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dCapsuleClass);
+    dIASSERT (o2->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    int i;
+    const dxCapsule *capsule = static_cast<const dxCapsule*> (o1);
+    const dxPlane *surface = static_cast<const dxPlane*> (o2);
+    const dReal *center = o1->final_posr->pos;
+    const dReal *rot = o1->final_posr->R;
+    const dReal *eq = surface->p;   // eq[0..2] is used as the contact normal, eq[3] as the plane offset
+    const dReal capRadius = capsule->radius;
+
+    // pick the end of the axis that points against the plane normal: its cap is the deepest one
+    const dReal sign = (dCalcVectorDot3_14 (eq,rot+2) > 0) ? REAL(-1.0) : REAL(1.0);
+    dVector3 capCenter;
+    for (i=0; i<3; i++) capCenter[i] = center[i] + rot[i*4+2] * capsule->lz * REAL(0.5) * sign;
+
+    // depth of the cap sphere below the plane; a negative value means no contact at all
+    dReal k = dCalcVectorDot3 (capCenter,eq);
+    dReal depth = eq[3] - k + capRadius;
+    if (depth < 0) return 0;
+
+    for (i=0; i<3; i++) {
+        contact->normal[i] = eq[i];
+        contact->pos[i] = capCenter[i] - eq[i] * capRadius;
+    }
+    contact->depth = depth;
+    int ncontacts = 1;
+
+    const bool wantSecond = (flags & NUMC_MASK) >= 2;
+    if (wantSecond) {
+        // collide the other capping sphere with the plane
+        for (i=0; i<3; i++) {
+            capCenter[i] = center[i] - rot[i*4+2] * capsule->lz * REAL(0.5) * sign;
+        }
+        k = dCalcVectorDot3 (capCenter,eq);
+        depth = eq[3] - k + capRadius;
+        if (depth >= 0) {
+            dContactGeom *second = CONTACT(contact,skip);
+            for (i=0; i<3; i++) {
+                second->normal[i] = eq[i];
+                second->pos[i] = capCenter[i] - eq[i] * capRadius;
+            }
+            second->depth = depth;
+            ncontacts = 2;
+        }
+    }
+
+    // every generated contact refers to the same geom pair and carries no side indices
+    for (i=0; i < ncontacts; i++) {
+        dContactGeom *entry = CONTACT(contact,i*skip);
+        entry->g1 = o1;  entry->g2 = o2;
+        entry->side1 = -1;  entry->side2 = -1;
+    }
+    return ncontacts;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/collision_convex_trimesh.cpp b/libs/ode-0.16.1/ode/src/collision_convex_trimesh.cpp
new file mode 100644
index 0000000..651f236
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_convex_trimesh.cpp
@@ -0,0 +1,120 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+// Plain record for one contact candidate. NOTE(review): no use of it is visible in this part of the file.
+typedef struct _sLocalContactData
+{
+    dVector3	vPos;
+    dVector3	vNormal;
+    dReal		fDepth;
+    int			triIndex;
+    int			nFlags; // 0 = filtered out, 1 = OK
+}sLocalContactData;
+
+
+#if dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_std.h"
+#include "collision_trimesh_internal.h"
+#if dLIBCCD_ENABLED
+#include "collision_libccd.h"
+#endif
+// Convex-trimesh collider: gathers the mesh triangles near the convex's AABB and hands them to dCollideConvexTrimeshTrianglesCCD.
+int dCollideConvexTrimesh( dxGeom *o1, dxGeom *o2, int flags, dContactGeom* contacts, int skip )
+{
+    int contactcount = 0;
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dConvexClass );
+    dIASSERT( o2->type == dTriMeshClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+#if dLIBCCD_ENABLED
+
+#if dTRIMESH_OPCODE
+    const dVector3 &meshPosition = *(const dVector3 *)dGeomGetPosition(o2);
+    // Express the convex's AABB relative to the mesh position; it is wrapped in an OBB with identity rotation below
+    Point convexAABBMin(o1->aabb[0] - meshPosition[0], o1->aabb[2] - meshPosition[1], o1->aabb[4] - meshPosition[2]);
+    Point convexAABBMax(o1->aabb[1] - meshPosition[0], o1->aabb[3] - meshPosition[1], o1->aabb[5] - meshPosition[2]);
+    
+    const Point convexCenter = 0.5f * (convexAABBMax + convexAABBMin);
+    const Point convexExtents = 0.5f * (convexAABBMax - convexAABBMin);
+    const Matrix3x3 convexRotation(1.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 1.0f);
+    OBB convexOOB(convexCenter, convexExtents, convexRotation);
+
+    Matrix4x4 meshTransformation;  // built from the mesh rotation and a zero translation (position was subtracted above)
+    const dMatrix3 &meshRotation = *(const dMatrix3 *)dGeomGetRotation(o2);
+    const dVector3 zeroVector = { REAL(0.0), };
+    MakeMatrix(zeroVector, meshRotation, meshTransformation);
+    
+    OBBCollider collider;
+    collider.SetFirstContact(false);
+    collider.SetTemporalCoherence(false);
+    collider.SetPrimitiveTests(false);
+    
+    OBBCache cache;
+    dxTriMesh *trimesh = (dxTriMesh *)o2;
+    if (collider.Collide(cache, convexOOB, trimesh->retrieveMeshBVTreeRef(), null, &meshTransformation)) {
+        int triCount = collider.GetNbTouchedPrimitives();
+        if (triCount > 0) {
+            int* triangles = (int*)collider.GetTouchedPrimitives();
+            contactcount = dCollideConvexTrimeshTrianglesCCD(o1, o2, triangles, triCount, flags, contacts, skip);
+        }
+    }
+
+#elif dTRIMESH_GIMPACT
+    dxTriMesh *trimesh = (dxTriMesh *)o2;
+
+    aabb3f test_aabb(o1->aabb[0], o1->aabb[1], o1->aabb[2], o1->aabb[3], o1->aabb[4], o1->aabb[5]);
+
+    GDYNAMIC_ARRAY collision_result;
+    GIM_CREATE_BOXQUERY_LIST(collision_result);
+
+    gim_aabbset_box_collision(&test_aabb, &trimesh->m_collision_trimesh.m_aabbset, &collision_result);
+
+    if (collision_result.m_size != 0)
+    {
+        GUINT32 * boxesresult = GIM_DYNARRAY_POINTER(GUINT32,collision_result);
+        GIM_TRIMESH * ptrimesh = &trimesh->m_collision_trimesh;
+        gim_trimesh_locks_work_data(ptrimesh);
+
+        contactcount = dCollideConvexTrimeshTrianglesCCD(o1, o2, (int *)boxesresult, collision_result.m_size, flags, contacts, skip);
+
+        gim_trimesh_unlocks_work_data(ptrimesh);
+    }
+
+    GIM_DYNARRAY_DESTROY(collision_result);
+#endif // dTRIMESH_GIMPACT
+
+#endif // dLIBCCD_ENABLED
+
+    return contactcount;  // stays 0 when dLIBCCD_ENABLED is off or no candidate triangles were found
+}
+
+#endif // dTRIMESH_ENABLED
+
diff --git a/libs/ode-0.16.1/ode/src/collision_cylinder_box.cpp b/libs/ode-0.16.1/ode/src/collision_cylinder_box.cpp
new file mode 100644
index 0000000..4eaf92d
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_cylinder_box.cpp
@@ -0,0 +1,1038 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *	Cylinder-box collider by Alen Ladavac
+ *  Ported to ODE by Nguyen Binh
+ */
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_util.h"
+
+static const int MAX_CYLBOX_CLIP_POINTS  = 16;
+static const int nCYLINDER_AXIS			 = 2;	// rotation matrix column taken as the cylinder axis
+// Number of segments of the cylinder base circle.
+// Must be divisible by 4.
+static const int nCYLINDER_SEGMENT		 = 8;
+// Start value for the best (smallest) overlap depth before any axis has been tested.
+#define MAX_FLOAT	dInfinity
+
+// Working data shared by the functions of the cylinder-box collider
+struct sCylinderBoxData
+{
+    sCylinderBoxData(dxGeom *Cylinder, dxGeom *Box, int flags, dContactGeom *contact, int skip):
+        m_gBox(Box), m_gCylinder(Cylinder), m_gContact(contact), m_iFlags(flags), m_iSkip(skip), m_nContacts(0)
+    {
+    }
+
+    void _cldInitCylinderBox();
+    int _cldTestAxis( dVector3& vInputNormal, int iAxis );
+    int _cldTestEdgeCircleAxis( const dVector3 &vCenterPoint, 
+        const dVector3 &vVx0, const dVector3 &vVx1, int iAxis );
+    int _cldTestSeparatingAxes();
+    int _cldClipCylinderToBox();
+    void _cldClipBoxToCylinder();
+    int PerformCollisionChecking();
+
+    // cylinder parameters (filled in by _cldInitCylinderBox)
+    dMatrix3			m_mCylinderRot;
+    dVector3			m_vCylinderPos;
+    dVector3			m_vCylinderAxis;	// column nCYLINDER_AXIS of m_mCylinderRot
+    dReal				m_fCylinderRadius;
+    dReal				m_fCylinderSize;	// second value from dGeomCylinderGetParams; halved for projections
+    dVector3			m_avCylinderNormals[nCYLINDER_SEGMENT];
+
+    // box parameters (filled in by _cldInitCylinderBox)
+
+    dMatrix3			m_mBoxRot;
+    dVector3			m_vBoxPos;
+    dVector3			m_vBoxHalfSize;	// box side lengths multiplied by 0.5
+    // box vertices array : 8 vertices, in absolute space after _cldInitCylinderBox
+    dVector3			m_avBoxVertices[8];
+
+    // global collider data
+    dVector3			m_vDiff;			// cylinder position minus box position
+    dVector3			m_vNormal;	// axis with the smallest overlap found by _cldTestAxis
+    dReal				m_fBestDepth;	// that smallest overlap
+    dReal				m_fBestrb;	// box projection half-width on that axis
+    dReal				m_fBestrc;	// cylinder projection half-width on that axis
+    int					m_iBestAxis;	// iAxis value passed to _cldTestAxis for that axis
+
+    // contact data
+    dVector3			m_vEp0, m_vEp1;
+    dReal				m_fDepth0, m_fDepth1;
+
+    // ODE stuff (constructor arguments)
+    dGeomID				m_gBox;
+    dGeomID				m_gCylinder;
+    dContactGeom*		m_gContact;
+    int					m_iFlags;
+    int					m_iSkip;
+    int					m_nContacts;
+
+};
+
+
+// initialize collision data: caches both geoms' poses and sizes and resets the best-axis search state
+void sCylinderBoxData::_cldInitCylinderBox() 
+{
+    // get cylinder position, orientation
+    const dReal* pRotCyc = dGeomGetRotation(m_gCylinder); 
+    dMatrix3Copy(pRotCyc,m_mCylinderRot);
+
+    const dVector3* pPosCyc = (const dVector3*)dGeomGetPosition(m_gCylinder);
+    dVector3Copy(*pPosCyc,m_vCylinderPos);
+
+    dMat3GetCol(m_mCylinderRot,nCYLINDER_AXIS,m_vCylinderAxis);
+
+    // get cylinder radius and size
+    dGeomCylinderGetParams(m_gCylinder,&m_fCylinderRadius,&m_fCylinderSize);
+
+    // get box position, orientation, size
+    const dReal* pRotBox = dGeomGetRotation(m_gBox);
+    dMatrix3Copy(pRotBox,m_mBoxRot);
+    const dVector3* pPosBox  = (const dVector3*)dGeomGetPosition(m_gBox);
+    dVector3Copy(*pPosBox,m_vBoxPos);
+
+    dGeomBoxGetLengths(m_gBox, m_vBoxHalfSize);  // full lengths here, halved right below
+    m_vBoxHalfSize[0] *= REAL(0.5);
+    m_vBoxHalfSize[1] *= REAL(0.5);
+    m_vBoxHalfSize[2] *= REAL(0.5);
+
+    // vertex 0 (the eight vertices are first written in the box's own frame)
+    m_avBoxVertices[0][0] = -m_vBoxHalfSize[0];
+    m_avBoxVertices[0][1] =  m_vBoxHalfSize[1];
+    m_avBoxVertices[0][2] = -m_vBoxHalfSize[2];
+
+    // vertex 1
+    m_avBoxVertices[1][0] =  m_vBoxHalfSize[0];
+    m_avBoxVertices[1][1] =  m_vBoxHalfSize[1];
+    m_avBoxVertices[1][2] = -m_vBoxHalfSize[2];
+
+    // vertex 2
+    m_avBoxVertices[2][0] = -m_vBoxHalfSize[0];
+    m_avBoxVertices[2][1] = -m_vBoxHalfSize[1];
+    m_avBoxVertices[2][2] = -m_vBoxHalfSize[2];
+
+    // vertex 3
+    m_avBoxVertices[3][0] =  m_vBoxHalfSize[0];
+    m_avBoxVertices[3][1] = -m_vBoxHalfSize[1];
+    m_avBoxVertices[3][2] = -m_vBoxHalfSize[2];
+
+    // vertex 4
+    m_avBoxVertices[4][0] =  m_vBoxHalfSize[0];
+    m_avBoxVertices[4][1] =  m_vBoxHalfSize[1];
+    m_avBoxVertices[4][2] =  m_vBoxHalfSize[2];
+
+    // vertex 5
+    m_avBoxVertices[5][0] =  m_vBoxHalfSize[0];
+    m_avBoxVertices[5][1] = -m_vBoxHalfSize[1];
+    m_avBoxVertices[5][2] =  m_vBoxHalfSize[2];
+
+    // vertex 6
+    m_avBoxVertices[6][0] = -m_vBoxHalfSize[0];
+    m_avBoxVertices[6][1] = -m_vBoxHalfSize[1];
+    m_avBoxVertices[6][2] =  m_vBoxHalfSize[2];
+
+    // vertex 7
+    m_avBoxVertices[7][0] = -m_vBoxHalfSize[0];
+    m_avBoxVertices[7][1] =  m_vBoxHalfSize[1];
+    m_avBoxVertices[7][2] =  m_vBoxHalfSize[2];
+
+    // temp index
+    int i = 0;
+    dVector3	vTempBoxVertices[8];
+    // transform vertices in absolute space (rotate into a temporary, then add the box position)
+    for(i=0; i < 8; i++) 
+    {
+        dMultiplyMat3Vec3(m_mBoxRot,m_avBoxVertices[i], vTempBoxVertices[i]);
+        dVector3Add(vTempBoxVertices[i], m_vBoxPos, m_avBoxVertices[i]);
+    }
+
+    // find relative position (cylinder position minus box position)
+    dVector3Subtract(m_vCylinderPos,m_vBoxPos,m_vDiff);
+    m_fBestDepth = MAX_FLOAT;
+    m_vNormal[0] = REAL(0.0);
+    m_vNormal[1] = REAL(0.0);
+    m_vNormal[2] = REAL(0.0);
+
+    // calculate basic angle for nCYLINDER_SEGMENT-gon (half of the per-segment angle)
+    dReal fAngle = (dReal) (M_PI/nCYLINDER_SEGMENT);
+
+    // calculate angle increment (one full segment)
+    dReal fAngleIncrement = fAngle * REAL(2.0); 
+
+    // calculate nCYLINDER_SEGMENT-gon points: one (-cos, -sin, 0) vector per segment
+    for(i = 0; i < nCYLINDER_SEGMENT; i++) 
+    {
+        m_avCylinderNormals[i][0] = -dCos(fAngle);
+        m_avCylinderNormals[i][1] = -dSin(fAngle);
+        m_avCylinderNormals[i][2] = 0;
+
+        fAngle += fAngleIncrement;
+    }
+
+    m_fBestrb		= 0;
+    m_fBestrc		= 0;
+    m_iBestAxis		= 0;
+    m_nContacts		= 0;
+
+}
+
+// Tests one candidate separating axis between the cylinder and the box.
+//
+//   vInputNormal - candidate axis; normalized in place when it is long enough
+//   iAxis        - identifier recorded in m_iBestAxis if this axis gives the smallest overlap
+// Returns 0 when the projections onto the axis do not overlap (the shapes are separated)
+// and 1 otherwise, including the case of an axis too short to be tested.
+//
+// Side effects: when the overlap is smaller than m_fBestDepth, updates m_fBestDepth,
+// m_vNormal, m_iBestAxis, m_fBestrb and m_fBestrc.
+int sCylinderBoxData::_cldTestAxis( dVector3& vInputNormal, int iAxis ) 
+{
+    // an axis of (almost) zero length carries no information: skip it
+    if ( dVector3Length(vInputNormal) < REAL(1e-5) ) 
+    {
+        return 1;
+    }
+
+    // otherwise make it unit for sure
+    dNormalize3(vInputNormal);
+
+    // half-width of the cylinder's projection onto the axis:
+    // |cos| * half length accounts for the cylinder's height and
+    // radius * sqrt(1 - cos^2) for its round cross-section
+    const dReal fHalfLength = m_fCylinderSize*REAL(0.5);
+    const dReal fCosine = dVector3Dot(m_vCylinderAxis, vInputNormal);
+
+    // a cosine outside [-1,1] is treated as exactly +-1, which leaves
+    // only the half length
+    dReal fCylinderExtent = fHalfLength;
+    if ( !(fCosine > REAL(1.0) || fCosine < REAL(-1.0)) )
+    {
+        fCylinderExtent = dFabs( fCosine * fHalfLength )
+            + m_fCylinderRadius * dSqrt(REAL(1.0)-(fCosine*fCosine));
+    }
+
+    // half-width of the box's projection onto the axis:
+    // sum over the box axes of |box axis . normal| * half size
+    dVector3 vBoxAxis;
+    dMat3GetCol(m_mBoxRot,0,vBoxAxis);
+    dReal fBoxExtent = dFabs(dVector3Dot(vBoxAxis,vInputNormal))*m_vBoxHalfSize[0];
+
+    for (int iCol = 1; iCol < 3; iCol++)
+    {
+        dMat3GetCol(m_mBoxRot,iCol,vBoxAxis);
+        fBoxExtent += dFabs(dVector3Dot(vBoxAxis,vInputNormal))*m_vBoxHalfSize[iCol];
+    }
+
+    // distance between the two centers measured along the axis
+    const dReal fCenterDist = dVector3Dot(m_vDiff,vInputNormal);
+    const dReal fAbsCenterDist = dFabs(fCenterDist);
+
+    // the projections overlap only while the centers are no further apart
+    // than the sum of the two half-widths
+    const dReal fReach = fCylinderExtent + fBoxExtent;
+    if ( fAbsCenterDist > fReach )
+    {
+        return 0;
+    }
+
+    // amount by which the two projections overlap
+    const dReal fOverlap = fReach - fAbsCenterDist;
+
+    // remember the axis with the smallest overlap seen so far
+    // (m_fBestDepth starts out as MAX_FLOAT)
+    if ( fOverlap < m_fBestDepth )
+    {
+        m_fBestDepth = fOverlap;
+        dVector3Copy(vInputNormal,m_vNormal);
+        m_iBestAxis  = iAxis;
+        m_fBestrb    = fBoxExtent;
+        m_fBestrc    = fCylinderExtent;
+
+        // keep the stored normal from pointing along m_vDiff
+        if (fCenterDist > 0)
+        {
+            dVector3Inv(m_vNormal);
+        }
+    }
+
+    return 1;
+}
+
+
+// Tests the separating axis defined by one box edge and the circular edge of the cylinder.
+//
+//   vCenterPoint - center of the circle
+//   vVx0, vVx1   - end points of the box edge
+//   iAxis        - identifier handed on to _cldTestAxis
+// Returns 1 without testing when the edge is (almost) perpendicular to the cylinder axis,
+// otherwise the result of _cldTestAxis for the constructed axis.
+int sCylinderBoxData::_cldTestEdgeCircleAxis( 
+    const dVector3 &vCenterPoint, 
+    const dVector3 &vVx0, const dVector3 &vVx1, 
+    int iAxis ) 
+{
+    // unit direction of the edge
+    dVector3 vEdgeDir;
+    dVector3Subtract(vVx1,vVx0,vEdgeDir);
+    dNormalize3(vEdgeDir);
+
+    // cosine of the angle between the edge and the cylinder axis
+    const dReal fCosEdgeAxis = dVector3Dot (vEdgeDir,m_vCylinderAxis);
+    if(dFabs(fCosEdgeAxis) < REAL(1e-5)) 
+    {
+        // the edge is parallel to the circle plane, so it yields no separating axis
+        return 1;
+    }
+
+    // point where the edge line (starting at vVx0) crosses the plane of the circle
+    dVector3 vToCenter;
+    dVector3Subtract(vCenterPoint,vVx0,vToCenter);
+    const dReal fAxialOffset = dVector3Dot(vToCenter,m_vCylinderAxis);
+    dVector3 vHit;
+    for (int i = 0; i < 3; i++)
+    {
+        vHit[i] = vVx0[i] + vEdgeDir[i] * (fAxialOffset/fCosEdgeAxis);
+    }
+
+    // tangent of the circle around vCenterPoint that passes through vHit
+    dVector3 vTangent;
+    dVector3Subtract(vCenterPoint,vHit,vToCenter);
+    dVector3Cross(vToCenter,m_vCylinderAxis,vTangent);
+
+    // the candidate axis is orthogonal to both the tangent and the edge
+    dVector3 vCandidate;
+    dVector3Cross(vTangent,vEdgeDir,vCandidate);
+    return _cldTestAxis( vCandidate, iAxis );
+}
+
+// Test separating axis for collision: runs the candidate axes (indices 1..39) through _cldTestAxis / _cldTestEdgeCircleAxis and returns 0 as soon as one of them reports separation, 1 otherwise
+int sCylinderBoxData::_cldTestSeparatingAxes() 
+{
+    // reset best axis bookkeeping (m_iBestAxis == 0 means "none recorded yet") and the contact counter
+    m_fBestDepth = MAX_FLOAT;
+    m_iBestAxis = 0;
+    m_fBestrb = 0;
+    m_fBestrc = 0;
+    m_nContacts = 0;
+
+    dVector3  vAxis = {REAL(0.0),REAL(0.0),REAL(0.0),REAL(0.0)};
+
+    // Epsilon value for checking axis vector length (compared against the squared length; axes at or below it are skipped)
+    const dReal fEpsilon = REAL(1e-6);
+
+    // axis A0 (column 0 of the box rotation), axis index 1
+    dMat3GetCol(m_mBoxRot, 0 , vAxis);
+    if (!_cldTestAxis( vAxis, 1 )) 
+    {
+        return 0;
+    }
+
+    // axis A1 (column 1 of the box rotation), axis index 2
+    dMat3GetCol(m_mBoxRot, 1 , vAxis);
+    if (!_cldTestAxis( vAxis, 2 )) 
+    {
+        return 0;
+    }
+
+    // axis A2 (column 2 of the box rotation), axis index 3
+    dMat3GetCol(m_mBoxRot, 2 , vAxis);
+    if (!_cldTestAxis( vAxis, 3 )) 
+    {
+        return 0;
+    }
+
+    // axis C - Cylinder Axis, axis index 4
+    //vAxis = vCylinderAxis;
+    dVector3Copy(m_vCylinderAxis , vAxis);
+    if (!_cldTestAxis( vAxis, 4 )) 
+    {
+        return 0;
+    }
+
+    // axis CxA0, axis index 5 (only tested when the cross product is not too short)
+    //vAxis = ( vCylinderAxis cross mthGetColM33f( mBoxRot, 0 ));
+    dVector3CrossMat3Col(m_mBoxRot, 0 ,m_vCylinderAxis, vAxis);
+    if(dVector3LengthSquare( vAxis ) > fEpsilon ) 
+    {
+        if (!_cldTestAxis( vAxis, 5 ))
+        {
+            return 0;
+        }
+    }
+
+    // axis CxA1, axis index 6 (only tested when the cross product is not too short)
+    //vAxis = ( vCylinderAxis cross mthGetColM33f( mBoxRot, 1 ));
+    dVector3CrossMat3Col(m_mBoxRot, 1 ,m_vCylinderAxis, vAxis);
+    if(dVector3LengthSquare( vAxis ) > fEpsilon ) 
+    {
+        if (!_cldTestAxis( vAxis, 6 )) 
+        {
+            return 0;
+        }
+    }
+
+    // axis CxA2, axis index 7 (only tested when the cross product is not too short)
+    //vAxis = ( vCylinderAxis cross mthGetColM33f( mBoxRot, 2 ));
+    dVector3CrossMat3Col(m_mBoxRot, 2 ,m_vCylinderAxis, vAxis);
+    if(dVector3LengthSquare( vAxis ) > fEpsilon ) 
+    {
+        if (!_cldTestAxis( vAxis, 7 ))
+        {
+            return 0;
+        }
+    }
+
+    int i = 0;
+    dVector3	vTemp1;
+    dVector3	vTemp2;
+    // here we check box's vertices axis: for each of the 8 vertices, C x (C x (vertex - cylinder pos)), tested with axis index 8 + i
+    for(i=0; i< 8; i++) 
+    {
+        //vAxis = ( vCylinderAxis cross (m_avBoxVertices[i] - vCylinderPos));
+        dVector3Subtract(m_avBoxVertices[i],m_vCylinderPos,vTemp1);
+        dVector3Cross(m_vCylinderAxis,vTemp1,vTemp2);
+        //vAxis = ( vCylinderAxis cross vAxis );
+        dVector3Cross(m_vCylinderAxis,vTemp2,vAxis);
+        if(dVector3LengthSquare( vAxis ) > fEpsilon ) 
+        {
+            if (!_cldTestAxis( vAxis, 8 + i ))
+            {
+                return 0;
+            }
+        }
+    }
+
+    // ************************************
+    // this is defined for first 12 axes (indices 16..27, one per box vertex pair listed below)
+    // normal of plane that contains top circle of cylinder
+    // center of top circle of cylinder: cylinder position + axis * half of m_fCylinderSize
+    dVector3 vcc;
+    vcc[0] = (m_vCylinderPos)[0] + m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+    vcc[1] = (m_vCylinderPos)[1] + m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+    vcc[2] = (m_vCylinderPos)[2] + m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+    // ************************************
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[1], m_avBoxVertices[0], 16)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[1], m_avBoxVertices[3], 17)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[3], 18))
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[0], 19)) 
+    {
+        return 0;
+    }
+
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[1], 20))
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[7], 21))
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[0], m_avBoxVertices[7], 22)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[5], m_avBoxVertices[3], 23)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[5], m_avBoxVertices[6], 24)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[6], 25)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[5], 26)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[6], m_avBoxVertices[7], 27)) 
+    {
+        return 0;
+    }
+
+    // ************************************
+    // this is defined for second 12 axes (indices 28..39, same vertex pairs as the first 12)
+    // normal of plane that contains bottom circle of cylinder
+    // center of bottom circle of cylinder: cylinder position - axis * half of m_fCylinderSize
+    //	vcc = vCylinderPos - vCylinderAxis*(fCylinderSize*REAL(0.5));
+    vcc[0] = (m_vCylinderPos)[0] - m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+    vcc[1] = (m_vCylinderPos)[1] - m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+    vcc[2] = (m_vCylinderPos)[2] - m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+    // ************************************
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[1], m_avBoxVertices[0], 28)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[1], m_avBoxVertices[3], 29)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[3], 30)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[0], 31)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[1], 32)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[7], 33)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[0], m_avBoxVertices[7], 34)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[5], m_avBoxVertices[3], 35)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[5], m_avBoxVertices[6], 36)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[2], m_avBoxVertices[6], 37)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[4], m_avBoxVertices[5], 38)) 
+    {
+        return 0;
+    }
+
+    if (!_cldTestEdgeCircleAxis( vcc, m_avBoxVertices[6], m_avBoxVertices[7], 39)) 
+    {
+        return 0;
+    }
+
+    return 1; // none of the tested axes reported separation
+}
+// Clips one side edge of the cylinder against the six box face planes and emits up to two contacts (the clipped end points); returns 0 as soon as a dClipEdgeToPlane call returns 0, otherwise 1
+int sCylinderBoxData::_cldClipCylinderToBox()
+{
+    dIASSERT(m_nContacts != (m_iFlags & NUMC_MASK)); // there must be room for at least one more contact
+
+    // calculate that vector perpendicular to cylinder axis which closes lowest angle with collision normal
+    // (the collision normal with its component along the cylinder axis removed)
+    dVector3 vN;
+    dReal fTemp1 = dVector3Dot(m_vCylinderAxis,m_vNormal);
+    vN[0]	=	m_vNormal[0] - m_vCylinderAxis[0]*fTemp1;
+    vN[1]	=	m_vNormal[1] - m_vCylinderAxis[1]*fTemp1;
+    vN[2]	=	m_vNormal[2] - m_vCylinderAxis[2]*fTemp1;
+
+    // normalize that vector
+    dNormalize3(vN);
+
+    // translate cylinder end points by the vector: shift the centre by radius along vN, then go half of m_fCylinderSize along +/- the axis
+    dVector3 vCposTrans;
+    vCposTrans[0] = m_vCylinderPos[0] + vN[0] * m_fCylinderRadius;
+    vCposTrans[1] = m_vCylinderPos[1] + vN[1] * m_fCylinderRadius;
+    vCposTrans[2] = m_vCylinderPos[2] + vN[2] * m_fCylinderRadius;
+
+    m_vEp0[0]  = vCposTrans[0] + m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+    m_vEp0[1]  = vCposTrans[1] + m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+    m_vEp0[2]  = vCposTrans[2] + m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+
+    m_vEp1[0]  = vCposTrans[0] - m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+    m_vEp1[1]  = vCposTrans[1] - m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+    m_vEp1[2]  = vCposTrans[2] - m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+
+    // transform edge points in box space (translation by the box position only; the box rotation enters through the plane normals below)
+    m_vEp0[0] -= m_vBoxPos[0];
+    m_vEp0[1] -= m_vBoxPos[1];
+    m_vEp0[2] -= m_vBoxPos[2];
+
+    m_vEp1[0] -= m_vBoxPos[0];
+    m_vEp1[1] -= m_vBoxPos[1];
+    m_vEp1[2] -= m_vBoxPos[2];
+
+    dVector3 vTemp1;
+    // clip the edge to box: one plane per face, built from a (possibly negated) rotation column and the matching half size
+    dVector4 plPlane;
+    // plane 0 +x
+    dMat3GetCol(m_mBoxRot,0,vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[0],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // plane 1 +y
+    dMat3GetCol(m_mBoxRot,1,vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[1],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // plane 2 +z
+    dMat3GetCol(m_mBoxRot,2,vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[2],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // plane 3 -x
+    dMat3GetCol(m_mBoxRot,0,vTemp1);
+    dVector3Inv(vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[0],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // plane 4 -y
+    dMat3GetCol(m_mBoxRot,1,vTemp1);
+    dVector3Inv(vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[1],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // plane 5 -z
+    dMat3GetCol(m_mBoxRot,2,vTemp1);
+    dVector3Inv(vTemp1);
+    dConstructPlane(vTemp1,m_vBoxHalfSize[2],plPlane);
+    if(!dClipEdgeToPlane( m_vEp0, m_vEp1, plPlane )) 
+    { 
+        return 0; 
+    }
+
+    // calculate depths for both contact points: m_fBestrb (the frb value saved for the best axis) plus the point's offset along m_vNormal
+    m_fDepth0 = m_fBestrb + dVector3Dot(m_vEp0, m_vNormal);
+    m_fDepth1 = m_fBestrb + dVector3Dot(m_vEp1, m_vNormal);
+
+    // clamp negative depths to 0
+    if(m_fDepth0<0) 
+    {
+        m_fDepth0 = REAL(0.0);
+    }
+
+    if(m_fDepth1<0) 
+    {
+        m_fDepth1 = REAL(0.0);
+    }
+
+    // back transform edge points from box to absolute space
+    m_vEp0[0] += m_vBoxPos[0];
+    m_vEp0[1] += m_vBoxPos[1];
+    m_vEp0[2] += m_vBoxPos[2];
+
+    m_vEp1[0] += m_vBoxPos[0];
+    m_vEp1[1] += m_vBoxPos[1];
+    m_vEp1[2] += m_vBoxPos[2];
+
+    dContactGeom* Contact0 = SAFECONTACT(m_iFlags, m_gContact, m_nContacts, m_iSkip);
+    Contact0->depth = m_fDepth0;
+    dVector3Copy(m_vNormal,Contact0->normal);
+    dVector3Copy(m_vEp0,Contact0->pos);
+    Contact0->g1 = m_gCylinder;
+    Contact0->g2 = m_gBox;
+    Contact0->side1 = -1;
+    Contact0->side2 = -1;
+    dVector3Inv(Contact0->normal); // the stored normal is m_vNormal negated
+    m_nContacts++;
+
+    if (m_nContacts != (m_iFlags & NUMC_MASK)) // second contact only while the requested maximum is not reached
+    {
+        dContactGeom* Contact1 = SAFECONTACT(m_iFlags, m_gContact, m_nContacts, m_iSkip);
+        Contact1->depth = m_fDepth1;
+        dVector3Copy(m_vNormal,Contact1->normal);
+        dVector3Copy(m_vEp1,Contact1->pos);
+        Contact1->g1 = m_gCylinder;
+        Contact1->g2 = m_gBox;
+        Contact1->side1 = -1;
+        Contact1->side2 = -1;
+        dVector3Inv(Contact1->normal); // the stored normal is m_vNormal negated
+        m_nContacts++;
+    }
+
+    return 1;
+}
+
+// Clips one box face against a cylinder cap (cap plane plus nCYLINDER_SEGMENT side planes) and emits a contact for every surviving point with positive depth, up to the requested maximum
+void sCylinderBoxData::_cldClipBoxToCylinder() 
+{
+    dIASSERT(m_nContacts != (m_iFlags & NUMC_MASK)); // there must be room for at least one more contact
+
+    dVector3 vCylinderCirclePos, vCylinderCircleNormal_Rel;
+    // check which circle from cylinder we take for clipping (decided by the sign of axis . normal)
+    if ( dVector3Dot(m_vCylinderAxis, m_vNormal) > REAL(0.0) ) 
+    {
+        // get top circle: cylinder position + axis * half of m_fCylinderSize
+        vCylinderCirclePos[0] = m_vCylinderPos[0] + m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+        vCylinderCirclePos[1] = m_vCylinderPos[1] + m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+        vCylinderCirclePos[2] = m_vCylinderPos[2] + m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+
+        vCylinderCircleNormal_Rel[0] = REAL(0.0);
+        vCylinderCircleNormal_Rel[1] = REAL(0.0);
+        vCylinderCircleNormal_Rel[2] = REAL(0.0);
+        vCylinderCircleNormal_Rel[nCYLINDER_AXIS] = REAL(-1.0); // only the nCYLINDER_AXIS component is set
+    }
+    else 
+    {
+        // get bottom circle: cylinder position - axis * half of m_fCylinderSize
+        vCylinderCirclePos[0] = m_vCylinderPos[0] - m_vCylinderAxis[0]*(m_fCylinderSize*REAL(0.5));
+        vCylinderCirclePos[1] = m_vCylinderPos[1] - m_vCylinderAxis[1]*(m_fCylinderSize*REAL(0.5));
+        vCylinderCirclePos[2] = m_vCylinderPos[2] - m_vCylinderAxis[2]*(m_fCylinderSize*REAL(0.5));
+
+        vCylinderCircleNormal_Rel[0] = REAL(0.0);
+        vCylinderCircleNormal_Rel[1] = REAL(0.0);
+        vCylinderCircleNormal_Rel[2] = REAL(0.0);
+        vCylinderCircleNormal_Rel[nCYLINDER_AXIS] = REAL(1.0); // only the nCYLINDER_AXIS component is set
+    }
+
+    // vNr is normal in Box frame, pointing from Cylinder to Box
+    dVector3 vNr;
+    dMatrix3 mBoxInv;
+
+    // Find a way to use quaternion
+    dMatrix3Inv(m_mBoxRot,mBoxInv);
+    dMultiplyMat3Vec3(mBoxInv,m_vNormal,vNr);
+
+    dVector3 vAbsNormal;
+
+    vAbsNormal[0] = dFabs( vNr[0] );
+    vAbsNormal[1] = dFabs( vNr[1] );
+    vAbsNormal[2] = dFabs( vNr[2] );
+
+    // find which face in box is closest to cylinder: order the box axes by |vNr| so that iB0 is the dominant one, followed by iB1 and iB2
+    int iB0, iB1, iB2;
+
+    // Different from Croteam's code
+    if (vAbsNormal[1] > vAbsNormal[0]) 
+    {
+        // 1 > 0
+        if (vAbsNormal[0]> vAbsNormal[2]) 
+        {
+            // 0 > 2 -> 1 > 0 >2
+            iB0 = 1; iB1 = 0; iB2 = 2;
+        } 
+        else 
+        {
+            // 2 > 0-> Must compare 1 and 2
+            if (vAbsNormal[1] > vAbsNormal[2])
+            {
+                // 1 > 2 -> 1 > 2 > 0
+                iB0 = 1; iB1 = 2; iB2 = 0;
+            }
+            else
+            {
+                // 2 > 1 -> 2 > 1 > 0;
+                iB0 = 2; iB1 = 1; iB2 = 0;
+            }			
+        }
+    } 
+    else 
+    {
+        // 0 > 1
+        if (vAbsNormal[1] > vAbsNormal[2]) 
+        {
+            // 1 > 2 -> 0 > 1 > 2
+            iB0 = 0; iB1 = 1; iB2 = 2;
+        }
+        else 
+        {
+            // 2 > 1 -> Must compare 0 and 2
+            if (vAbsNormal[0] > vAbsNormal[2])
+            {
+                // 0 > 2 -> 0 > 2 > 1;
+                iB0 = 0; iB1 = 2; iB2 = 1;
+            }
+            else
+            {
+                // 2 > 0 -> 2 > 0 > 1;
+                iB0 = 2; iB1 = 0; iB2 = 1;
+            }		
+        }
+    }
+
+    dVector3 vCenter;
+    // find center of box polygon: step from the box position by the half size along column iB0, opposite to the sign of vNr[iB0]
+    dVector3 vTemp;
+    if (vNr[iB0] > 0) 
+    {
+        dMat3GetCol(m_mBoxRot,iB0,vTemp);
+        vCenter[0] = m_vBoxPos[0] - m_vBoxHalfSize[iB0]*vTemp[0];
+        vCenter[1] = m_vBoxPos[1] - m_vBoxHalfSize[iB0]*vTemp[1];
+        vCenter[2] = m_vBoxPos[2] - m_vBoxHalfSize[iB0]*vTemp[2];
+    }
+    else 
+    {
+        dMat3GetCol(m_mBoxRot,iB0,vTemp);
+        vCenter[0] = m_vBoxPos[0] + m_vBoxHalfSize[iB0]*vTemp[0];
+        vCenter[1] = m_vBoxPos[1] + m_vBoxHalfSize[iB0]*vTemp[1];
+        vCenter[2] = m_vBoxPos[2] + m_vBoxHalfSize[iB0]*vTemp[2];
+    }
+
+    // find the vertices of box polygon (the four corners around vCenter, spanned by columns iB1 and iB2)
+    dVector3 avPoints[4];
+    dVector3 avTempArray1[MAX_CYLBOX_CLIP_POINTS];
+    dVector3 avTempArray2[MAX_CYLBOX_CLIP_POINTS];
+    // zero the first three components of both scratch arrays used by the clipping passes below
+    int i=0;
+    for(i=0; i<MAX_CYLBOX_CLIP_POINTS; i++) 
+    {
+        avTempArray1[i][0] = REAL(0.0);
+        avTempArray1[i][1] = REAL(0.0);
+        avTempArray1[i][2] = REAL(0.0);
+
+        avTempArray2[i][0] = REAL(0.0);
+        avTempArray2[i][1] = REAL(0.0);
+        avTempArray2[i][2] = REAL(0.0);
+    }
+
+    dVector3 vAxis1, vAxis2;
+
+    dMat3GetCol(m_mBoxRot,iB1,vAxis1);
+    dMat3GetCol(m_mBoxRot,iB2,vAxis2);
+
+    avPoints[0][0] = vCenter[0] + m_vBoxHalfSize[iB1] * vAxis1[0] - m_vBoxHalfSize[iB2] * vAxis2[0];
+    avPoints[0][1] = vCenter[1] + m_vBoxHalfSize[iB1] * vAxis1[1] - m_vBoxHalfSize[iB2] * vAxis2[1];
+    avPoints[0][2] = vCenter[2] + m_vBoxHalfSize[iB1] * vAxis1[2] - m_vBoxHalfSize[iB2] * vAxis2[2];
+
+    avPoints[1][0] = vCenter[0] - m_vBoxHalfSize[iB1] * vAxis1[0] - m_vBoxHalfSize[iB2] * vAxis2[0];
+    avPoints[1][1] = vCenter[1] - m_vBoxHalfSize[iB1] * vAxis1[1] - m_vBoxHalfSize[iB2] * vAxis2[1];
+    avPoints[1][2] = vCenter[2] - m_vBoxHalfSize[iB1] * vAxis1[2] - m_vBoxHalfSize[iB2] * vAxis2[2];
+
+    avPoints[2][0] = vCenter[0] - m_vBoxHalfSize[iB1] * vAxis1[0] + m_vBoxHalfSize[iB2] * vAxis2[0];
+    avPoints[2][1] = vCenter[1] - m_vBoxHalfSize[iB1] * vAxis1[1] + m_vBoxHalfSize[iB2] * vAxis2[1];
+    avPoints[2][2] = vCenter[2] - m_vBoxHalfSize[iB1] * vAxis1[2] + m_vBoxHalfSize[iB2] * vAxis2[2];
+
+    avPoints[3][0] = vCenter[0] + m_vBoxHalfSize[iB1] * vAxis1[0] + m_vBoxHalfSize[iB2] * vAxis2[0];
+    avPoints[3][1] = vCenter[1] + m_vBoxHalfSize[iB1] * vAxis1[1] + m_vBoxHalfSize[iB2] * vAxis2[1];
+    avPoints[3][2] = vCenter[2] + m_vBoxHalfSize[iB1] * vAxis1[2] + m_vBoxHalfSize[iB2] * vAxis2[2];
+
+    // transform box points to space of cylinder circle (made relative to the circle centre, then multiplied by mCylinderInv)
+    dMatrix3 mCylinderInv;
+    dMatrix3Inv(m_mCylinderRot,mCylinderInv);
+
+    for(i=0; i<4; i++) 
+    {
+        dVector3Subtract(avPoints[i],vCylinderCirclePos,vTemp);
+        dMultiplyMat3Vec3(mCylinderInv,vTemp,avPoints[i]);
+    }
+
+    int iTmpCounter1 = 0;
+    int iTmpCounter2 = 0;
+    dVector4 plPlane;
+
+    // plane of cylinder that contains circle for intersection; the clipped polygon goes to avTempArray1
+    dConstructPlane(vCylinderCircleNormal_Rel,REAL(0.0),plPlane);
+    dClipPolyToPlane(avPoints, 4, avTempArray1, iTmpCounter1, plPlane);
+
+
+    // Body of base circle of Cylinder: clip against each of the nCYLINDER_SEGMENT side planes, alternating input and output between the two temp arrays
+    int nCircleSegment = 0;
+    for (nCircleSegment = 0; nCircleSegment < nCYLINDER_SEGMENT; nCircleSegment++)
+    {
+        dConstructPlane(m_avCylinderNormals[nCircleSegment],m_fCylinderRadius,plPlane);
+
+        if (0 == (nCircleSegment % 2))
+        {
+            dClipPolyToPlane( avTempArray1 , iTmpCounter1 , avTempArray2, iTmpCounter2, plPlane);
+        }
+        else
+        {
+            dClipPolyToPlane( avTempArray2, iTmpCounter2, avTempArray1 , iTmpCounter1 , plPlane );
+        }
+
+        dIASSERT( iTmpCounter1 >= 0 && iTmpCounter1 <= MAX_CYLBOX_CLIP_POINTS );
+        dIASSERT( iTmpCounter2 >= 0 && iTmpCounter2 <= MAX_CYLBOX_CLIP_POINTS );
+    }
+
+    // back transform clipped points to absolute space
+    dReal ftmpdot;	
+    dReal fTempDepth;
+    dVector3 vPoint;
+    // read from whichever temp array the last clipping pass wrote to (both branches below are otherwise identical)
+    if (nCircleSegment % 2)
+    {
+        for( i=0; i<iTmpCounter2; i++)
+        {
+            dMultiply0_331(vPoint,m_mCylinderRot,avTempArray2[i]);
+            vPoint[0] += vCylinderCirclePos[0];
+            vPoint[1] += vCylinderCirclePos[1];
+            vPoint[2] += vCylinderCirclePos[2];
+
+            dVector3Subtract(vPoint,m_vCylinderPos,vTemp);
+            ftmpdot	 = dVector3Dot(vTemp, m_vNormal);
+            fTempDepth = m_fBestrc - ftmpdot;
+            // Depth must be positive (points with zero or negative depth produce no contact)
+            if (fTempDepth > REAL(0.0))
+            {
+                // generate contacts
+                dContactGeom* Contact0 = SAFECONTACT(m_iFlags, m_gContact, m_nContacts, m_iSkip);
+                Contact0->depth = fTempDepth;
+                dVector3Copy(m_vNormal,Contact0->normal);
+                dVector3Copy(vPoint,Contact0->pos);
+                Contact0->g1 = m_gCylinder;
+                Contact0->g2 = m_gBox;
+                Contact0->side1 = -1;
+                Contact0->side2 = -1;
+                dVector3Inv(Contact0->normal); // the stored normal is m_vNormal negated
+                m_nContacts++;
+
+                if (m_nContacts == (m_iFlags & NUMC_MASK)) // requested maximum reached: stop emitting
+                {
+                    break;
+                }
+            }
+        }
+    }
+    else
+    {
+        for( i=0; i<iTmpCounter1; i++)
+        {
+            dMultiply0_331(vPoint,m_mCylinderRot,avTempArray1[i]);
+            vPoint[0] += vCylinderCirclePos[0];
+            vPoint[1] += vCylinderCirclePos[1];
+            vPoint[2] += vCylinderCirclePos[2];
+
+            dVector3Subtract(vPoint,m_vCylinderPos,vTemp);
+            ftmpdot	 = dVector3Dot(vTemp, m_vNormal);
+            fTempDepth = m_fBestrc - ftmpdot;
+            // Depth must be positive (points with zero or negative depth produce no contact)
+            if (fTempDepth > REAL(0.0))
+            {
+                // generate contacts
+                dContactGeom* Contact0 = SAFECONTACT(m_iFlags, m_gContact, m_nContacts, m_iSkip);
+                Contact0->depth = fTempDepth;
+                dVector3Copy(m_vNormal,Contact0->normal);
+                dVector3Copy(vPoint,Contact0->pos);
+                Contact0->g1 = m_gCylinder;
+                Contact0->g2 = m_gBox;
+                Contact0->side1 = -1;
+                Contact0->side2 = -1;
+                dVector3Inv(Contact0->normal); // the stored normal is m_vNormal negated
+                m_nContacts++;
+
+                if (m_nContacts == (m_iFlags & NUMC_MASK)) // requested maximum reached: stop emitting
+                {
+                    break;
+                }
+            }
+        }
+    }
+}
+
+int sCylinderBoxData::PerformCollisionChecking()
+{
+    // Full cylinder/box test; the return value is the number of contacts
+    // generated (0 when none are produced).
+    _cldInitCylinderBox();
+
+    // A zero result means a separating axis was found, so there is no
+    // intersection and nothing to report.
+    const int bAxesOverlap = _cldTestSeparatingAxes();
+    if ( bAxesOverlap == 0 )
+    {
+        return 0;
+    }
+
+    // The axis test starts with m_iBestAxis == 0, so 0 here means no best
+    // axis was recorded; that is not expected once the test has passed.
+    const bool bNoBestAxis = ( m_iBestAxis == 0 );
+    if ( bNoBestAxis )
+    {
+        dIASSERT(0);
+        return 0;
+    }
+
+    // Choose the clipping method from how closely the contact normal is
+    // aligned with the cylinder axis.
+    const dReal fAxisDotNormal = dVector3Dot(m_vNormal,m_vCylinderAxis);
+    if ( dFabs(fAxisDotNormal) < REAL(0.9) )
+    {
+        // clip cylinder over box; a failed clip reports no contacts at all
+        return _cldClipCylinderToBox() ? m_nContacts : 0;
+    }
+
+    // otherwise the normal is close to the cylinder axis direction:
+    // clip the box against the cylinder
+    _cldClipBoxToCylinder();
+
+    return m_nContacts;
+}
+
+// Cylinder - Box by CroTeam
+// Ported by Nguyen Binh
+int dCollideCylinderBox(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    // Internal sanity checks: contact stride, geom classes, and room for at least one contact.
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dCylinderClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+    // Delegate to a local collider object; its result is the number of contacts generated.
+    sCylinderBoxData collider(o1, o2, flags, contact, skip);
+    return collider.PerformCollisionChecking();
+}
+
+
diff --git a/libs/ode-0.16.1/ode/src/collision_cylinder_plane.cpp b/libs/ode-0.16.1/ode/src/collision_cylinder_plane.cpp
new file mode 100644
index 0000000..67424ad
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_cylinder_plane.cpp
@@ -0,0 +1,266 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+/*
+ * Cylinder-Plane collider by Christoph Beyer ( boernerb@web.de )
+ *
+ * This testing basically comes down to testing the intersection
+ * of the cylinder caps (discs) with the plane.
+ * 
+ */
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include <ode/objects.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"	// for dxGeom
+#include "collision_util.h"
+
+// Cylinder vs. plane: tests up to four rim points of the deeper cap when the caps are parallel to the plane, otherwise one rim point per cap; returns the number of contacts written (at most flags & NUMC_MASK)
+int dCollideCylinderPlane(dxGeom *Cylinder, dxGeom *Plane, int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (Cylinder->type == dCylinderClass);
+    dIASSERT (Plane->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    int GeomCount = 0; // count of used contactgeoms
+    // tolerance for the parallel test and the x-axis test below, selected by the dSINGLE / dDOUBLE build macro
+#ifdef dSINGLE
+    const dReal toleranz = REAL(0.0001);
+#endif
+#ifdef dDOUBLE
+    const dReal toleranz = REAL(0.0000001);
+#endif
+
+    // Get the properties of the cylinder (length+radius)
+    dReal radius, length;
+    dGeomCylinderGetParams(Cylinder, &radius, &length);
+    dVector3 &cylpos = Cylinder->final_posr->pos;
+    // and the plane
+    dVector4 planevec;
+    dGeomPlaneGetParams(Plane, planevec);
+    dVector3 PlaneNormal = {planevec[0],planevec[1],planevec[2]};
+    //dVector3 PlanePos = {planevec[0] * planevec[3],planevec[1] * planevec[3],planevec[2] * planevec[3]};
+    // vDir1: entries 2, 6, 10 of the cylinder rotation R, used as the cylinder axis direction; G1Pos1 / G1Pos2: disc centres half a length along -vDir1 / +vDir1
+    dVector3 G1Pos1, G1Pos2, vDir1;
+    vDir1[0] = Cylinder->final_posr->R[2];
+    vDir1[1] = Cylinder->final_posr->R[6];
+    vDir1[2] = Cylinder->final_posr->R[10];
+
+    dReal s;
+    s = length * REAL(0.5);
+    G1Pos2[0] = vDir1[0] * s + cylpos[0];
+    G1Pos2[1] = vDir1[1] * s + cylpos[1];
+    G1Pos2[2] = vDir1[2] * s + cylpos[2];
+
+    G1Pos1[0] = vDir1[0] * -s + cylpos[0];
+    G1Pos1[1] = vDir1[1] * -s + cylpos[1];
+    G1Pos1[2] = vDir1[2] * -s + cylpos[2];
+
+    dVector3 C;
+
+    // parallel-check: s = vDir1 . PlaneNormal, shifted by +/-1 so that it is near 0 when the two are (anti-)parallel
+    s = vDir1[0] * PlaneNormal[0] + vDir1[1] * PlaneNormal[1] + vDir1[2] * PlaneNormal[2];
+    if(s < 0)
+        s += REAL(1.0); // is approx. 0, if vDir1 and PlaneNormal are anti-parallel
+    else
+        s -= REAL(1.0); // is approx. 0, if vDir1 and PlaneNormal are parallel
+    if(s < toleranz && s > (-toleranz))
+    {
+        // discs are parallel to the plane
+
+        // 1.compute if, and where contacts are (s, t: depth of the two disc centres; P: centre of the deeper disc)
+        dVector3 P;
+        s = planevec[3] - dVector3Dot(planevec, G1Pos1);
+        dReal t;
+        t = planevec[3] - dVector3Dot(planevec, G1Pos2);
+        if(s >= t) // s == t does never happen, 
+        {
+            if(s >= 0)
+            {
+                // 1. Disc
+                dVector3Copy(G1Pos1, P);
+            }
+            else
+                return GeomCount; // no contacts
+        }
+        else
+        {
+            if(t >= 0)
+            {
+                // 2. Disc
+                dVector3Copy(G1Pos2, P);
+            }
+            else
+                return GeomCount; // no contacts
+        }
+
+        // 2. generate a coordinate-system on the disc
+        dVector3 V1, V2;
+        if(vDir1[0] < toleranz && vDir1[0] > (-toleranz))
+        {
+            // not x-axis
+            V1[0] = vDir1[0] + REAL(1.0); // arbitrary offset, only to get a direction different from vDir1
+            V1[1] = vDir1[1];
+            V1[2] = vDir1[2];
+        }
+        else
+        {
+            // maybe x-axis
+            V1[0] = vDir1[0];
+            V1[1] = vDir1[1] + REAL(1.0); // arbitrary offset, only to get a direction different from vDir1
+            V1[2] = vDir1[2];
+        }
+        // V1 is now another direction than vDir1
+        // Cross-product
+        dVector3Cross(V1, vDir1, V2);
+        // scale V2 to length 'radius'
+        t = dVector3Length(V2);
+        t = radius / t;
+        dVector3Scale(V2, t);
+        // cross again
+        dVector3Cross(V2, vDir1, V1);
+        // |V2| is 'radius' and vDir1 unit, so |V1| is 'radius'
+        // V1 = first axis
+        // V2 = second axis
+
+        // 3. generate contactpoints (P +/- V1 and P +/- V2; each candidate is written into *contact first and only counted if its depth is > 0)
+
+        // Potential contact 1
+        dVector3Add(P, V1, contact->pos);
+        contact->depth = planevec[3] - dVector3Dot(planevec, contact->pos);
+        if(contact->depth > 0)
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+
+        // Potential contact 2
+        dVector3Subtract(P, V1, contact->pos);
+        contact->depth = planevec[3] - dVector3Dot(planevec, contact->pos);
+        if(contact->depth > 0)
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+
+        // Potential contact 3
+        dVector3Add(P, V2, contact->pos);
+        contact->depth = planevec[3] - dVector3Dot(planevec, contact->pos);
+        if(contact->depth > 0)
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+
+        // Potential contact 4
+        dVector3Subtract(P, V2, contact->pos);
+        contact->depth = planevec[3] - dVector3Dot(planevec, contact->pos);
+        if(contact->depth > 0)
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+    }
+    else
+    {
+        dReal t = dVector3Dot(PlaneNormal, vDir1);
+        C[0] = vDir1[0] * t - PlaneNormal[0];
+        C[1] = vDir1[1] * t - PlaneNormal[1];
+        C[2] = vDir1[2] * t - PlaneNormal[2]; // C = vDir1 * (PlaneNormal . vDir1) - PlaneNormal
+        s = dVector3Length(C);
+        // move C onto the circle (rescale it to length 'radius')
+        s = radius / s;
+        dVector3Scale(C, s);
+
+        // deepest point of disc 1
+        dVector3Add(C, G1Pos1, contact->pos);
+
+        // depth of the deepest point
+        contact->depth = planevec[3] - dVector3Dot(planevec, contact->pos);
+        if(contact->depth >= 0) // NOTE(review): '>= 0' here, whereas the parallel branch above uses '> 0'
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+
+        // C from above is reused unchanged for the second disc
+
+        // deepest point of disc 2
+        dVector3Add(C, G1Pos2, contact->pos);
+
+        // depth of the deepest point (same quantity as for disc 1, with the dot product written out)
+        contact->depth = planevec[3] - planevec[0] * contact->pos[0] - planevec[1] * contact->pos[1] - planevec[2] * contact->pos[2];
+        if(contact->depth >= 0) // NOTE(review): '>= 0' here, whereas the parallel branch above uses '> 0'
+        {
+            dVector3Copy(PlaneNormal, contact->normal);
+            contact->g1 = Cylinder;
+            contact->g2 = Plane;
+            contact->side1 = -1;
+            contact->side2 = -1;
+            GeomCount++;
+            if( GeomCount >= (flags & NUMC_MASK))
+                return GeomCount; // enough contactgeoms
+            contact = (dContactGeom *)((char *)contact + skip);
+        }
+    }
+    return GeomCount;
+}
diff --git a/libs/ode-0.16.1/ode/src/collision_cylinder_sphere.cpp b/libs/ode-0.16.1/ode/src/collision_cylinder_sphere.cpp
new file mode 100644
index 0000000..4a5f6ec
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_cylinder_sphere.cpp
@@ -0,0 +1,277 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+/*******************************************************************
+ *                                                                 *
+ * cylinder-sphere collider by Christoph Beyer (boernerb@web.de)   *
+ *                                                                 *
+ * In Cylinder/Sphere-collisions, there are three possibilities:   *
+ * 1. collision with the cylinder's nappe (lateral surface)        *
+ * 2. collision with one of the cylinder's discs                   *
+ * 3. collision with the border (rim) of one of the discs          *
+ *                                                                 *
+ * This collider computes two distances (s, t) and based on them,  *
+ * it decides which collision we have.                             *
+ * This collider always generates exactly 1 contact, or 0 if there *
+ * is no collision.                                                *
+ * It is able to "separate" cylinder and sphere in all             *
+ * configurations, but it never pays attention to velocity.        *
+ * So, in extreme situations, "tunneling-effect" is possible.      *
+ *                                                                 *
+ *******************************************************************/
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include <ode/objects.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"	// for dxGeom
+#include "collision_util.h"
+
+// Cylinder-sphere collider: writes at most one contact into *contact.
+//
+// Two distances are computed first: s, the signed distance of the sphere
+// centre from the first disc (centred at G1Pos1) measured along the cylinder
+// axis, and t, the distance of the sphere centre from the cylinder axis.
+// They select one of three cases:
+//   1. nappe (lateral surface): the normal is radial,
+//   2. disc: the normal is +/- the cylinder axis,
+//   3. disc border: the normal points from the sphere centre to the
+//      nearest border point.
+// If the sphere centre lies on the cylinder axis in case 1, the radial
+// direction is undefined; the cylinder's local x axis is used instead.
+//
+// Cylinder, Sphere - the geoms to test (their classes are asserted below).
+// flags            - low bits (NUMC_MASK) hold the contact capacity, >= 1.
+// contact          - receives depth, pos, normal, g1, g2, side1 and side2.
+// skip             - byte stride between contacts; only sanity-checked,
+//                    because a single contact is the most that is written.
+//
+// Returns 1 if a contact was generated and 0 if not. contact->depth may
+// already have been overwritten when 0 is returned.
+int dCollideCylinderSphere(dxGeom* Cylinder, dxGeom* Sphere, 
+                           int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (Cylinder->type == dCylinderClass);
+    dIASSERT (Sphere->type == dSphereClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // Distance tolerance: decides whether the cylinder axis passes through
+    // the sphere and whether the radial direction C / t is usable at all.
+#ifdef dSINGLE
+    const dReal toleranz = REAL(0.0001);
+#endif
+#ifdef dDOUBLE
+    const dReal toleranz = REAL(0.0000001);
+#endif
+
+    // get the data from the geoms
+    dReal radius, length;
+    dGeomCylinderGetParams(Cylinder, &radius, &length);
+    dVector3 &cylpos = Cylinder->final_posr->pos;
+
+    dReal radius2;
+    radius2 = dGeomSphereGetRadius(Sphere);
+    const dReal* SpherePos = dGeomGetPosition(Sphere);
+
+    // G1Pos1 is the middle of the first disc
+    // G1Pos2 is the middle of the second disc
+    // vDir1 is the unit direction of the cylinder axis
+    dVector3 G1Pos1, G1Pos2, vDir1;
+    vDir1[0] = Cylinder->final_posr->R[2];
+    vDir1[1] = Cylinder->final_posr->R[6];
+    vDir1[2] = Cylinder->final_posr->R[10];
+
+    dReal s;
+    s = length * REAL(0.5); // just a precomputed factor
+    G1Pos2[0] = vDir1[0] * s + cylpos[0];
+    G1Pos2[1] = vDir1[1] * s + cylpos[1];
+    G1Pos2[2] = vDir1[2] * s + cylpos[2];
+
+    G1Pos1[0] = vDir1[0] * -s + cylpos[0];
+    G1Pos1[1] = vDir1[1] * -s + cylpos[1];
+    G1Pos1[2] = vDir1[2] * -s + cylpos[2];
+
+    dVector3 C;
+    dReal t;
+    // Step 1: compute the two distances 's' and 't'
+    // 's' is the distance from the first disc (the disc with G1Pos1 in the middle), measured in vDir1 (cylinder axis) direction
+    s = (SpherePos[0] - G1Pos1[0]) * vDir1[0] - (G1Pos1[1] - SpherePos[1]) * vDir1[1] - (G1Pos1[2] - SpherePos[2]) * vDir1[2];
+    if(s < (-radius2) || s > (length + radius2) )
+    {
+        // Sphere is too far away from the discs
+        // no collision
+        return 0;
+    }
+
+    // C is the direction from Sphere-middle to the cylinder-axis (vDir1); C is orthogonal to the cylinder-axis
+    C[0] = s * vDir1[0] + G1Pos1[0] - SpherePos[0];
+    C[1] = s * vDir1[1] + G1Pos1[1] - SpherePos[1];
+    C[2] = s * vDir1[2] + G1Pos1[2] - SpherePos[2];
+    // t is the distance from the Sphere-middle to the cylinder-axis!
+    t = dVector3Length(C);
+    if(t > (radius + radius2) )
+    {
+        // Sphere is too far away from the cylinder axis!
+        // no collision
+        return 0;
+    }
+
+    // decide which kind of collision we have:
+    if(t > radius && (s < 0 || s > length) )
+    {
+        // 3. collision: the sphere centre is outside the cylinder radius
+        // and beyond one of the discs, so the border of that disc is hit
+        if(s <= 0)
+        {
+            contact->depth = radius2 - dSqrt( (s) * (s) + (t - radius) * (t - radius) );
+            if(contact->depth < 0)
+            {
+                // no collision!
+                return 0;
+            }
+            // point on the border of the first disc that is nearest to the sphere centre
+            contact->pos[0] = C[0] / t * -radius + G1Pos1[0];
+            contact->pos[1] = C[1] / t * -radius + G1Pos1[1];
+            contact->pos[2] = C[2] / t * -radius + G1Pos1[2];
+            // (radius2 - depth) is the distance between that point and the sphere centre
+            contact->normal[0] = (contact->pos[0] - SpherePos[0]) / (radius2 - contact->depth);
+            contact->normal[1] = (contact->pos[1] - SpherePos[1]) / (radius2 - contact->depth);
+            contact->normal[2] = (contact->pos[2] - SpherePos[2]) / (radius2 - contact->depth);
+        }
+        else
+        {
+            // now s is bigger than length here!
+            contact->depth = radius2 - dSqrt( (s - length) * (s - length) + (t - radius) * (t - radius) );
+            if(contact->depth < 0)
+            {
+                // no collision!
+                return 0;
+            }
+            // point on the border of the second disc that is nearest to the sphere centre
+            contact->pos[0] = C[0] / t * -radius + G1Pos2[0];
+            contact->pos[1] = C[1] / t * -radius + G1Pos2[1];
+            contact->pos[2] = C[2] / t * -radius + G1Pos2[2];
+            // (radius2 - depth) is the distance between that point and the sphere centre
+            contact->normal[0] = (contact->pos[0] - SpherePos[0]) / (radius2 - contact->depth);
+            contact->normal[1] = (contact->pos[1] - SpherePos[1]) / (radius2 - contact->depth);
+            contact->normal[2] = (contact->pos[2] - SpherePos[2]) / (radius2 - contact->depth);
+        }
+    }
+    else if( (radius - t) <= s && (radius - t) <= (length - s) )
+    {
+        // 1. collision: the nappe (lateral surface) of the cylinder
+        contact->depth = (radius2 + radius) - t;
+        if(contact->depth < 0)
+        {
+            // should never happen, but just for safeness
+            return 0;
+        }
+
+        if(t > (radius2 + toleranz))
+        {
+            // cylinder-axis is outside the sphere
+            C[0] /= t;
+            C[1] /= t;
+            C[2] /= t;
+            // contact at the point of the sphere surface that is nearest to the axis
+            contact->pos[0] = C[0] * radius2 + SpherePos[0];
+            contact->pos[1] = C[1] * radius2 + SpherePos[1];
+            contact->pos[2] = C[2] * radius2 + SpherePos[2];
+            contact->normal[0] = C[0];
+            contact->normal[1] = C[1];
+            contact->normal[2] = C[2];
+        }
+        else
+        {
+            // cylinder-axis passes through the sphere
+            // contact at the point of the axis that is nearest to the sphere centre
+            contact->pos[0] = C[0] + SpherePos[0];
+            contact->pos[1] = C[1] + SpherePos[1];
+            contact->pos[2] = C[2] + SpherePos[2];
+            if(t > toleranz)
+            {
+                contact->normal[0] = C[0] / t;
+                contact->normal[1] = C[1] / t;
+                contact->normal[2] = C[2] / t;
+            }
+            else
+            {
+                // The sphere centre lies (almost) on the axis, so C / t is
+                // 0 / 0 or numerical noise. Every direction orthogonal to the
+                // axis is equally valid then; use the cylinder's local x axis.
+                contact->normal[0] = Cylinder->final_posr->R[0];
+                contact->normal[1] = Cylinder->final_posr->R[4];
+                contact->normal[2] = Cylinder->final_posr->R[8];
+            }
+        }
+    }
+    else
+    {
+        // 2. collision: one of the discs
+        if(s <= (length * REAL(0.5)) )
+        {
+            // collision with the first disc
+            contact->depth = s + radius2;
+            if(contact->depth < 0)
+            {
+                // should never happen, but just for safeness
+                return 0;
+            }
+            // contact at the point of the sphere surface lying furthest along +vDir1
+            contact->pos[0] = radius2 * vDir1[0] + SpherePos[0];
+            contact->pos[1] = radius2 * vDir1[1] + SpherePos[1];
+            contact->pos[2] = radius2 * vDir1[2] + SpherePos[2];
+            contact->normal[0] = vDir1[0];
+            contact->normal[1] = vDir1[1];
+            contact->normal[2] = vDir1[2];
+        }
+        else
+        {
+            // collision with the second disc
+            contact->depth = (radius2 + length - s);
+            if(contact->depth < 0)
+            {
+                // should never happen, but just for safeness
+                return 0;
+            }
+            // contact at the point of the sphere surface lying furthest along -vDir1
+            contact->pos[0] = radius2 * -vDir1[0] + SpherePos[0];
+            contact->pos[1] = radius2 * -vDir1[1] + SpherePos[1];
+            contact->pos[2] = radius2 * -vDir1[2] + SpherePos[2];
+            contact->normal[0] = -vDir1[0];
+            contact->normal[1] = -vDir1[1];
+            contact->normal[2] = -vDir1[2];
+        }
+    }
+
+    // Every path that reaches this point has filled in depth, pos and
+    // normal of exactly one contact; complete it and report it.
+    contact->g1 = Cylinder;
+    contact->g2 = Sphere;
+    contact->side1 = -1;
+    contact->side2 = -1;
+    return 1;
+}
diff --git a/libs/ode-0.16.1/ode/src/collision_cylinder_trimesh.cpp b/libs/ode-0.16.1/ode/src/collision_cylinder_trimesh.cpp
new file mode 100644
index 0000000..fd22e1a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_cylinder_trimesh.cpp
@@ -0,0 +1,1171 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *	Cylinder-trimesh collider by Alen Ladavac
+ *   Ported to ODE by Nguyen Binh
+ */
+
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+#include "util.h"
+
+#if dTRIMESH_ENABLED
+
+#define MAX_REAL	dInfinity // start value for the min/max search in _cldTestAxis()
+static const int	nCYLINDER_AXIS				= 2; // NOTE(review): presumably the index of the rotation-matrix column that holds the cylinder axis - confirm where it is used
+static const int    nCYLINDER_CIRCLE_SEGMENTS	= 8; // number of entries of m_avCylinderNormals
+static const int    nMAX_CYLINDER_TRIANGLE_CLIP_POINTS	= 12; // NOTE(review): presumably bounds a clip-point buffer used further down the file - confirm
+// Only ever tested with #ifdef, so the value is irrelevant: being defined enables the filtering of near-duplicate contacts.
+#define OPTIMIZE_CONTACTS 1
+
+// One contact as collected by the cylinder-trimesh collider before it is copied into the caller's dContactGeom array
+typedef struct _sLocalContactData
+{
+    dVector3	vPos; // contact position; copied to dContactGeom::pos
+    dVector3	vNormal; // contact normal; _ProcessLocalContacts() negates it when writing dContactGeom::normal
+    dReal		fDepth; // penetration depth; copied to dContactGeom::depth
+    int			triIndex; // copied to dContactGeom::side2 by _ProcessLocalContacts()
+    int			nFlags; // 0 = filtered out, 1 = OK
+}sLocalContactData;
+
+struct sCylinderTrimeshColliderData // working state shared by the steps of one cylinder-trimesh collision query
+{
+    sCylinderTrimeshColliderData(int flags, int skip): m_iFlags(flags), m_iSkip(skip), m_nContacts(0), m_gLocalContacts(NULL) {} // only these four members are initialised here
+
+#ifdef OPTIMIZE_CONTACTS
+    void _OptimizeLocalContacts(); // clears nFlags of the shallower contact of every "near" pair
+#endif
+    void _InitCylinderTrimeshData(dxGeom *Cylinder, dxTriMesh *Trimesh);
+    int	_ProcessLocalContacts(dContactGeom *contact, dxGeom *Cylinder, dxTriMesh *Trimesh); // copies the surviving local contacts out; returns their count
+
+    bool _cldTestAxis(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, 
+        dVector3& vAxis, int iAxis, bool bNoFlip = false); // false = vAxis separates cylinder and triangle
+    bool _cldTestCircleToEdgeAxis(
+        const dVector3 &v0, const dVector3 &v1, const dVector3 &v2,
+        const dVector3 &vCenterPoint, const dVector3 &vCylinderAxis1,
+        const dVector3 &vVx0, const dVector3 &vVx1, int iAxis);
+    bool _cldTestSeparatingAxes(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2); // false = some axis separates cylinder and triangle
+    bool _cldClipCylinderEdgeToTriangle(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2);
+    void _cldClipCylinderToTriangle(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2);
+    void TestOneTriangleVsCylinder(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, 
+        const bool bDoubleSided);
+    int TestCollisionForSingleTriangle(int ctContacts0, int Triint, dVector3 dv[3], 
+        bool &bOutFinishSearching);
+
+    // cylinder data
+    dMatrix3	m_mCylinderRot;
+    dQuaternion	m_qCylinderRot;
+    dQuaternion	m_qInvCylinderRot;
+    dVector3	m_vCylinderPos; // cylinder centre: the caps are at m_vCylinderPos +/- m_vCylinderAxis * m_fCylinderSize / 2
+    dVector3	m_vCylinderAxis; // direction of the cylinder axis
+    dReal		m_fCylinderRadius;
+    dReal		m_fCylinderSize; // full length of the cylinder along its axis (halved wherever a cap is addressed)
+    dVector3	m_avCylinderNormals[nCYLINDER_CIRCLE_SEGMENTS];
+
+    // mesh data
+    dQuaternion	m_qTrimeshRot;
+    dQuaternion	m_qInvTrimeshRot;
+    dMatrix3	m_mTrimeshRot;
+    dVector3	m_vTrimeshPos;
+
+    // global collider data
+    dVector3	m_vBestPoint;
+    dReal		m_fBestDepth; // smallest overlap recorded by _cldTestAxis() so far
+    dReal		m_fBestCenter; // centre of the triangle's interval on the best axis
+    dReal		m_fBestrt; // half-extent of the cylinder's projection on the best axis
+    int			m_iBestAxis; // id of the best axis; reset to 0 by _cldTestSeparatingAxes()
+    dVector3	m_vContactNormal; // the best axis, possibly sign-flipped by _cldTestAxis()
+    dVector3	m_vNormal; // NOTE(review): used as the triangle plane normal; it is set outside the code shown here - confirm
+    dVector3	m_vE0; // v1 - v0, set by _cldTestSeparatingAxes()
+    dVector3	m_vE1; // expected to be set before _cldTestSeparatingAxes() is called
+    dVector3	m_vE2; // v0 - v2, set by _cldTestSeparatingAxes()
+
+    // ODE stuff
+    int					m_iFlags; // collider flags as passed to the constructor; the NUMC_MASK bits hold the contact capacity
+    int					m_iSkip; // contact stride as passed to the constructor
+    int					m_nContacts;// = 0;
+    sLocalContactData*	m_gLocalContacts; // local contact buffer; NULL after construction, assigned elsewhere
+};
+
+
+#ifdef OPTIMIZE_CONTACTS
+
+// Per-component tolerance below which two contact positions are classified as "near" (used by _IsNearContacts)
+static const dReal fSameContactPositionEpsilon = REAL(0.0001); // 1e-4
+// Per-component tolerance below which two contact normals are classified as "near" in direction (used by _IsNearContacts)
+static const dReal fSameContactNormalEpsilon = REAL(0.0001); // 1e-4
+
+// Tells whether two local contacts are near-duplicates: returns 1 only when
+// both their positions and their normals agree within the tolerances.
+inline int _IsNearContacts(sLocalContactData& c1,sLocalContactData& c2)
+{
+    dVector3	vDelta;
+
+    // positions: every component of the difference has to stay below
+    // fSameContactPositionEpsilon
+    dVector3Subtract(c1.vPos,c2.vPos,vDelta);
+    const bool bClosePosition =
+           (dFabs(vDelta[0]) < fSameContactPositionEpsilon)
+        && (dFabs(vDelta[1]) < fSameContactPositionEpsilon)
+        && (dFabs(vDelta[2]) < fSameContactPositionEpsilon);
+
+    // normals: same component-wise test against fSameContactNormalEpsilon
+    dVector3Subtract(c1.vNormal,c2.vNormal,vDelta);
+    const bool bCloseNormal =
+           (dFabs(vDelta[0]) < fSameContactNormalEpsilon)
+        && (dFabs(vDelta[1]) < fSameContactNormalEpsilon)
+        && (dFabs(vDelta[2]) < fSameContactNormalEpsilon);
+
+    if (!bClosePosition)
+    {
+        return 0;
+    }
+
+    return bCloseNormal ? 1 : 0;
+}
+
+// Selection criterion for near-duplicate contacts (change it here if needed):
+// c1 is better than c2 only if it penetrates strictly deeper.
+inline int _IsBetter(sLocalContactData& c1,sLocalContactData& c2)
+{
+    return (c2.fDepth < c1.fDepth) ? 1 : 0;
+}
+
+// Walks over every pair of local contacts and, for each "near" pair, clears
+// nFlags of the shallower contact so that it is skipped later on.
+void sCylinderTrimeshColliderData::_OptimizeLocalContacts()
+{
+    const int nCount = m_nContacts;
+
+    for (int iFirst = 0; iFirst + 1 < nCount; iFirst++)
+    {
+        sLocalContactData &first = m_gLocalContacts[iFirst];
+
+        for (int iSecond = iFirst + 1; iSecond < nCount; iSecond++)
+        {
+            sLocalContactData &second = m_gLocalContacts[iSecond];
+
+            if (!_IsNearContacts(first, second))
+            {
+                continue;
+            }
+
+            // Drop the less penetrating contact of the pair; on equal
+            // depth the second one is dropped. Contacts that are already
+            // filtered out still take part in later comparisons.
+            // NOTE (kept from the original author): summing the two depths
+            // instead was tried and reportedly did not work as well.
+            sLocalContactData &loser = _IsBetter(second, first) ? first : second;
+            loser.nFlags = 0;
+        }
+    }
+}
+#endif // OPTIMIZE_CONTACTS
+
+// Copies the local contacts whose nFlags is still 1 into the caller's contact
+// array (negating each normal) and returns the number of contacts written.
+int	sCylinderTrimeshColliderData::_ProcessLocalContacts(dContactGeom *contact, 
+                                                        dxGeom *Cylinder, dxTriMesh *Trimesh)
+{
+#ifdef OPTIMIZE_CONTACTS
+    // Near-duplicate filtering needs at least two contacts and is skipped
+    // when the CONTACTS_UNIMPORTANT flag is set.
+    const bool bFilterDuplicates = (m_nContacts > 1) && !(m_iFlags & CONTACTS_UNIMPORTANT);
+    if (bFilterDuplicates)
+    {
+        _OptimizeLocalContacts();
+    }
+#endif		
+
+    int nWritten = 0;
+
+    for (int iLocal = 0; iLocal < m_nContacts; ++iLocal)
+    {
+        sLocalContactData &local = m_gLocalContacts[iLocal];
+        if (local.nFlags != 1)
+        {
+            // 0 = filtered out
+            continue;
+        }
+
+        dContactGeom *pOut = SAFECONTACT(m_iFlags, contact, nWritten, m_iSkip);
+        pOut->g1 = Cylinder;
+        pOut->g2 = Trimesh;
+        pOut->side1 = -1;
+        pOut->side2 = local.triIndex;
+        pOut->depth = local.fDepth;
+        dVector3Copy(local.vPos, pOut->pos);
+        // the reported normal is the stored one with its sign reversed
+        dVector3Copy(local.vNormal, pOut->normal);
+        dVector3Inv(pOut->normal);
+        nWritten++;
+    }
+
+    return nWritten;
+}
+
+
+bool sCylinderTrimeshColliderData::_cldTestAxis( // separating-axis test: cylinder against triangle (v0, v1, v2) along vAxis
+    const dVector3 &v0,
+    const dVector3 &v1,
+    const dVector3 &v2, 
+    dVector3& vAxis, // candidate axis; normalized in place unless it is shorter than 1e-5
+    int iAxis, // id that is stored in m_iBestAxis if this axis becomes the best one
+    bool bNoFlip/* = false*/) // true: never reverse the stored normal, even if the triangle lies on the negative side of the axis
+{
+    // Returns false if vAxis separates cylinder and triangle; otherwise true (also for a degenerate axis), recording the axis when its overlap is the smallest so far.
+    // calculate length of separating axis vector
+    dReal fL = dVector3Length(vAxis);
+    // if not long enough
+    if ( fL < REAL(1e-5) )
+    {
+        // do nothing: a degenerate axis cannot separate anything, so report "no separation"
+        return true;
+    }
+
+    // otherwise normalize it
+    vAxis[0] /= fL;
+    vAxis[1] /= fL;
+    vAxis[2] /= fL;
+
+    dReal fdot1 = dVector3Dot(m_vCylinderAxis,vAxis);
+    // frc = half-extent of the cylinder's projection on vAxis
+    dReal frc;
+
+    if (dFabs(fdot1) > REAL(1.0) ) 
+    {
+        // |fdot1| > 1: use only the half length, which also keeps dSqrt() below away from a negative argument
+        frc = dFabs(m_fCylinderSize* REAL(0.5));
+    }
+    else
+    {
+        frc = dFabs((m_fCylinderSize* REAL(0.5)) * fdot1)
+            + m_fCylinderRadius * dSqrt(REAL(1.0)-(fdot1*fdot1));
+    }
+
+    dVector3 vV0;
+    dVector3Subtract(v0,m_vCylinderPos,vV0);
+    dVector3 vV1;
+    dVector3Subtract(v1,m_vCylinderPos,vV1);
+    dVector3 vV2;
+    dVector3Subtract(v2,m_vCylinderPos,vV2);
+
+    // project triangle on vAxis (vertices taken relative to the cylinder centre)
+    dReal afv[3];
+    afv[0] = dVector3Dot( vV0 , vAxis );
+    afv[1] = dVector3Dot( vV1 , vAxis );
+    afv[2] = dVector3Dot( vV2 , vAxis );
+
+    dReal fMin = MAX_REAL;
+    dReal fMax = -MAX_REAL;
+
+    // for each vertex 
+    for(int i = 0; i < 3; i++) 
+    {
+        // find minimum
+        if (afv[i]<fMin) 
+        {
+            fMin = afv[i];
+        }
+        // find maximum
+        if (afv[i]>fMax) 
+        {
+            fMax = afv[i];
+        }
+    }
+
+    // centre of the triangle's interval on the axis (the cylinder centre projects to 0)
+    dReal fCenter = (fMin+fMax)* REAL(0.5);
+    // calculate triangles halfinterval 
+    dReal fTriangleRadius = (fMax-fMin)*REAL(0.5);
+
+    // if they do not overlap, 
+    if( dFabs(fCenter) > (frc+fTriangleRadius) ) 
+    { 
+        // exit, we have no intersection
+        return false; 
+    }
+
+    // calculate depth (overlap of the two intervals; >= 0 at this point)
+    dReal fDepth = -(dFabs(fCenter) - (frc + fTriangleRadius ) );
+
+    // if smaller than the best (i.e. smallest) overlap found so far
+    if ( fDepth < m_fBestDepth ) 
+    {
+        // remember depth
+        m_fBestDepth			= fDepth;
+        m_fBestCenter		    = fCenter;
+        m_fBestrt				= frc;
+        dVector3Copy(vAxis,m_vContactNormal);
+        m_iBestAxis				= iAxis;
+
+        // flip normal if interval is wrong faced
+        if ( fCenter< REAL(0.0) && !bNoFlip) 
+        { 
+            dVector3Inv(m_vContactNormal);
+            m_fBestCenter = -fCenter;
+        }
+    }
+
+    return true;
+}
+
+// Edge-versus-cap-circle separating axis test: builds a candidate axis from
+// the triangle edge vVx0->vVx1 and the circle centred at vCenterPoint (lying
+// in the plane orthogonal to vCylinderAxis1) and hands it to _cldTestAxis().
+// Returns true when the edge yields no usable axis, otherwise the result of
+// _cldTestAxis() for the triangle (v0, v1, v2) and the id iAxis.
+bool sCylinderTrimeshColliderData::_cldTestCircleToEdgeAxis(
+    const dVector3 &v0, const dVector3 &v1, const dVector3 &v2,
+    const dVector3 &vCenterPoint, const dVector3 &vCylinderAxis1,
+    const dVector3 &vVx0, const dVector3 &vVx1, int iAxis) 
+{
+    // unit direction of the edge, which starts at vVx0
+    dVector3 vEdgeDir;
+    dVector3Subtract( vVx1 , vVx0 , vEdgeDir);
+    dNormalize3(vEdgeDir);
+
+    // cosine of the angle between the edge and the cylinder axis
+    const dReal fCosEdgeAxis = dVector3Dot(vEdgeDir , vCylinderAxis1);
+    if(dFabs(fCosEdgeAxis)<REAL(1e-5))
+    {
+        // edge is parallel to the circle plane: this can't be a separating axis
+        return true;
+    }
+
+    // point where the edge line crosses the circle plane
+    dVector3 vToCenter;
+    dVector3Subtract(vCenterPoint,vVx0,vToCenter);
+    const dReal fAxialDist = dVector3Dot(vToCenter,vCylinderAxis1);
+    dVector3 vPlanePoint;
+    vPlanePoint[0] = vVx0[0] + vEdgeDir[0] * fAxialDist/fCosEdgeAxis;
+    vPlanePoint[1] = vVx0[1] + vEdgeDir[1] * fAxialDist/fCosEdgeAxis;
+    vPlanePoint[2] = vVx0[2] + vEdgeDir[2] * fAxialDist/fCosEdgeAxis;
+
+    // tangent of the circle around vCenterPoint that passes through that point
+    dVector3 vRadial;
+    dVector3 vTangent;
+    dVector3Subtract(vCenterPoint,vPlanePoint,vRadial);
+    dVector3Cross(vRadial,vCylinderAxis1,vTangent);
+
+    // candidate axis: orthogonal to both the tangent and the edge direction
+    dVector3 vAxis;
+    dVector3Cross(vTangent,vEdgeDir,vAxis);
+
+    // use that vector as separating axis
+    return _cldTestAxis( v0, v1, v2, vAxis, iAxis );
+}
+
+// Shorthand used by the vertex axes of the separating-axis test:
+// r = ( (v1 - v2) cross v3 ) cross v3
+inline void _CalculateAxis(const dVector3& v1,
+                           const dVector3& v2,
+                           const dVector3& v3,
+                           dVector3& r)
+{
+    dVector3 vDiff;         // v1 - v2
+    dVector3Subtract(v1,v2,vDiff);
+
+    dVector3 vDiffCrossV3;  // (v1 - v2) cross v3
+    dVector3Cross(vDiff,v3,vDiffCrossV3);
+    dVector3Cross(vDiffCrossV3,v3,r);
+}
+
+bool sCylinderTrimeshColliderData::_cldTestSeparatingAxes(
+    const dVector3 &v0,
+    const dVector3 &v1,
+    const dVector3 &v2) 
+{
+    // Tests all candidate axes for triangle (v0, v1, v2): returns false as soon as one separates it from the cylinder, true if none does. Only m_iBestAxis is reset here, m_fBestDepth is not.
+    // calculate edge vectors
+    dVector3Subtract(v1 ,v0 , m_vE0);
+    // m_vE1 has been calculated before -> so save some cycles here
+    dVector3Subtract(v0 ,v2 , m_vE2);
+
+    // calculate the centre of one cap (centre + axis * half length) in absolute space
+    dVector3 vCp0;
+    vCp0[0] = m_vCylinderPos[0] + m_vCylinderAxis[0]*(m_fCylinderSize* REAL(0.5));
+    vCp0[1] = m_vCylinderPos[1] + m_vCylinderAxis[1]*(m_fCylinderSize* REAL(0.5));
+    vCp0[2] = m_vCylinderPos[2] + m_vCylinderAxis[2]*(m_fCylinderSize* REAL(0.5));
+
+#if 0
+    dVector3 vCp1;
+    vCp1[0] = m_vCylinderPos[0] - m_vCylinderAxis[0]*(m_fCylinderSize* REAL(0.5));
+    vCp1[1] = m_vCylinderPos[1] - m_vCylinderAxis[1]*(m_fCylinderSize* REAL(0.5));
+    vCp1[2] = m_vCylinderPos[2] - m_vCylinderAxis[2]*(m_fCylinderSize* REAL(0.5));
+#endif
+
+    // reset best axis
+    m_iBestAxis = 0;
+    dVector3 vAxis;
+
+    // axis m_vNormal (axis id 1; bNoFlip is set, so the stored normal keeps this direction)
+    //vAxis = -m_vNormal;
+    vAxis[0] = -m_vNormal[0];
+    vAxis[1] = -m_vNormal[1];
+    vAxis[2] = -m_vNormal[2];
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 1, true)) 
+    { 
+        return false; 
+    }
+
+    // axis CxE0
+    // vAxis = ( m_vCylinderAxis cross m_vE0 );
+    dVector3Cross(m_vCylinderAxis, m_vE0,vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 2)) 
+    { 
+        return false; 
+    }
+
+    // axis CxE1
+    // vAxis = ( m_vCylinderAxis cross m_vE1 );
+    dVector3Cross(m_vCylinderAxis, m_vE1,vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 3)) 
+    { 
+        return false; 
+    }
+
+    // axis CxE2
+    // vAxis = ( m_vCylinderAxis cross m_vE2 );
+    dVector3Cross(m_vCylinderAxis, m_vE2,vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 4)) 
+    { 
+        return false; 
+    }
+
+    // first vertex on triangle
+    // axis ((V0-Cp0) x C) x C
+    //vAxis = ( ( v0-vCp0 ) cross m_vCylinderAxis ) cross m_vCylinderAxis;
+    _CalculateAxis(v0 , vCp0 , m_vCylinderAxis , vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 11)) 
+    { 
+        return false; 
+    }
+
+    // second vertex on triangle
+    // axis ((V1-Cp0) x C) x C
+    // vAxis = ( ( v1-vCp0 ) cross m_vCylinderAxis ) cross m_vCylinderAxis;
+    _CalculateAxis(v1 , vCp0 , m_vCylinderAxis , vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 12)) 
+    { 
+        return false; 
+    }
+
+    // third vertex on triangle
+    // axis ((V2-Cp0) x C) x C
+    //vAxis = ( ( v2-vCp0 ) cross m_vCylinderAxis ) cross m_vCylinderAxis;
+    _CalculateAxis(v2 , vCp0 , m_vCylinderAxis , vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 13))
+    { 
+        return false; 
+    }
+
+    // test cylinder axis
+    // vAxis = m_vCylinderAxis;
+    dVector3Copy(m_vCylinderAxis , vAxis);
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 14)) 
+    { 
+        return false; 
+    }
+
+    // Test top and bottom circle ring of cylinder for separation (vccATop repeats the computation of vCp0 above)
+    dVector3 vccATop;
+    vccATop[0] = m_vCylinderPos[0] + m_vCylinderAxis[0]*(m_fCylinderSize * REAL(0.5));
+    vccATop[1] = m_vCylinderPos[1] + m_vCylinderAxis[1]*(m_fCylinderSize * REAL(0.5));
+    vccATop[2] = m_vCylinderPos[2] + m_vCylinderAxis[2]*(m_fCylinderSize * REAL(0.5));
+
+    dVector3 vccABottom;
+    vccABottom[0] = m_vCylinderPos[0] - m_vCylinderAxis[0]*(m_fCylinderSize * REAL(0.5));
+    vccABottom[1] = m_vCylinderPos[1] - m_vCylinderAxis[1]*(m_fCylinderSize * REAL(0.5));
+    vccABottom[2] = m_vCylinderPos[2] - m_vCylinderAxis[2]*(m_fCylinderSize * REAL(0.5));
+
+    // axis ids 15-17: the three triangle edges against the top circle; ids 18-20: the same edges against the bottom circle
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccATop, m_vCylinderAxis, v0, v1, 15)) 
+    {
+        return false;
+    }
+
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccATop, m_vCylinderAxis, v1, v2, 16)) 
+    {
+        return false;
+    }
+
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccATop, m_vCylinderAxis, v0, v2, 17)) 
+    {
+        return false;
+    }
+
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccABottom, m_vCylinderAxis, v0, v1, 18)) 
+    {
+        return false;
+    }
+
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccABottom, m_vCylinderAxis, v1, v2, 19)) 
+    {
+        return false;
+    }
+
+    if (!_cldTestCircleToEdgeAxis(v0, v1, v2, vccABottom, m_vCylinderAxis, v0, v2, 20)) 
+    {
+        return false;
+    }
+
+    return true;
+}
+
+bool sCylinderTrimeshColliderData::_cldClipCylinderEdgeToTriangle(
+    const dVector3 &v0, const dVector3 &/*v1*/, const dVector3 &/*v2*/)
+{
+    // Clips the side-surface line of the cylinder that faces along m_vContactNormal against the triangle and stores up to two local contacts; returns false if no contact is produced.
+    dReal fTemp = dVector3Dot(m_vCylinderAxis , m_vContactNormal);
+    dVector3 vN2; // component of the contact normal that is orthogonal to the cylinder axis
+    vN2[0] = m_vContactNormal[0] - m_vCylinderAxis[0]*fTemp;
+    vN2[1] = m_vContactNormal[1] - m_vCylinderAxis[1]*fTemp;
+    vN2[2] = m_vContactNormal[2] - m_vCylinderAxis[2]*fTemp;
+
+    fTemp = dVector3Length(vN2);
+    if (fTemp < REAL(1e-5))
+    {
+        return false; // the contact normal is (almost) parallel to the axis, so no side line can be selected
+    }
+
+    // Normalize it
+    vN2[0] /= fTemp;
+    vN2[1] /= fTemp;
+    vN2[2] /= fTemp;
+
+    // move the cylinder centre by one radius along vN2, i.e. onto the side surface
+    dVector3 vCposTrans;
+    vCposTrans[0] = m_vCylinderPos[0] + vN2[0]*m_fCylinderRadius;
+    vCposTrans[1] = m_vCylinderPos[1] + vN2[1]*m_fCylinderRadius;
+    vCposTrans[2] = m_vCylinderPos[2] + vN2[2]*m_fCylinderRadius;
+
+    dVector3 vCEdgePoint0; // end of the side line at +half length along the axis
+    vCEdgePoint0[0]  = vCposTrans[0] + m_vCylinderAxis[0] * (m_fCylinderSize* REAL(0.5));
+    vCEdgePoint0[1]  = vCposTrans[1] + m_vCylinderAxis[1] * (m_fCylinderSize* REAL(0.5));
+    vCEdgePoint0[2]  = vCposTrans[2] + m_vCylinderAxis[2] * (m_fCylinderSize* REAL(0.5));
+
+    dVector3 vCEdgePoint1; // end of the side line at -half length along the axis
+    vCEdgePoint1[0]  = vCposTrans[0] - m_vCylinderAxis[0] * (m_fCylinderSize* REAL(0.5));
+    vCEdgePoint1[1]  = vCposTrans[1] - m_vCylinderAxis[1] * (m_fCylinderSize* REAL(0.5));
+    vCEdgePoint1[2]  = vCposTrans[2] - m_vCylinderAxis[2] * (m_fCylinderSize* REAL(0.5));
+
+    // transform cylinder edge points into triangle space (origin at v0)
+    vCEdgePoint0[0] -= v0[0];
+    vCEdgePoint0[1] -= v0[1];
+    vCEdgePoint0[2] -= v0[2];
+
+    vCEdgePoint1[0] -= v0[0];
+    vCEdgePoint1[1] -= v0[1];
+    vCEdgePoint1[2] -= v0[2];
+
+    dVector4 plPlane;
+    dVector3 vPlaneNormal;
+
+    // triangle plane; as for every plane below, give up as soon as dClipEdgeToPlane() returns false
+    //plPlane = Plane4f( -m_vNormal, 0);
+    vPlaneNormal[0] = -m_vNormal[0];
+    vPlaneNormal[1] = -m_vNormal[1];
+    vPlaneNormal[2] = -m_vNormal[2];
+    dConstructPlane(vPlaneNormal,REAL(0.0),plPlane);
+    if(!dClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    { 
+        return false; 
+    }
+
+    // plane with edge 0
+    //plPlane = Plane4f( ( m_vNormal cross m_vE0 ), REAL(1e-5));
+    dVector3Cross(m_vNormal,m_vE0,vPlaneNormal);
+    dConstructPlane(vPlaneNormal,REAL(1e-5),plPlane);
+    if(!dClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    { 
+        return false; 
+    }
+
+    // plane with edge 1
+    //dVector3 vTemp = ( m_vNormal cross m_vE1 );
+    dVector3Cross(m_vNormal,m_vE1,vPlaneNormal);
+    fTemp = dVector3Dot(m_vE0 , vPlaneNormal) - REAL(1e-5);
+    //plPlane = Plane4f( vTemp, -(( m_vE0 dot vTemp )-REAL(1e-5)));
+    dConstructPlane(vPlaneNormal,-fTemp,plPlane);
+    if(!dClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    {
+        return false;
+    }
+
+    // plane with edge 2
+    // plPlane = Plane4f( ( m_vNormal cross m_vE2 ), REAL(1e-5));
+    dVector3Cross(m_vNormal,m_vE2,vPlaneNormal);
+    dConstructPlane(vPlaneNormal,REAL(1e-5),plPlane);
+    if(!dClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    { 
+        return false; 
+    }
+
+    // return cylinder edge points into absolute space
+    vCEdgePoint0[0] += v0[0];
+    vCEdgePoint0[1] += v0[1];
+    vCEdgePoint0[2] += v0[2];
+
+    vCEdgePoint1[0] += v0[0];
+    vCEdgePoint1[1] += v0[1];
+    vCEdgePoint1[2] += v0[2];
+
+    // calculate depths for both contact points (m_fBestrt and m_fBestDepth were recorded by _cldTestAxis for the best axis)
+    dVector3 vTemp;
+    dVector3Subtract(vCEdgePoint0,m_vCylinderPos, vTemp);
+    dReal fRestDepth0 = -dVector3Dot(vTemp,m_vContactNormal) + m_fBestrt;
+    dVector3Subtract(vCEdgePoint1,m_vCylinderPos, vTemp);
+    dReal fRestDepth1 = -dVector3Dot(vTemp,m_vContactNormal) + m_fBestrt;
+
+    dReal fDepth0 = m_fBestDepth - (fRestDepth0);
+    dReal fDepth1 = m_fBestDepth - (fRestDepth1);
+
+    // clamp depths to zero
+    if(fDepth0 < REAL(0.0) ) 
+    {
+        fDepth0 = REAL(0.0);
+    }
+
+    if(fDepth1<REAL(0.0)) 
+    {
+        fDepth1 = REAL(0.0);
+    }
+
+    // Generate contact 0 - NOTE(review): written without a capacity check; presumably the caller guarantees a free slot - confirm
+    {
+        m_gLocalContacts[m_nContacts].fDepth = fDepth0;
+        dVector3Copy(m_vContactNormal,m_gLocalContacts[m_nContacts].vNormal);
+        dVector3Copy(vCEdgePoint0,m_gLocalContacts[m_nContacts].vPos);
+        m_gLocalContacts[m_nContacts].nFlags = 1; // triIndex is not assigned in this function
+        m_nContacts++;
+        if(m_nContacts >= (m_iFlags & NUMC_MASK)) 
+            return true; // contact capacity reached: the second contact is not stored
+    }
+
+    // Generate contact 1
+    {
+        // second clipped end point, same normal
+        m_gLocalContacts[m_nContacts].fDepth = fDepth1;
+        dVector3Copy(m_vContactNormal,m_gLocalContacts[m_nContacts].vNormal);
+        dVector3Copy(vCEdgePoint1,m_gLocalContacts[m_nContacts].vPos);
+        m_gLocalContacts[m_nContacts].nFlags = 1;
+        m_nContacts++;		
+    }
+
+    return true;
+}
+
+// Clips the triangle (v0, v1, v2) against one end cap of the cylinder and
+// appends a local contact for every clipped vertex that has positive depth.
+//
+// The cap is chosen by the sign of (cylinder axis . contact normal). The
+// triangle is moved into the frame of that cap circle, clipped by the cap
+// plane and then by each of the nCYLINDER_CIRCLE_SEGMENTS side planes taken
+// from m_avCylinderNormals; the surviving polygon is transformed back to
+// absolute space. Generation stops as soon as m_nContacts reaches
+// (m_iFlags & NUMC_MASK).
+void sCylinderTrimeshColliderData::_cldClipCylinderToTriangle(
+    const dVector3 &v0, const dVector3 &v1, const dVector3 &v2)
+{
+    // ping-pong buffers used by the successive clipping passes
+    dVector3 avClipBufferA[nMAX_CYLINDER_TRIANGLE_CLIP_POINTS];
+    dVector3 avClipBufferB[nMAX_CYLINDER_TRIANGLE_CLIP_POINTS];
+    dSetZero(&avClipBufferA[0][0], nMAX_CYLINDER_TRIANGLE_CLIP_POINTS * 4);
+    dSetZero(&avClipBufferB[0][0], nMAX_CYLINDER_TRIANGLE_CLIP_POINTS * 4);
+
+    // working copy of the triangle vertices
+    dVector3 avTriangle[3];
+    dVector3Copy(v0, avTriangle[0]);
+    dVector3Copy(v1, avTriangle[1]);
+    dVector3Copy(v2, avTriangle[2]);
+
+    // pick the cap circle used for clipping and its plane normal
+    // (expressed relative to the cylinder orientation)
+    const dReal fHalfLength = m_fCylinderSize * REAL(0.5);
+    dVector3 vCapCenter, vCapNormal_Rel;
+    dSetZero(vCapNormal_Rel, 4);
+    if (dVector3Dot(m_vCylinderAxis, m_vContactNormal) > REAL(0.0))
+    {
+        // top circle
+        vCapCenter[0] = m_vCylinderPos[0] + m_vCylinderAxis[0] * fHalfLength;
+        vCapCenter[1] = m_vCylinderPos[1] + m_vCylinderAxis[1] * fHalfLength;
+        vCapCenter[2] = m_vCylinderPos[2] + m_vCylinderAxis[2] * fHalfLength;
+
+        vCapNormal_Rel[nCYLINDER_AXIS] = REAL(-1.0);
+    }
+    else
+    {
+        // bottom circle
+        vCapCenter[0] = m_vCylinderPos[0] - m_vCylinderAxis[0] * fHalfLength;
+        vCapCenter[1] = m_vCylinderPos[1] - m_vCylinderAxis[1] * fHalfLength;
+        vCapCenter[2] = m_vCylinderPos[2] - m_vCylinderAxis[2] * fHalfLength;
+
+        vCapNormal_Rel[nCYLINDER_AXIS] = REAL(1.0);
+    }
+
+    // move the triangle into the space of the chosen cap circle
+    dVector3 vRelative;
+    dQuatInv(m_qCylinderRot, m_qInvCylinderRot);
+    for (int iVertex = 0; iVertex < 3; iVertex++)
+    {
+        dVector3Subtract(avTriangle[iVertex], vCapCenter, vRelative);
+        dQuatTransform(m_qInvCylinderRot, vRelative, avTriangle[iVertex]);
+    }
+
+    int nCountA = 0;
+    int nCountB = 0;
+    dVector4 plClip;
+
+    // first pass: the plane that contains the cap circle
+    dConstructPlane(vCapNormal_Rel, REAL(0.0), plClip);
+    dClipPolyToPlane(avTriangle, 3, avClipBufferA, nCountA, plClip);
+
+    // remaining passes: the side planes approximating the circle.
+    // Even segments clip A -> B, odd segments clip B -> A.
+    int nCircleSegment = 0;
+    while (nCircleSegment < nCYLINDER_CIRCLE_SEGMENTS)
+    {
+        dConstructPlane(m_avCylinderNormals[nCircleSegment], m_fCylinderRadius, plClip);
+
+        if ((nCircleSegment % 2) != 0)
+        {
+            dClipPolyToPlane(avClipBufferB, nCountB, avClipBufferA, nCountA, plClip);
+        }
+        else
+        {
+            dClipPolyToPlane(avClipBufferA, nCountA, avClipBufferB, nCountB, plClip);
+        }
+
+        dIASSERT( nCountA >= 0 && nCountA <= nMAX_CYLINDER_TRIANGLE_CLIP_POINTS );
+        dIASSERT( nCountB >= 0 && nCountB <= nMAX_CYLINDER_TRIANGLE_CLIP_POINTS );
+
+        nCircleSegment++;
+    }
+
+    // The parity of the number of passes tells which buffer received the
+    // final polygon.
+    dVector3 *pavClipped = avClipBufferA;
+    int nClipped = nCountA;
+    if (nCircleSegment % 2)
+    {
+        pavClipped = avClipBufferB;
+        nClipped = nCountB;
+    }
+
+    // transform the clipped points back to absolute space and emit contacts
+    dVector3 vPoint;
+    dVector3 vFromCenter;
+    for (int iPoint = 0; iPoint < nClipped; iPoint++)
+    {
+        dQuatTransform(m_qCylinderRot, pavClipped[iPoint], vPoint);
+        vPoint[0] += vCapCenter[0];
+        vPoint[1] += vCapCenter[1];
+        vPoint[2] += vCapCenter[2];
+
+        dVector3Subtract(vPoint, m_vCylinderPos, vFromCenter);
+        const dReal fAlongNormal = dFabs(dVector3Dot(vFromCenter, m_vContactNormal));
+        const dReal fPointDepth = m_fBestrt - fAlongNormal;
+
+        // only points with strictly positive depth produce a contact
+        if (!(fPointDepth > REAL(0.0)))
+        {
+            continue;
+        }
+
+        sLocalContactData &rContact = m_gLocalContacts[m_nContacts];
+        rContact.fDepth = fPointDepth;
+        dVector3Copy(m_vContactNormal, rContact.vNormal);
+        dVector3Copy(vPoint, rContact.vPos);
+        rContact.nFlags = 1;
+        m_nContacts++;
+
+        // the local contact buffer is full
+        if (m_nContacts >= (m_iFlags & NUMC_MASK))
+        {
+            return;
+        }
+    }
+}
+
+// Tests one triangle (v0, v1, v2) against the cylinder and appends any
+// resulting contacts to the local contact list.
+//
+// The triangle is rejected when it is degenerate, when the cylinder centre
+// lies behind it (unless bDoubleSided is set), or when the separating-axis
+// test finds no intersection. Depending on how closely the contact normal is
+// aligned with the cylinder axis, contacts come either from clipping a
+// cylinder edge or from clipping a cylinder cap against the triangle.
+void sCylinderTrimeshColliderData::TestOneTriangleVsCylinder(
+    const dVector3 &v0, 
+    const dVector3 &v1, 
+    const dVector3 &v2, 
+    const bool bDoubleSided)
+{
+    // triangle normal = (v2 - v1) x (v0 - v1)
+    dVector3 vV1ToV0;
+    dVector3Subtract(v2, v1, m_vE1);
+    dVector3Subtract(v0, v1, vV1ToV0);
+    dVector3Cross(m_vE1, vV1ToV0, m_vNormal);
+
+    // A triangle that was valid in the mesh may still collapse into a
+    // segment after the space transformation; such triangles are skipped.
+    if (!dSafeNormalize3(m_vNormal))
+    {
+        return;
+    }
+
+    // plane through v0 with the triangle normal
+    dVector4 plTrianglePlane;
+    const dReal fPlaneOffset = -dVector3Dot(v0, m_vNormal);
+    dConstructPlane(m_vNormal, fPlaneOffset, plTrianglePlane);
+
+    // signed distance of the cylinder centre from the triangle plane
+    const dReal fCenterToPlane = dPointPlaneDistance(m_vCylinderPos, plTrianglePlane);
+    const bool bCenterBehind = fCenterToPlane < REAL(0.0);
+
+    // single-sided triangles only collide with a cylinder on their positive side
+    if (bCenterBehind && !bDoubleSided)
+    {
+        return;
+    }
+
+    // When the centre is behind the plane the winding is flipped by
+    // swapping the second and third vertex.
+    dVector3 vPnt0;
+    dVector3 vPnt1;
+    dVector3 vPnt2;
+    dVector3Copy(v0, vPnt0);
+    dVector3Copy(bCenterBehind ? v2 : v1, vPnt1);
+    dVector3Copy(bCenterBehind ? v1 : v2, vPnt2);
+
+    m_fBestDepth = MAX_REAL;
+
+    // intersection test; also selects the best separating axis
+    if (!_cldTestSeparatingAxes(vPnt0, vPnt1, vPnt2))
+    {
+        return;
+    }
+
+    if (m_iBestAxis == 0)
+    {
+        // not expected: the separating axis test should already have
+        // reported failure in this case
+        dIASSERT(false);
+        return;
+    }
+
+    // choose the clipping method from the alignment of normal and axis
+    const dReal fNormalDotAxis = dVector3Dot(m_vContactNormal, m_vCylinderAxis);
+    if (dFabs(fNormalDotAxis) < REAL(0.9))
+    {
+        // nothing follows this call, so its success flag is not needed
+        (void)_cldClipCylinderEdgeToTriangle(vPnt0, vPnt1, vPnt2);
+    }
+    else
+    {
+        _cldClipCylinderToTriangle(vPnt0, vPnt1, vPnt2);
+    }
+}
+
+// Caches the pose and dimensions of the cylinder and the pose of the trimesh,
+// precomputes the side-plane normals of the polygon that approximates the
+// cylinder cross-section, and resets m_vBestPoint and m_fBestCenter.
+void sCylinderTrimeshColliderData::_InitCylinderTrimeshData(dxGeom *Cylinder, dxTriMesh *Trimesh)
+{
+    // --- cylinder: orientation (matrix and quaternion) ---
+    const dReal* pCylinderR = dGeomGetRotation(Cylinder);
+    dMatrix3Copy(pCylinderR, m_mCylinderRot);
+    dGeomGetQuaternion(Cylinder, m_qCylinderRot);
+
+    // --- cylinder: position, axis direction, radius and length ---
+    const dVector3* pCylinderPos = (const dVector3*)dGeomGetPosition(Cylinder);
+    dVector3Copy(*pCylinderPos, m_vCylinderPos);
+    dMat3GetCol(m_mCylinderRot, nCYLINDER_AXIS, m_vCylinderAxis);
+    dGeomCylinderGetParams(Cylinder, &m_fCylinderRadius, &m_fCylinderSize);
+
+    // --- trimesh: orientation (matrix and quaternion) and position ---
+    const dReal* pTrimeshR = dGeomGetRotation(Trimesh);
+    dMatrix3Copy(pTrimeshR, m_mTrimeshRot);
+    dGeomGetQuaternion(Trimesh, m_qTrimeshRot);
+
+    const dVector3* pTrimeshPos = (const dVector3*)dGeomGetPosition(Trimesh);
+    dVector3Copy(*pTrimeshPos, m_vTrimeshPos);
+
+    // Side-plane normals: the first one sits at half a segment angle and
+    // each following one is a full segment angle further on. The third
+    // component is always zero, i.e. this part is axis dependent code.
+    dReal fNormalAngle = (dReal) (M_PI / nCYLINDER_CIRCLE_SEGMENTS);
+    const dReal fSegmentAngle = fNormalAngle*REAL(2.0);
+    for (int iSegment = 0; iSegment < nCYLINDER_CIRCLE_SEGMENTS; iSegment++, fNormalAngle += fSegmentAngle)
+    {
+        dReal *pNormal = m_avCylinderNormals[iSegment];
+        pNormal[0] = -dCos(fNormalAngle);
+        pNormal[1] = -dSin(fNormalAngle);
+        pNormal[2] = REAL(0.0);
+    }
+
+    // reset the per-query "best" state kept in this object
+    dSetZero(m_vBestPoint,4);
+    m_fBestCenter = REAL(0.0);
+}
+
+// Runs the single-sided cylinder test for triangle `Triint` (vertices in dv),
+// stamps the triangle index on every contact added since index ctContacts0,
+// and reports through bOutFinishSearching whether the local contact buffer
+// has reached (m_iFlags & NUMC_MASK) entries.
+// Returns the index at which the next triangle's contacts will start.
+int sCylinderTrimeshColliderData::TestCollisionForSingleTriangle(int ctContacts0, 
+                                                                 int Triint, dVector3 dv[3], bool &bOutFinishSearching)
+{
+    TestOneTriangleVsCylinder(dv[0], dv[1], dv[2], false);
+
+    // tag the freshly generated contacts with their source triangle
+    int iContact = ctContacts0;
+    while (iContact < m_nContacts)
+    {
+        m_gLocalContacts[iContact].triIndex = Triint;
+        ++iContact;
+    }
+
+    // The caller is expected to break out of its triangle loop when this
+    // flag is set.
+    const bool bBufferFull = m_nContacts >= (m_iFlags & NUMC_MASK);
+    bOutFinishSearching = bBufferFull;
+
+    return iContact;
+}
+
+// OPCODE version of cylinder to mesh collider
+#if dTRIMESH_OPCODE
+// Queries the mesh BV tree for triangles that may touch the cylinder.
+//
+// The cylinder is represented by an oriented box centred at the cylinder
+// position relative to the trimesh position, with the half axis length as
+// extent along nCYLINDER_AXIS and the radius as extent along the other two
+// axes. When box temporal coherence is enabled for the mesh, the per-geom
+// BoxTC cache entry for this cylinder is used (and created on first use);
+// otherwise the supplied BoxCache is used. The touched primitives are left
+// in Collider for the caller to retrieve.
+static void dQueryCTLPotentialCollisionTriangles(OBBCollider &Collider, 
+                                                 sCylinderTrimeshColliderData &cData, dxGeom *Cylinder, dxTriMesh *Trimesh,
+                                                 OBBCache &BoxCache)
+{
+    // mesh matrix carries the trimesh rotation only; the translation is
+    // accounted for by offsetting the box centre below
+    const dVector3 vZeroVector3 = { REAL(0.0), };
+    Matrix4x4 MeshMatrix;
+    MakeMatrix(vZeroVector3, cData.m_mTrimeshRot, MeshMatrix);
+
+    const dMatrix3 &mRot = cData.m_mCylinderRot;
+
+    dVector3 vBoxCenter;
+    dSubtractVectors3(vBoxCenter, cData.m_vCylinderPos, cData.m_vTrimeshPos);
+
+    const dReal fRadius = cData.m_fCylinderRadius;
+    const dReal fHalfAxis = cData.m_fCylinderSize * REAL(0.5);
+
+    OBB obbCylinder;
+    obbCylinder.mCenter.Set(vBoxCenter[0], vBoxCenter[1], vBoxCenter[2]);
+    obbCylinder.mExtents.Set(
+        0 == nCYLINDER_AXIS ? fHalfAxis : fRadius,
+        1 == nCYLINDER_AXIS ? fHalfAxis : fRadius,
+        2 == nCYLINDER_AXIS ? fHalfAxis : fRadius);
+    // rotation elements are handed over column by column
+    obbCylinder.mRot.Set(
+        mRot[0], mRot[4], mRot[8],
+        mRot[1], mRot[5], mRot[9],
+        mRot[2], mRot[6], mRot[10]);
+
+    if (!Trimesh->getDoTC(dxTriMesh::TTC_BOX))
+    {
+        // no temporal coherence: plain query with the shared cache
+        Collider.SetTemporalCoherence(false);
+        Collider.Collide(BoxCache, obbCylinder, Trimesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+        return;
+    }
+
+    // temporal coherence: look for the cache entry owned by this cylinder
+    dxTriMesh::BoxTC* pBoxTC = 0;
+    const int nCachedEntries = Trimesh->m_BoxTCCache.size();
+    for (int iEntry = 0; iEntry != nCachedEntries && !pBoxTC; iEntry++)
+    {
+        if (Trimesh->m_BoxTCCache[iEntry].Geom == Cylinder)
+        {
+            pBoxTC = &Trimesh->m_BoxTCCache[iEntry];
+        }
+    }
+
+    if (!pBoxTC)
+    {
+        // first query for this cylinder: append a fresh entry
+        Trimesh->m_BoxTCCache.push(dxTriMesh::BoxTC());
+
+        pBoxTC = &Trimesh->m_BoxTCCache[Trimesh->m_BoxTCCache.size() - 1];
+        pBoxTC->Geom = Cylinder;
+        pBoxTC->FatCoeff = REAL(1.0);
+    }
+
+    Collider.SetTemporalCoherence(true);
+    Collider.Collide(*pBoxTC, obbCylinder, Trimesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+}
+
+// Cylinder (o1) versus triangle mesh (o2) collider, OPCODE flavour.
+//
+// Collects the candidate triangles with an OBB query, tests each of them
+// against the cylinder until the requested number of contacts
+// (flags & NUMC_MASK) has been gathered, and lets _ProcessLocalContacts
+// write the final contacts into `contact`. Returns the value produced by
+// _ProcessLocalContacts, or 0 when no local contact was generated.
+int dCollideCylinderTrimesh(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dCylinderClass );
+    dIASSERT( o2->type == dTriMeshClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxGeom *Cylinder = o1;
+    dxTriMesh *Trimesh = (dxTriMesh *)o2;
+
+    // per-call working state
+    sCylinderTrimeshColliderData cData(flags, skip);
+    cData._InitCylinderTrimeshData(Cylinder, Trimesh);
+
+    // The colliding spaces must use matching cleanup method
+    const unsigned uiTLSKind = Trimesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == Cylinder->getParentSpaceTLSKind());
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    OBBCollider& Collider = pccColliderCache->m_OBBCollider;
+
+    dQueryCTLPotentialCollisionTriangles(Collider, cData, Cylinder, Trimesh, pccColliderCache->m_DefaultBoxCache);
+
+    // candidate triangles reported by the query
+    int TriCount = Collider.GetNbTouchedPrimitives();
+    if (TriCount == 0)
+    {
+        return 0;
+    }
+
+    const int* Triangles = (const int*)Collider.GetTouchedPrimitives();
+
+    if (Trimesh->m_ArrayCallback != NULL)
+    {
+        Trimesh->m_ArrayCallback(Trimesh, Cylinder, Triangles, TriCount);
+    }
+
+    // local contact buffer lives on the stack of this function
+    cData.m_gLocalContacts = (sLocalContactData*)dALLOCA16(sizeof(sLocalContactData)*(cData.m_iFlags & NUMC_MASK));
+
+    int ctContacts0 = 0;
+    bool bFinishSearching = false;
+
+    for (int iTri = 0; iTri < TriCount && !bFinishSearching; iTri++)
+    {
+        const int Triint = Triangles[iTri];
+
+        // the per-triangle callback may veto this triangle
+        if (Trimesh->invokeCallback(Cylinder, Triint))
+        {
+            dVector3 dv[3];
+            Trimesh->fetchMeshTriangle(dv, Triint, cData.m_vTrimeshPos, cData.m_mTrimeshRot);
+
+            ctContacts0 = cData.TestCollisionForSingleTriangle(ctContacts0, Triint, dv, bFinishSearching);
+        }
+    }
+
+    if (cData.m_nContacts == 0)
+    {
+        return 0;
+    }
+
+    return cData._ProcessLocalContacts(contact, Cylinder, Trimesh);
+}
+#endif
+
+// GIMPACT version of cylinder to mesh collider
+#if dTRIMESH_GIMPACT
+// Cylinder (o1) versus triangle mesh (o2) collider, GIMPACT flavour.
+//
+// Uses the cylinder's AABB to collect candidate triangles from the mesh's
+// AABB set, tests each of them against the cylinder until the requested
+// number of contacts (flags & NUMC_MASK) has been gathered, and lets
+// _ProcessLocalContacts write the final contacts into `contact`.
+// Returns the value produced by _ProcessLocalContacts, or 0 when no local
+// contact was generated.
+int dCollideCylinderTrimesh(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dCylinderClass );
+    dIASSERT( o2->type == dTriMeshClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxGeom *Cylinder = o1;
+    dxTriMesh *Trimesh = (dxTriMesh *)o2;
+    int nResult = 0;
+
+    // per-call working state
+    sCylinderTrimeshColliderData cData(flags, skip);
+    cData._InitCylinderTrimeshData(Cylinder, Trimesh);
+
+    // broad phase: cylinder AABB against the mesh AABB set
+    aabb3f test_aabb(o1->aabb[0], o1->aabb[1], o1->aabb[2], o1->aabb[3], o1->aabb[4], o1->aabb[5]);
+
+    GDYNAMIC_ARRAY collision_result;
+    GIM_CREATE_BOXQUERY_LIST(collision_result);
+
+    gim_aabbset_box_collision(&test_aabb, &Trimesh->m_collision_trimesh.m_aabbset , &collision_result);
+
+    if (collision_result.m_size != 0)
+    {
+        // local contact buffer lives on the stack of this function
+        cData.m_gLocalContacts = (sLocalContactData*)dALLOCA16(sizeof(sLocalContactData)*(cData.m_iFlags & NUMC_MASK));
+
+        GIM_TRIMESH * ptrimesh = &Trimesh->m_collision_trimesh;
+        GUINT32 * pCandidates = GIM_DYNARRAY_POINTER(GUINT32,collision_result);
+
+        // vertex data is only read between lock and unlock
+        gim_trimesh_locks_work_data(ptrimesh);
+
+        int ctContacts0 = 0;
+        bool bFinishSearching = false;
+        for (unsigned int iCandidate = 0; !bFinishSearching && iCandidate < collision_result.m_size; iCandidate++)
+        {
+            const int Triint = pCandidates[iCandidate];
+
+            dVector3 dv[3];
+            gim_trimesh_get_triangle_vertices(ptrimesh, Triint, dv[0], dv[1], dv[2]);
+
+            ctContacts0 = cData.TestCollisionForSingleTriangle(ctContacts0, Triint, dv, bFinishSearching);
+        }
+
+        gim_trimesh_unlocks_work_data(ptrimesh);
+
+        if (cData.m_nContacts != 0)
+        {
+            nResult = cData._ProcessLocalContacts(contact, Cylinder, Trimesh);
+        }
+    }
+
+    // the query list must be released on every path
+    GIM_DYNARRAY_DESTROY(collision_result);
+
+    return nResult;
+}
+#endif
+
+#endif // dTRIMESH_ENABLED
+
+
diff --git a/libs/ode-0.16.1/ode/src/collision_kernel.cpp b/libs/ode-0.16.1/ode/src/collision_kernel.cpp
new file mode 100644
index 0000000..527941a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_kernel.cpp
@@ -0,0 +1,1247 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+core collision functions and data structures, plus part of the public API
+for geometry objects
+
+*/
+
+#include <ode/common.h>
+#include <ode/rotation.h>
+#include <ode/objects.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_util.h"
+#include "collision_std.h"
+#include "collision_transform.h"
+#include "collision_trimesh_internal.h"
+#include "collision_space_internal.h"
+#include "odeou.h"
+
+#ifdef dLIBCCD_ENABLED
+# include "collision_libccd.h"
+#endif /* dLIBCCD_ENABLED */
+
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// helper functions for dCollide()ing a space with another geom
+
+// The SpaceGeomColliderData struct (declared further below, after the dxPosR
+// cache helpers) records the parameters passed to dCollideSpaceGeom()
+
+// Single-slot cache holding at most one spare dxPosR. dAllocPosr() takes the
+// slot's content, dFreePosr() parks a released dxPosR here when the slot is
+// empty, and dClearPosrCache() frees whatever is left. Only compiled when
+// atomics are enabled.
+#if dATOMICS_ENABLED 
+static volatile atomicptr s_cachedPosR = 0; // dxPosR *
+#endif // dATOMICS_ENABLED
+
+// Returns storage for one dxPosR. With atomics enabled the cached spare is
+// taken first (the cache slot is set to NULL by the exchange); otherwise, or
+// when the cache is empty, a new block is obtained from dAlloc().
+// The returned memory is not initialised here.
+static inline dxPosR* dAllocPosr()
+{
+#if dATOMICS_ENABLED
+    dxPosR *cachedPosR = (dxPosR *)AtomicExchangePointer(&s_cachedPosR, NULL);
+    if (cachedPosR)
+    {
+        return cachedPosR;
+    }
+#endif
+
+    return (dxPosR*) dAlloc (sizeof(dxPosR));
+}
+
+// Releases a dxPosR obtained from dAllocPosr(). With atomics enabled the
+// block is first offered to the single-slot cache (compare-exchange against
+// NULL); it is handed to dFree() only when that exchange reports failure or
+// when atomics are disabled.
+static inline void dFreePosr(dxPosR *oldPosR)
+{
+#if dATOMICS_ENABLED
+    if (AtomicCompareExchangePointer(&s_cachedPosR, NULL, (atomicptr)oldPosR))
+    {
+        // parked in the cache for reuse
+        return;
+    }
+#endif
+
+    dFree(oldPosR, sizeof(dxPosR));
+}
+
+// Frees the dxPosR left in the single-slot cache, if any, and empties the
+// slot. Does nothing when atomics are disabled.
+/*extern */void dClearPosrCache(void)
+{
+#if dATOMICS_ENABLED
+    // No threads should be accessing ODE at this time already,
+    // so a plain (non-atomic) read of the slot is sufficient.
+    dxPosR *const leftoverPosR = (dxPosR *)s_cachedPosR;
+    if (!leftoverPosR)
+    {
+        return;
+    }
+
+    dFree(leftoverPosR, sizeof(dxPosR));
+    s_cachedPosR = 0;
+#endif
+}
+
+// State threaded through dSpaceCollide2() by dCollideSpaceGeom(); updated by
+// space_geom_collider() after every pairwise dCollide() call.
+struct SpaceGeomColliderData {
+    int flags;			// dCollide() flags; the NUMC_MASK bits hold the space left in contacts array
+    dContactGeom *contact;	// next free slot in the caller's contact array
+    int skip;			// byte stride between consecutive contact slots
+};
+
+
+// Near callback used by dCollideSpaceGeom(): collides o1 with o2 as long as
+// contact slots remain, then advances the output pointer past the contacts
+// just written and reduces the remaining slot count kept in `flags`.
+static void space_geom_collider (void *data, dxGeom *o1, dxGeom *o2)
+{
+    SpaceGeomColliderData *const state = (SpaceGeomColliderData*) data;
+
+    // contact array already full: nothing more to do for this pair
+    if ((state->flags & NUMC_MASK) == 0) {
+        return;
+    }
+
+    const int generated = dCollide (o1, o2, state->flags, state->contact, state->skip);
+    state->contact = CONTACT (state->contact, state->skip*generated);
+    state->flags -= generated;
+}
+
+
+// Collider used when at least one of the two geoms is a space: delegates to
+// dSpaceCollide2() with space_geom_collider() as the near callback.
+// Returns the number of contacts written, computed as the difference between
+// the requested and the remaining slot counts.
+static int dCollideSpaceGeom (dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    SpaceGeomColliderData state = { flags, contact, skip };
+
+    dSpaceCollide2 (o1, o2, &state, &space_geom_collider);
+
+    const int slotsRequested = flags & NUMC_MASK;
+    const int slotsRemaining = state.flags & NUMC_MASK;
+    return slotsRequested - slotsRemaining;
+}
+
+//****************************************************************************
+// dispatcher for the N^2 collider functions
+
+// function pointers and modes for n^2 class collider functions
+
+// One cell of the class-pair dispatch table consulted by dCollide().
+struct dColliderEntry {
+    dColliderFn *fn;	// collider function, 0 = no function available
+    int reverse;		// 1 = reverse o1 and o2
+};
+// Dispatch table indexed by [class of o1][class of o2]; filled in by
+// dInitColliders() and patched by dSetColliderOverride().
+static dColliderEntry colliders[dGeomNumClasses][dGeomNumClasses];
+// Non-zero between dInitColliders() and dFinitColliders().
+static int colliders_initialized = 0;
+
+
+// setCollider() will refuse to write over a collider entry once it has
+// been written.
+
+// Registers fn for the class pair (i, j) and, with the arguments reversed,
+// for (j, i). A table cell that already holds a function is left untouched,
+// so the first registration for a pair wins.
+static void setCollider (int i, int j, dColliderFn *fn)
+{
+    dColliderEntry &direct = colliders[i][j];
+    if (!direct.fn) {
+        direct.fn = fn;
+        direct.reverse = 0;
+    }
+
+    // mirrored cell: same function, geoms swapped by dCollide()
+    dColliderEntry &mirrored = colliders[j][i];
+    if (!mirrored.fn) {
+        mirrored.fn = fn;
+        mirrored.reverse = 1;
+    }
+}
+
+
+// Registers fn for class i paired with every geom class (including itself);
+// cells that are already occupied are kept, as in setCollider().
+static void setAllColliders (int i, dColliderFn *fn)
+{
+    int other = 0;
+    while (other < dGeomNumClasses) {
+        setCollider (i, other, fn);
+        ++other;
+    }
+}
+
+// Builds the class-pair collider dispatch table used by dCollide().
+//
+// Must be called once while the table is not initialized (asserted). The
+// table is zeroed and then populated through setCollider(), which never
+// overwrites an occupied cell - the ORDER of the registrations below
+// therefore decides which collider wins when a pair is registered twice.
+// Pairs that receive no registration keep a null function and make
+// dCollide() report zero contacts.
+/*extern */void dInitColliders()
+{
+    dIASSERT(!colliders_initialized);
+    colliders_initialized = 1;
+
+    memset (colliders,0,sizeof(colliders));
+
+    int i,j;
+
+    // setup space colliders
+    // (registered first, so they take precedence for every pair that
+    // involves a space class)
+    for (i=dFirstSpaceClass; i <= dLastSpaceClass; i++) {
+        for (j=0; j < dGeomNumClasses; j++) {
+            setCollider (i,j,&dCollideSpaceGeom);
+        }
+    }
+
+    // primitive vs primitive colliders
+    setCollider (dSphereClass,dSphereClass,&dCollideSphereSphere);
+    setCollider (dSphereClass,dBoxClass,&dCollideSphereBox);
+    setCollider (dSphereClass,dPlaneClass,&dCollideSpherePlane);
+    setCollider (dBoxClass,dBoxClass,&dCollideBoxBox);
+    setCollider (dBoxClass,dPlaneClass,&dCollideBoxPlane);
+    setCollider (dCapsuleClass,dSphereClass,&dCollideCapsuleSphere);
+    setCollider (dCapsuleClass,dBoxClass,&dCollideCapsuleBox);
+    setCollider (dCapsuleClass,dCapsuleClass,&dCollideCapsuleCapsule);
+    setCollider (dCapsuleClass,dPlaneClass,&dCollideCapsulePlane);
+    setCollider (dRayClass,dSphereClass,&dCollideRaySphere);
+    setCollider (dRayClass,dBoxClass,&dCollideRayBox);
+    setCollider (dRayClass,dCapsuleClass,&dCollideRayCapsule);
+    setCollider (dRayClass,dPlaneClass,&dCollideRayPlane);
+    setCollider (dRayClass,dCylinderClass,&dCollideRayCylinder);
+    // triangle mesh colliders, only when trimesh support is compiled in
+#if dTRIMESH_ENABLED
+    setCollider (dTriMeshClass,dSphereClass,&dCollideSTL);
+    setCollider (dTriMeshClass,dBoxClass,&dCollideBTL);
+    setCollider (dTriMeshClass,dRayClass,&dCollideRTL);
+    setCollider (dTriMeshClass,dTriMeshClass,&dCollideTTL);
+    setCollider (dTriMeshClass,dCapsuleClass,&dCollideCCTL);
+    setCollider (dTriMeshClass,dPlaneClass,&dCollideTrimeshPlane);
+    setCollider (dCylinderClass,dTriMeshClass,&dCollideCylinderTrimesh);
+    setCollider (dConvexClass,dTriMeshClass,&dCollideConvexTrimesh);
+#endif
+
+    // cylinder colliders; the dLIBCCD_* switches select the libccd based
+    // implementation for the corresponding pair
+#ifdef dLIBCCD_BOX_CYL
+    setCollider (dBoxClass,dCylinderClass,&dCollideBoxCylinderCCD);
+#else
+    setCollider (dCylinderClass,dBoxClass,&dCollideCylinderBox);
+#endif
+    setCollider (dCylinderClass,dSphereClass,&dCollideCylinderSphere);
+    setCollider (dCylinderClass,dPlaneClass,&dCollideCylinderPlane);
+
+    // these two pairs have no non-libccd registration in this function
+#ifdef dLIBCCD_CYL_CYL
+    setCollider (dCylinderClass, dCylinderClass, &dCollideCylinderCylinder);
+#endif
+#ifdef dLIBCCD_CAP_CYL
+    setCollider (dCapsuleClass, dCylinderClass, &dCollideCapsuleCylinder);
+#endif
+
+    //--> Convex Collision
+#ifdef dLIBCCD_CONVEX_BOX
+    setCollider (dConvexClass, dBoxClass, &dCollideConvexBoxCCD);
+#else
+    setCollider (dConvexClass,dBoxClass,&dCollideConvexBox);
+#endif
+
+#ifdef dLIBCCD_CONVEX_CAP
+    setCollider (dConvexClass,dCapsuleClass,&dCollideConvexCapsuleCCD);
+#else
+    setCollider (dConvexClass,dCapsuleClass,&dCollideConvexCapsule);
+#endif
+
+    // convex vs cylinder is only registered when the libccd switch is set
+#ifdef dLIBCCD_CONVEX_CYL
+    setCollider (dConvexClass,dCylinderClass,&dCollideConvexCylinderCCD);
+#endif
+
+#ifdef dLIBCCD_CONVEX_SPHERE
+    setCollider (dConvexClass,dSphereClass,&dCollideConvexSphereCCD);
+#else
+    setCollider (dSphereClass,dConvexClass,&dCollideSphereConvex);
+#endif
+
+#ifdef dLIBCCD_CONVEX_CONVEX
+    setCollider (dConvexClass,dConvexClass,&dCollideConvexConvexCCD);
+#else
+    setCollider (dConvexClass,dConvexClass,&dCollideConvexConvex);
+#endif
+
+    setCollider (dConvexClass,dPlaneClass,&dCollideConvexPlane);
+    setCollider (dRayClass,dConvexClass,&dCollideRayConvex);
+    //<-- Convex Collision
+
+    //--> dHeightfield Collision
+    // (one collider function serves every heightfield pair)
+    setCollider (dHeightfieldClass,dRayClass,&dCollideHeightfield);
+    setCollider (dHeightfieldClass,dSphereClass,&dCollideHeightfield);
+    setCollider (dHeightfieldClass,dBoxClass,&dCollideHeightfield);
+    setCollider (dHeightfieldClass,dCapsuleClass,&dCollideHeightfield);
+    setCollider (dHeightfieldClass,dCylinderClass,&dCollideHeightfield);
+    setCollider (dHeightfieldClass,dConvexClass,&dCollideHeightfield);
+#if dTRIMESH_ENABLED
+    setCollider (dHeightfieldClass,dTriMeshClass,&dCollideHeightfield);
+#endif
+    //<-- dHeightfield Collision
+
+    // registered last: the transform collider only fills the pairs with
+    // dGeomTransformClass that are still empty at this point
+    setAllColliders (dGeomTransformClass,&dCollideTransform);
+}
+
+// Marks the collider table as uninitialized so that dInitColliders() may be
+// called again. The table contents themselves are left as they are.
+/*extern */void dFinitColliders()
+{
+    colliders_initialized = 0;
+}
+
+// Unconditionally installs fn as the collider for the class pair (i, j) and,
+// with reversed arguments, for (j, i), replacing whatever dInitColliders()
+// registered there. Requires the collider table to be initialized.
+//
+// i, j - geom class numbers; both must lie in [0, dGeomNumClasses).
+// fn   - collider to install; as elsewhere in the table, a null function
+//        makes dCollide() report zero contacts for the pair.
+//
+// Note: when i == j both assignments hit the same cell, so the entry ends up
+// with reverse == 1 and dCollide() will call fn with the geoms swapped.
+void dSetColliderOverride (int i, int j, dColliderFn *fn)
+{
+    dIASSERT( colliders_initialized );
+    // Both bounds are checked (as dCollide() does for geom types): a
+    // negative class number would index the table out of range.
+    dAASSERT( i >= 0 && i < dGeomNumClasses );
+    dAASSERT( j >= 0 && j < dGeomNumClasses );
+
+    colliders[i][j].fn = fn;
+    colliders[i][j].reverse = 0;
+    colliders[j][i].fn = fn;
+    colliders[j][i].reverse = 1;
+}
+
+/*
+*	NOTE!
+*	If it is necessary to add special processing mode without contact generation
+*	use NULL contact parameter value as indicator, not zero in flags.
+*/
+// Generates contacts between o1 and o2 by dispatching on their class pair.
+//
+// flags   - low bits (NUMC_MASK) give the number of contact slots available.
+// contact - first contact slot; consecutive slots are `skip` bytes apart.
+// Returns the number of contacts written. Zero is returned without calling
+// any collider when no slots are requested, when o1 == o2, when both geoms
+// are attached to the same (non-null) body, or when the table has no
+// collider for the pair. For table entries marked `reverse` the collider is
+// called with the geoms swapped and every resulting contact is mirrored
+// (normal negated, g1/g2 and side1/side2 exchanged).
+int dCollide (dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    dAASSERT(o1 && o2 && contact);
+    dUASSERT(colliders_initialized,"Please call ODE initialization (dInitODE() or similar) before using the library");
+    dUASSERT(o1->type >= 0 && o1->type < dGeomNumClasses,"bad o1 class number");
+    dUASSERT(o2->type >= 0 && o2->type < dGeomNumClasses,"bad o2 class number");
+    // Other places compare the slot count with "greater or equal to one";
+    // here "greater than zero" is the more natural wording because no
+    // particular number of slots is required - just a positive one.
+    dUASSERT((flags & NUMC_MASK) > 0, "no contacts requested"); 
+
+    // Extra precaution for zero contact count in parameters
+    if ((flags & NUMC_MASK) == 0) {
+        return 0;
+    }
+
+    // a geom never collides with itself, nor with a geom on the same body
+    // (geoms without a body are not affected by the second rule)
+    if (o1 == o2 || (o1->body == o2->body && o1->body)) {
+        return 0;
+    }
+
+    o1->recomputePosr();
+    o2->recomputePosr();
+
+    const dColliderEntry &entry = colliders[o1->type][o2->type];
+    if (!entry.fn) {
+        return 0;
+    }
+
+    if (!entry.reverse) {
+        return (*entry.fn) (o1,o2,flags,contact,skip);
+    }
+
+    // reversed entry: collide (o2, o1) and mirror each contact afterwards
+    const int count = (*entry.fn) (o2,o1,flags,contact,skip);
+    for (int k=0; k<count; k++) {
+        dContactGeom *c = CONTACT(contact,skip*k);
+
+        for (int axis=0; axis<3; axis++) {
+            c->normal[axis] = -c->normal[axis];
+        }
+
+        dxGeom *const firstGeom = c->g1;
+        c->g1 = c->g2;
+        c->g2 = firstGeom;
+
+        const int firstSide = c->side1;
+        c->side1 = c->side2;
+        c->side2 = firstSide;
+    }
+    return count;
+}
+
+//****************************************************************************
+// dxGeom
+
+// Base constructor for all geoms.
+//
+// _space       - space to insert the new geom into, or 0 for none.
+// is_placeable - non-zero gives the geom its own position/rotation record,
+//                initialised to the origin with identity rotation.
+// The class number is left at the invalid value -1; the derived constructor
+// is expected to set it.
+dxGeom::dxGeom (dSpaceID _space, int is_placeable)
+{
+    // invalid type of -1 must be changed by the constructor.
+    type = -1;
+    gflags = GEOM_DIRTY | GEOM_AABB_BAD | GEOM_ENABLED;
+
+    // body linkage and user data
+    data = 0;
+    body = 0;
+    body_next = 0;
+
+    // placement
+    offset_posr = 0;
+    if (!is_placeable) {
+        final_posr = 0;
+    }
+    else {
+        gflags |= GEOM_PLACEABLE;
+        final_posr = dAllocPosr();
+        dSetZero (final_posr->pos,4);
+        dRSetIdentity (final_posr->R);
+    }
+
+    // space linkage
+    next = 0;
+    tome = 0;
+    next_ex = 0;
+    tome_ex = 0;
+    parent_space = 0;
+    dSetZero (aabb,6);
+
+    // all category / collide bits set by default
+    category_bits = ~0;
+    collide_bits = ~0;
+
+    // insertion comes last, once every member has a defined value
+    if (_space) {
+        dSpaceAdd (_space,this);
+    }
+}
+
+
+// Detaches the geom from its space, releases its position records and
+// unlinks it from its body.
+dxGeom::~dxGeom()
+{
+    if (parent_space) {
+        dSpaceRemove (parent_space,this);
+    }
+
+    // final_posr is released only for placeable geoms that either have no
+    // body or carry an offset.
+    // NOTE(review): presumably a body-attached geom without an offset uses
+    // the body's record rather than its own - confirm against the code
+    // that attaches geoms to bodies.
+    const bool releaseFinalPosr = (gflags & GEOM_PLACEABLE) && (!body || offset_posr);
+    if (releaseFinalPosr) {
+        dFreePosr(final_posr);
+    }
+
+    if (offset_posr) {
+        dFreePosr(offset_posr);
+    }
+
+    bodyRemove();
+}
+
+// Returns the tls_kind of the space containing this geom, or
+// dSPACE_TLS_KIND_INIT_VALUE when the geom is not in any space.
+unsigned dxGeom::getParentSpaceTLSKind() const
+{
+    if (!parent_space)
+    {
+        return dSPACE_TLS_KIND_INIT_VALUE;
+    }
+
+    return parent_space->tls_kind;
+}
+
+int dxGeom::AABBTest (dxGeom *, dReal [6])  // default implementation: ignores both arguments and always answers 1 (yes)
+{
+    return 1;
+}
+
+
+void dxGeom::bodyRemove()
+{
+    // nothing to unlink when the geom is not attached to a body
+    if (!body) return;
+    // walk the body's singly linked geom list through the address of each
+    // link, so removing the head and removing an inner node are one case
+    for (dxGeom **link = &body->geom; *link; link = &(*link)->body_next) {
+        if (*link == this) {
+            *link = body_next;
+            break;
+        }
+    }
+
+    // detach from the body whether or not this geom was found in its list
+    body = 0;
+    body_next = 0;
+}
+
+inline void myswap(dReal& a, dReal& b) { dReal t=b; b=a; a=t; }  // exchange the two values in place
+
+
+inline void matrixInvert(const dMatrix3& inMat, dMatrix3& outMat)  // outMat = inMat with its 3x3 part transposed (row stride 4)
+{
+    memcpy(outMat, inMat, sizeof(dMatrix3));  // NOTE(review): a transpose is the inverse only for rotation matrices; callers in this file pass posr R members
+    // swap _12 and _21
+    myswap(outMat[0 + 4*1], outMat[1 + 4*0]);
+    // swap _31 and _13
+    myswap(outMat[2 + 4*0], outMat[0 + 4*2]);
+    // swap _23 and _32
+    myswap(outMat[1 + 4*2], outMat[2 + 4*1]);
+}
+
+void getBodyPosr(const dxPosR& offset_posr, const dxPosR& final_posr, dxPosR& body_posr)  // find the body pose that puts a geom with this offset at final_posr
+{
+    dMatrix3 inv_offset;
+    matrixInvert(offset_posr.R, inv_offset);
+    // body.R = final.R * offset.R^-1 (dMultiply0: A = B*C in ODE's convention)
+    dMultiply0_333(body_posr.R, final_posr.R, inv_offset);
+    dVector3 world_offset;  // the offset position rotated by the new body rotation
+    dMultiply0_331(world_offset, body_posr.R, offset_posr.pos);
+    body_posr.pos[0] = final_posr.pos[0] - world_offset[0];
+    body_posr.pos[1] = final_posr.pos[1] - world_offset[1];
+    body_posr.pos[2] = final_posr.pos[2] - world_offset[2];
+}
+
+void getWorldOffsetPosr(const dxPosR& body_posr, const dxPosR& world_posr, dxPosR& offset_posr)  // express world_posr relative to body_posr
+{
+    dMatrix3 inv_body;
+    matrixInvert(body_posr.R, inv_body);
+    // offset.R = body.R^-1 * world.R (dMultiply0: A = B*C in ODE's convention)
+    dMultiply0_333(offset_posr.R, inv_body, world_posr.R);
+    dVector3 world_offset;  // world-space vector from the body origin to the target position
+    world_offset[0] = world_posr.pos[0] - body_posr.pos[0];
+    world_offset[1] = world_posr.pos[1] - body_posr.pos[1];
+    world_offset[2] = world_posr.pos[2] - body_posr.pos[2];
+    dMultiply0_331(offset_posr.pos, inv_body, world_offset);  // rotate that vector into the body frame
+}
+
+void dxGeom::computePosr()  // final_posr = body posr composed with offset_posr
+{
+    // should only be recalced if we need to - ie offset from a body
+    dIASSERT(offset_posr);  // both pointers are dereferenced below
+    dIASSERT(body);
+    // final.pos = body.R * offset.pos + body.pos
+    dMultiply0_331 (final_posr->pos,body->posr.R,offset_posr->pos);
+    final_posr->pos[0] += body->posr.pos[0];
+    final_posr->pos[1] += body->posr.pos[1];
+    final_posr->pos[2] += body->posr.pos[2];
+    dMultiply0_333 (final_posr->R,body->posr.R,offset_posr->R);  // final.R = body.R * offset.R
+}
+
+bool dxGeom::controlGeometry(int /*controlClass*/, int /*controlCode*/, void * /*dataValue*/, int *dataSize)
+{
+    dAASSERT(false && "Control class/code is not supported for current geom");
+    // base implementation supports no control codes: zero the size and report failure
+    *dataSize = 0;
+    return false;
+}
+
+//****************************************************************************
+// misc
+
+dxGeom *dGeomGetBodyNext (dxGeom *geom)  // next geom in the owning body's geom list (no null check on geom)
+{
+    return geom->body_next;
+}
+
+//****************************************************************************
+// public API for geometry objects
+
+void dGeomDestroy (dxGeom *g)  // delete the geom; its destructor detaches it from space and body
+{
+    dAASSERT (g);
+    delete g;
+}
+
+
+void dGeomSetData (dxGeom *g, void *data)  // store the user-defined data pointer
+{
+    dAASSERT (g);
+    g->data = data;
+}
+
+
+void *dGeomGetData (dxGeom *g)  // return the user-defined data pointer
+{
+    dAASSERT (g);
+    return g->data;
+}
+
+
+void dGeomSetBody (dxGeom *g, dxBody *b)
+{
+    dAASSERT (g);
+    dUASSERT (b == NULL || (g->gflags & GEOM_PLACEABLE),"geom must be placeable");
+    CHECK_NOT_LOCKED (g->parent_space);
+    // attach g to body b (g then shares b's posr), or detach it when b is NULL
+    if (b) {
+        if (!g->body) dFreePosr(g->final_posr);  // a bodiless geom owns its final_posr
+        if (g->body != b) {
+            if (g->offset_posr) {  // an offset geom owns a private final_posr too; free it, and clear the stale POSR_BAD flag
+                dFreePosr(g->final_posr);
+                dFreePosr(g->offset_posr);
+                g->offset_posr = 0;
+                g->gflags &= ~GEOM_POSR_BAD;
+            }
+            g->final_posr = &b->posr;
+            g->bodyRemove();
+            g->bodyAdd (b);
+        }
+        dGeomMoved (g);
+    }
+    else {
+        if (g->body) {
+            if (g->offset_posr) {
+                // if we're offset, we already have our own final position, make sure its updated
+                g->recomputePosr();
+                dFreePosr(g->offset_posr);
+                g->offset_posr = 0;
+            }
+            else {
+                g->final_posr = dAllocPosr();
+                memcpy (g->final_posr->pos,g->body->posr.pos,sizeof(dVector3));
+                memcpy (g->final_posr->R,g->body->posr.R,sizeof(dMatrix3));
+            }
+            g->bodyRemove();
+        }
+        // dGeomMoved() should not be called if the body is being set to 0, as the
+        // new position of the geom is set to the old position of the body, so the
+        // effective position of the geom remains unchanged.
+    }
+}
+
+
+dBodyID dGeomGetBody (dxGeom *g)  // body the geom is attached to, or 0 if none
+{
+    dAASSERT (g);
+    return g->body;
+}
+
+
+void dGeomSetPosition (dxGeom *g, dReal x, dReal y, dReal z)  // make (x,y,z) the geom's world position
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (g->offset_posr) {
+        // move body such that body+offset = position
+        dVector3 world_offset;  // the offset position rotated into world axes
+        dMultiply0_331(world_offset, g->body->posr.R, g->offset_posr->pos);
+        dBodySetPosition(g->body,
+            x - world_offset[0],
+            y - world_offset[1],
+            z - world_offset[2]);
+    }
+    else if (g->body) {  // no offset: the geom shares the body's posr, so move the body
+        // this will call dGeomMoved (g), so we don't have to
+        dBodySetPosition (g->body,x,y,z);
+    }
+    else {  // free-standing geom: write its own posr and flag the move
+        g->final_posr->pos[0] = x;
+        g->final_posr->pos[1] = y;
+        g->final_posr->pos[2] = z;
+        dGeomMoved (g);
+    }
+}
+
+
+void dGeomSetRotation (dxGeom *g, const dMatrix3 R)  // make R the geom's world rotation
+{
+    dAASSERT (g && R);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (g->offset_posr) {
+        g->recomputePosr();  // final_posr->pos must be current before it is copied below
+        // move body such that body+offset = rotation
+        dxPosR new_final_posr;
+        dxPosR new_body_posr;
+        memcpy(new_final_posr.pos, g->final_posr->pos, sizeof(dVector3));
+        memcpy(new_final_posr.R, R, sizeof(dMatrix3));
+        getBodyPosr(*g->offset_posr, new_final_posr, new_body_posr);
+        dBodySetRotation(g->body, new_body_posr.R);
+        dBodySetPosition(g->body, new_body_posr.pos[0], new_body_posr.pos[1], new_body_posr.pos[2]);
+    }
+    else if (g->body) {  // no offset: the geom shares the body's posr, so rotate the body
+        // this will call dGeomMoved (g), so we don't have to
+        dBodySetRotation (g->body,R);
+    }
+    else {  // free-standing geom: write its own posr and flag the move
+        memcpy (g->final_posr->R,R,sizeof(dMatrix3));
+        dGeomMoved (g);
+    }
+}
+
+
+void dGeomSetQuaternion (dxGeom *g, const dQuaternion quat)  // make quat the geom's world orientation
+{
+    dAASSERT (g && quat);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (g->offset_posr) {
+        g->recomputePosr();
+        // move body such that body+offset = rotation
+        dxPosR new_final_posr;
+        dxPosR new_body_posr;
+        dQtoR (quat, new_final_posr.R);
+        memcpy(new_final_posr.pos, g->final_posr->pos, sizeof(dVector3));
+        // the geom keeps its world position; only its orientation changes
+        getBodyPosr(*g->offset_posr, new_final_posr, new_body_posr);
+        dBodySetRotation(g->body, new_body_posr.R);
+        dBodySetPosition(g->body, new_body_posr.pos[0], new_body_posr.pos[1], new_body_posr.pos[2]);
+    }
+    else if (g->body) {  // exclusive with the offset case (as in dGeomSetRotation), else the body pose set above is overwritten
+        // this will call dGeomMoved (g), so we don't have to
+        dBodySetQuaternion (g->body,quat);
+    }
+    else {  // free-standing geom: write its own posr and flag the move
+        dQtoR (quat, g->final_posr->R);
+        dGeomMoved (g);
+    }
+}
+
+
+const dReal * dGeomGetPosition (dxGeom *g)  // pointer to the geom's world position (storage stays owned by the geom or its body)
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    // refreshes final_posr first if it is flagged stale
+    return g->buildUpdatedPosition();
+}
+
+
+void dGeomCopyPosition(dxGeom *g, dVector3 pos)  // copy the geom's world position into pos[0..2]
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    // refreshes final_posr first if it is flagged stale
+    const dVector3 &src = g->buildUpdatedPosition();
+    pos[0] = src[dV3E_X];
+    pos[1] = src[dV3E_Y];
+    pos[2] = src[dV3E_Z];
+}
+
+
+const dReal * dGeomGetRotation (dxGeom *g)  // pointer to the geom's world rotation matrix (storage stays owned by the geom or its body)
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    // refreshes final_posr first if it is flagged stale
+    return g->buildUpdatedRotation();
+}
+
+
+void dGeomCopyRotation(dxGeom *g, dMatrix3 R)  // copy the nine rotation entries into R; R[3], R[7], R[11] are not written
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    // refreshes final_posr first if it is flagged stale
+    const dMatrix3 &src = g->buildUpdatedRotation();
+    R[0]  = src[dM3E_XX];
+    R[1]  = src[dM3E_XY];
+    R[2]  = src[dM3E_XZ];
+    R[4]  = src[dM3E_YX];
+    R[5]  = src[dM3E_YY];
+    R[6]  = src[dM3E_YZ];
+    R[8]  = src[dM3E_ZX];
+    R[9]  = src[dM3E_ZY];
+    R[10] = src[dM3E_ZZ];
+}
+
+
+void dGeomGetQuaternion (dxGeom *g, dQuaternion quat)  // write the geom's world orientation into quat
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    if (g->body && !g->offset_posr) {  // geom shares the body's pose: copy the body's quaternion directly
+        const dReal * body_quat = dBodyGetQuaternion (g->body);
+        quat[0] = body_quat[0];
+        quat[1] = body_quat[1];
+        quat[2] = body_quat[2];
+        quat[3] = body_quat[3];
+    }
+    else {  // free-standing or offset geom: convert its own (refreshed) rotation matrix
+        g->recomputePosr();
+        dRtoQ (g->final_posr->R, quat);
+    }
+}
+
+
+void dGeomGetAABB (dxGeom *g, dReal aabb[6])  // copy the geom's cached AABB (six dReals) into aabb
+{
+    dAASSERT (g);
+    dAASSERT (aabb);
+    g->recomputeAABB();  // recomputes only when GEOM_AABB_BAD is set
+    memcpy (aabb,g->aabb,6 * sizeof(dReal));
+}
+
+
+int dGeomIsSpace (dxGeom *g)  // nonzero when the geom's type is one of the space classes
+{
+    dAASSERT (g);
+    return IS_SPACE(g);
+}
+
+
+dSpaceID dGeomGetSpace (dxGeom *g)  // space containing the geom, or 0 if none
+{
+    dAASSERT (g);
+    return g->parent_space;
+}
+
+
+int dGeomGetClass (dxGeom *g)  // the geom's type (class) number
+{
+    dAASSERT (g);
+    return g->type;
+}
+
+
+void dGeomSetCategoryBits (dxGeom *g, unsigned long bits)  // replace the category bit field; parent space must not be locked
+{
+    dAASSERT (g);
+    CHECK_NOT_LOCKED (g->parent_space);
+    g->category_bits = bits;
+}
+
+
+void dGeomSetCollideBits (dxGeom *g, unsigned long bits)  // replace the collide bit field; parent space must not be locked
+{
+    dAASSERT (g);
+    CHECK_NOT_LOCKED (g->parent_space);
+    g->collide_bits = bits;
+}
+
+
+unsigned long dGeomGetCategoryBits (dxGeom *g)  // current category bit field
+{
+    dAASSERT (g);
+    return g->category_bits;
+}
+
+
+unsigned long dGeomGetCollideBits (dxGeom *g)  // current collide bit field
+{
+    dAASSERT (g);
+    return g->collide_bits;
+}
+
+
+void dGeomEnable (dxGeom *g)  // set the GEOM_ENABLED flag
+{
+    dAASSERT (g);
+    g->gflags |= GEOM_ENABLED;
+}
+
+void dGeomDisable (dxGeom *g)  // clear the GEOM_ENABLED flag
+{
+    dAASSERT (g);
+    g->gflags &= ~GEOM_ENABLED;
+}
+
+int dGeomIsEnabled (dxGeom *g)  // 1 when GEOM_ENABLED is set, otherwise 0
+{
+    dAASSERT (g);
+    return (g->gflags & GEOM_ENABLED) != 0;
+}
+
+
+void dGeomGetRelPointPos (dGeomID g, dReal px, dReal py, dReal pz, dVector3 result)  // geom-relative point -> world point
+{
+    dAASSERT (g);
+    // a non-placeable geom has no pose, so the point is returned unchanged
+    if ((g->gflags & GEOM_PLACEABLE) == 0) {
+        result[0] = px;
+        result[1] = py;
+        result[2] = pz;
+        return;
+    }
+
+    g->recomputePosr();
+    // result = R * p + pos
+    dVector3 prel,p;
+    prel[0] = px;
+    prel[1] = py;
+    prel[2] = pz;
+    prel[3] = 0;
+    dMultiply0_331 (p,g->final_posr->R,prel);
+    result[0] = p[0] + g->final_posr->pos[0];
+    result[1] = p[1] + g->final_posr->pos[1];
+    result[2] = p[2] + g->final_posr->pos[2];
+}
+
+
+void dGeomGetPosRelPoint (dGeomID g, dReal px, dReal py, dReal pz, dVector3 result)  // world point -> geom-relative point
+{
+    dAASSERT (g);
+    if ((g->gflags & GEOM_PLACEABLE) == 0) {  // no pose: the point is returned unchanged
+        result[0] = px;
+        result[1] = py;
+        result[2] = pz;
+        return;
+    }
+
+    g->recomputePosr();
+    // result = R^T * (p - pos); dMultiply1 multiplies by the transposed matrix in ODE's convention
+    dVector3 prel;
+    prel[0] = px - g->final_posr->pos[0];
+    prel[1] = py - g->final_posr->pos[1];
+    prel[2] = pz - g->final_posr->pos[2];
+    prel[3] = 0;
+    dMultiply1_331 (result,g->final_posr->R,prel);
+}
+
+
+void dGeomVectorToWorld (dGeomID g, dReal px, dReal py, dReal pz, dVector3 result)  // rotate a geom-relative vector into world axes
+{
+    dAASSERT (g);
+    if ((g->gflags & GEOM_PLACEABLE) == 0) {  // no pose: the vector is returned unchanged
+        result[0] = px;
+        result[1] = py;
+        result[2] = pz;
+        return;
+    }
+
+    g->recomputePosr();
+    // result = R * p (rotation only, no translation)
+    dVector3 p;
+    p[0] = px;
+    p[1] = py;
+    p[2] = pz;
+    p[3] = 0;
+    dMultiply0_331 (result,g->final_posr->R,p);
+}
+
+
+void dGeomVectorFromWorld (dGeomID g, dReal px, dReal py, dReal pz, dVector3 result)  // rotate a world vector into the geom's axes
+{
+    dAASSERT (g);
+    if ((g->gflags & GEOM_PLACEABLE) == 0) {  // no pose: the vector is returned unchanged
+        result[0] = px;
+        result[1] = py;
+        result[2] = pz;
+        return;
+    }
+
+    g->recomputePosr();
+    // result = R^T * p (rotation only; dMultiply1 multiplies by the transposed matrix in ODE's convention)
+    dVector3 p;
+    p[0] = px;
+    p[1] = py;
+    p[2] = pz;
+    p[3] = 0;
+    dMultiply1_331 (result,g->final_posr->R,p);
+}
+
+
+
+int dGeomLowLevelControl (dxGeom *g, int controlClass, int controlCode, void *dataValue, int *dataSize)
+{
+    dAASSERT (g);
+    dAASSERT (dataSize);
+    // fail softly on a missing dataSize when the assertion above does not stop execution
+    if (!dataSize) {
+        return false;
+    }
+    // forward to the geom's virtual handler; its bool result is returned as int
+    bool result = g->controlGeometry(controlClass, controlCode, dataValue, dataSize);
+    return result;
+}
+
+//****************************************************************************
+// C interface that lets the user make new classes. this interface is a lot
+// more cumbersome than C++ subclassing, which is what is used internally
+// in ODE. this API is mainly to support legacy code.
+
+static int num_user_classes = 0;  // number of user classes registered so far
+static dGeomClass user_classes [dMaxUserClasses];  // registered class descriptors, indexed by (class number - dFirstUserClass)
+
+
+struct dxUserGeom : public dxGeom {  // geom whose behavior comes from a registered dGeomClass
+    void *user_data;  // zero-initialized block of the class's 'bytes' size
+
+    dxUserGeom (int class_num);
+    ~dxUserGeom();
+    void computeAABB();  // forwards to the class's aabb callback
+    int AABBTest (dxGeom *o, dReal aabb[6]);  // forwards to the class's aabb_test callback when present
+};
+
+
+dxUserGeom::dxUserGeom (int class_num) : dxGeom (0,1)  // placeable geom in no space, with zeroed per-instance class data
+{
+    type = class_num;
+    int size = user_classes[type-dFirstUserClass].bytes;
+    user_data = dAlloc (size);
+    memset (user_data,0,size);
+}
+
+
+dxUserGeom::~dxUserGeom()  // run the class's optional dtor callback, then release the class data
+{
+    dGeomClass *c = &user_classes[type-dFirstUserClass];
+    if (c->dtor) c->dtor (this);
+    dFree (user_data,c->bytes);
+}
+
+
+void dxUserGeom::computeAABB()  // delegate to the class's aabb callback (dCreateGeomClass asserts it is set)
+{
+    user_classes[type-dFirstUserClass].aabb (this,aabb);
+}
+
+
+int dxUserGeom::AABBTest (dxGeom *o, dReal aabb[6])  // use the class's aabb_test callback if it has one, else answer 1 (yes)
+{
+    dGeomClass *c = &user_classes[type-dFirstUserClass];
+    if (c->aabb_test) return c->aabb_test (this,o,aabb);
+    else return 1;
+}
+
+
+static int dCollideUserGeomWithGeom (dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    // this generic collider function is called the first time that a user class
+    // tries to collide against something. it will find out the correct collider
+    // function and then set the colliders array so that the correct function is
+    // called directly the next time around.
+
+    int t1 = o1->type;	// note that o1 is a user geom
+    int t2 = o2->type;	// o2 *may* be a user geom
+
+    // find the collider function to use. if o1 does not know how to collide with
+    // o2, then o2 might know how to collide with o1 (provided that it is a user
+    // geom).
+    dColliderFn *fn = user_classes[t1-dFirstUserClass].collider (t2);
+    int reverse = 0;
+    if (!fn && t2 >= dFirstUserClass && t2 <= dLastUserClass) {
+        fn = user_classes[t2-dFirstUserClass].collider (t1);
+        reverse = 1;  // fn expects (o2,o1), so the entry for [t1][t2] must swap its arguments
+    }
+
+    // set the colliders array so that the correct function is called directly
+    // the next time around. note that fn can be 0 here if no collider was found,
+    // which means that dCollide() will always return 0 for this case.
+    colliders[t1][t2].fn = fn;
+    colliders[t1][t2].reverse = reverse;
+    colliders[t2][t1].fn = fn;  // when t1 == t2 these two stores overwrite the pair above
+    colliders[t2][t1].reverse = !reverse;
+
+    // now call the collider function indirectly through dCollide(), so that
+    // contact reversing is properly handled.
+    return dCollide (o1,o2,flags,contact,skip);
+}
+
+
+int dCreateGeomClass (const dGeomClass *c)  // register a user geom class (copied by value) and return its class number
+{
+    dUASSERT(c && c->bytes >= 0 && c->collider && c->aabb,"bad geom class");
+    // NOTE(review): if dDebug() can return, the store below writes past user_classes -- confirm dDebug never returns
+    if (num_user_classes >= dMaxUserClasses) {
+        dDebug (0,"too many user classes, you must increase the limit and "
+            "recompile ODE");
+    }
+    user_classes[num_user_classes] = *c;
+    int class_number = num_user_classes + dFirstUserClass;
+    setAllColliders (class_number,&dCollideUserGeomWithGeom);  // the real collider is resolved lazily on first collision
+
+    num_user_classes++;
+    return class_number;
+}
+
+/*extern */void dFinitUserClasses()  // forget all registered user classes by resetting the counter
+{
+    num_user_classes = 0;
+}
+
+void * dGeomGetClassData (dxGeom *g)  // per-instance data block of a user-class geom
+{
+    dUASSERT (g && g->type >= dFirstUserClass &&
+        g->type <= dLastUserClass,"not a custom class");
+    dxUserGeom *user = (dxUserGeom*) g;  // the cast relies on the type-range check above
+    return user->user_data;
+}
+
+
+dGeomID dCreateGeom (int classnum)  // create a geom of a user class; the caller releases it with dGeomDestroy()
+{
+    dUASSERT (classnum >= dFirstUserClass &&
+        classnum <= dLastUserClass,"not a custom class");
+    return new dxUserGeom (classnum);
+}
+
+
+
+/* ************************************************************************ */
+/* geom offset from body */
+
+void dGeomCreateOffset (dxGeom *g)  // give a body-attached geom an identity offset and its own final_posr
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    if (g->offset_posr)
+    {
+        return; // already created
+    }
+    dIASSERT (g->final_posr == &g->body->posr);  // without an offset the geom aliases the body's posr
+    // from here on the geom owns final_posr; its contents are filled in by computePosr()
+    g->final_posr = dAllocPosr();
+    g->offset_posr = dAllocPosr();
+    dSetZero (g->offset_posr->pos,4);
+    dRSetIdentity (g->offset_posr->R);
+    // the fresh final_posr is not initialized yet, so flag it for recomputation
+    g->gflags |= GEOM_POSR_BAD;
+}
+
+void dGeomSetOffsetPosition (dxGeom *g, dReal x, dReal y, dReal z)  // set the geom's position relative to its body
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset(g);
+    }
+    g->offset_posr->pos[0] = x;
+    g->offset_posr->pos[1] = y;
+    g->offset_posr->pos[2] = z;
+    dGeomMoved (g);
+}
+
+void dGeomSetOffsetRotation (dxGeom *g, const dMatrix3 R)  // set the geom's rotation relative to its body
+{
+    dAASSERT (g && R);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset (g);
+    }
+    memcpy (g->offset_posr->R,R,sizeof(dMatrix3));
+    dGeomMoved (g);
+}
+
+void dGeomSetOffsetQuaternion (dxGeom *g, const dQuaternion quat)  // set the body-relative rotation from a quaternion
+{
+    dAASSERT (g && quat);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset (g);
+    }
+    dQtoR (quat, g->offset_posr->R);
+    dGeomMoved (g);
+}
+
+void dGeomSetOffsetWorldPosition (dxGeom *g, dReal x, dReal y, dReal z)  // choose the offset so the geom sits at world (x,y,z)
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset(g);
+    }
+    dBodyGetPosRelPoint(g->body, x, y, z, g->offset_posr->pos);  // world point expressed in the body frame
+    dGeomMoved (g);
+}
+
+void dGeomSetOffsetWorldRotation (dxGeom *g, const dMatrix3 R)  // choose the offset so the geom's world rotation becomes R
+{
+    dAASSERT (g && R);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset (g);
+    }
+    g->recomputePosr();  // the current world position is read from final_posr below
+    // target pose: current world position combined with the requested rotation
+    dxPosR new_final_posr;
+    memcpy(new_final_posr.pos, g->final_posr->pos, sizeof(dVector3));
+    memcpy(new_final_posr.R, R, sizeof(dMatrix3));
+    // writes both the position and the rotation of the offset
+    getWorldOffsetPosr(g->body->posr, new_final_posr, *g->offset_posr);
+    dGeomMoved (g);
+}
+
+void dGeomSetOffsetWorldQuaternion (dxGeom *g, const dQuaternion quat)  // choose the offset so the geom's world orientation becomes quat
+{
+    dAASSERT (g && quat);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    dUASSERT (g->body, "geom must be on a body");
+    CHECK_NOT_LOCKED (g->parent_space);
+    if (!g->offset_posr)  // create the offset on first use
+    {
+        dGeomCreateOffset (g);
+    }
+    // the current world position is read from final_posr below
+    g->recomputePosr();
+    // target pose: current world position combined with the requested orientation
+    dxPosR new_final_posr;
+    memcpy(new_final_posr.pos, g->final_posr->pos, sizeof(dVector3));
+    dQtoR (quat, new_final_posr.R);
+    // writes both the position and the rotation of the offset
+    getWorldOffsetPosr(g->body->posr, new_final_posr, *g->offset_posr);
+    dGeomMoved (g);
+}
+
+void dGeomClearOffset(dxGeom *g)  // drop the offset so the geom shares its body's pose again; no-op without an offset
+{
+    dAASSERT (g);
+    dUASSERT (g->gflags & GEOM_PLACEABLE,"geom must be placeable");
+    if (g->offset_posr)
+    {
+        dIASSERT(g->body);
+        // no longer need an offset posr
+        dFreePosr(g->offset_posr);
+        g->offset_posr = 0;
+        // the geom will now share the position of the body
+        dFreePosr(g->final_posr);
+        g->final_posr = &g->body->posr;
+        // geom has moved
+        g->gflags &= ~GEOM_POSR_BAD;  // the body's posr is used directly, so nothing is left to recompute
+        dGeomMoved (g);
+    }
+}
+
+int dGeomIsOffset(dxGeom *g)  // 1 when the geom has an offset posr, otherwise 0
+{
+    dAASSERT (g);
+    return ((0 != g->offset_posr) ? 1 : 0);
+}
+
+static const dVector3 OFFSET_POSITION_ZERO = { 0.0f, 0.0f, 0.0f, 0.0f };  // offset position reported for geoms without an offset
+
+const dReal * dGeomGetOffsetPosition (dxGeom *g)  // pointer to the body-relative position, or to a shared zero vector
+{
+    dAASSERT (g);
+    if (g->offset_posr)
+    {
+        return g->offset_posr->pos;
+    }
+    return OFFSET_POSITION_ZERO;  // no offset: static constant, not owned by the geom
+}
+
+void dGeomCopyOffsetPosition (dxGeom *g, dVector3 pos)
+{
+    dAASSERT (g);
+    // Copy the geom's offset position (relative to its body) into pos.
+    // A geom without an offset reports the zero vector; only the first
+    // three components of pos are written.
+    const dReal *src = OFFSET_POSITION_ZERO;
+    if (g->offset_posr)
+    {
+        src = g->offset_posr->pos;
+    }
+
+    // element-wise copy of x, y, z
+    pos[0] = src[0];
+    pos[1] = src[1];
+    pos[2] = src[2];
+}
+
+static const dMatrix3 OFFSET_ROTATION_ZERO =  // identity rotation reported for geoms without an offset
+{ 
+    1.0f, 0.0f, 0.0f, 0.0f, 
+    0.0f, 1.0f, 0.0f, 0.0f, 
+    0.0f, 0.0f, 1.0f, 0.0f, 
+};
+
+const dReal * dGeomGetOffsetRotation (dxGeom *g)  // pointer to the body-relative rotation, or to a shared identity matrix
+{
+    dAASSERT (g);
+    if (g->offset_posr)
+    {
+        return g->offset_posr->R;
+    }
+    return OFFSET_ROTATION_ZERO;  // no offset: static constant, not owned by the geom
+}
+
+void dGeomCopyOffsetRotation (dxGeom *g, dMatrix3 R)
+{
+    dAASSERT (g);
+    // Copy the geom's offset rotation (relative to its body) into R.
+    // A geom without an offset reports the identity rotation.
+    // Only the nine rotation entries are written: the padding elements
+    // R[3], R[7] and R[11] are left untouched, so the caller's values
+    // in those slots survive the call.
+    const dReal *src = OFFSET_ROTATION_ZERO;
+    if (g->offset_posr)
+    {
+        src = g->offset_posr->R;
+    }
+
+    // first row
+    R[0]  = src[0];
+    R[1]  = src[1];
+    R[2]  = src[2];
+
+    // second row
+    R[4]  = src[4];
+    R[5]  = src[5];
+    R[6]  = src[6];
+
+    // third row
+    R[8]  = src[8];
+    R[9]  = src[9];
+    R[10] = src[10];
+}
+
+void dGeomGetOffsetQuaternion (dxGeom *g, dQuaternion result)  // write the body-relative orientation into result
+{
+    dAASSERT (g);
+    if (g->offset_posr)
+    {
+        dRtoQ (g->offset_posr->R, result);
+    }
+    else  // no offset: report the quaternion (1,0,0,0)
+    {
+        dSetZero (result,4);
+        result[0] = 1;
+    }
+}
+
+
diff --git a/libs/ode-0.16.1/ode/src/collision_kernel.h b/libs/ode-0.16.1/ode/src/collision_kernel.h
new file mode 100644
index 0000000..c982972
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_kernel.h
@@ -0,0 +1,293 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+internal data structures and functions for collision detection.
+
+*/
+
+#ifndef _ODE_COLLISION_KERNEL_H_
+#define _ODE_COLLISION_KERNEL_H_
+
+#include <ode/common.h>
+#include <ode/contact.h>
+#include <ode/collision.h>
+#include "objects.h"
+#include "odetls.h"
+#include "common.h"
+
+
+//****************************************************************************
+// constants and macros
+
+// mask for the number-of-contacts field in the dCollide() flags parameter
+#define NUMC_MASK (0xffff)
+// true when the geom's type number lies in the space class range (bounds as interpreted by dIN_RANGE)
+#define IS_SPACE(geom) \
+    dIN_RANGE((geom)->type, dFirstSpaceClass, dLastSpaceClass + 1)
+// user-level assertion that 'space' is either NULL or not currently locked
+#define CHECK_NOT_LOCKED(space) \
+    dUASSERT ((space) == NULL || (space)->lock_count == 0, \
+        "Invalid operation for locked space")
+
+
+//****************************************************************************
+// geometry object base class
+
+
+// geom flags.
+//
+// GEOM_DIRTY means that the space data structures for this geom are
+// potentially not up to date. NOTE THAT all space parents of a dirty geom
+// are themselves dirty. this is an invariant that must be enforced.
+//
+// GEOM_AABB_BAD means that the cached AABB for this geom is not up to date.
+// note that GEOM_DIRTY does not imply GEOM_AABB_BAD, as the geom might
+// recalculate its own AABB but does not know how to update the space data
+// structures for the space it is in. but GEOM_AABB_BAD implies GEOM_DIRTY.
+// the valid combinations are: 
+//		0
+//		GEOM_DIRTY
+//		GEOM_DIRTY|GEOM_AABB_BAD
+//		GEOM_DIRTY|GEOM_AABB_BAD|GEOM_POSR_BAD
+
+enum {
+    GEOM_DIRTY	= 1,    // geom is 'dirty', i.e. position unknown
+    GEOM_POSR_BAD = 2,    // geom's final posr is not valid
+    GEOM_AABB_BAD	= 4,    // geom's AABB is not valid
+    GEOM_PLACEABLE = 8,   // geom is placeable
+    GEOM_ENABLED = 16,    // geom is enabled
+    GEOM_ZERO_SIZED = 32, // geom is zero sized
+    // masked flags equal the test value only for a geom that is enabled and not zero sized
+    GEOM_ENABLE_TEST_MASK = GEOM_ENABLED | GEOM_ZERO_SIZED,
+    GEOM_ENABLE_TEST_VALUE = GEOM_ENABLED,
+
+    // Ray specific (these values lie above NUMC_MASK; presumably passed in the dCollide() flags word -- confirm)
+    RAY_FIRSTCONTACT = 0x10000,
+    RAY_BACKFACECULL = 0x20000,
+    RAY_CLOSEST_HIT  = 0x40000
+};
+
+enum dxContactMergeOptions {  // NOTE(review): consumers are not in this header; meanings below are read off the names only
+    DONT_MERGE_CONTACTS,      // presumably: keep contacts as generated
+    MERGE_CONTACT_NORMALS,    // presumably: merge only the normals
+    MERGE_CONTACTS_FULLY      // presumably: merge whole contacts
+};
+
+
+// geometry object base class. pos and R will either point to a separately
+// allocated buffer (if body is 0 - pos points to the dxPosR object) or to
+// the pos and R of the body (if body nonzero).
+// a dGeomID is a pointer to this object.
+
+struct dxGeom : public dBase {
+    int type;		// geom type number, set by subclass constructor
+    int gflags;		// flags used by geom and space
+    void *data;		// user-defined data pointer
+    dBodyID body;		// dynamics body associated with this object (if any)
+    dxGeom *body_next;	// next geom in body's linked list of associated geoms
+    dxPosR *final_posr;	// final position of the geom in world coordinates
+    dxPosR *offset_posr;	// offset from body in local coordinates
+
+    // information used by spaces
+    dxGeom *next;		// next geom in linked list of geoms
+    dxGeom **tome;	// linked list backpointer
+    dxGeom *next_ex;	// next geom in extra linked list of geoms (for higher level structures)
+    dxGeom **tome_ex;	// extra linked list backpointer (for higher level structures)
+    dxSpace *parent_space;// the space this geom is contained in, 0 if none
+    dReal aabb[6];	// cached AABB for this geom
+    unsigned long category_bits,collide_bits;  // user-settable bit fields; both start with all bits set
+
+    dxGeom (dSpaceID _space, int is_placeable);
+    virtual ~dxGeom();
+
+    // Set or clear GEOM_ZERO_SIZED flag
+    void updateZeroSizedFlag(bool is_zero_sized) { gflags = is_zero_sized ? (gflags | GEOM_ZERO_SIZED) : (gflags & ~GEOM_ZERO_SIZED); }
+    // Get parent space TLS kind
+    unsigned getParentSpaceTLSKind() const;
+    // refresh final_posr if needed and return a reference to its position
+    const dVector3 &buildUpdatedPosition()
+    {
+        dIASSERT(gflags & GEOM_PLACEABLE);
+        
+        recomputePosr();
+        return final_posr->pos;
+    }
+    // refresh final_posr if needed and return a reference to its rotation
+    const dMatrix3 &buildUpdatedRotation()
+    {
+        dIASSERT(gflags & GEOM_PLACEABLE);
+
+        recomputePosr();
+        return final_posr->R;
+    }
+
+    // recalculate our new final position if needed
+    void recomputePosr()
+    {
+        if (gflags & GEOM_POSR_BAD) {
+            computePosr();
+            gflags &= ~GEOM_POSR_BAD;
+        }
+    }
+
+    // calculate our new final position from our offset and body
+    void computePosr();
+    // returns true when *dataSize matches iRequiresSize and dataValue is set; otherwise stores the required size into *dataSize
+    bool checkControlValueSizeValidity(void *dataValue, int *dataSize, int iRequiresSize) { return (*dataSize == iRequiresSize && dataValue != 0) ? true : !(*dataSize = iRequiresSize); } // Here it is the intent to return true for 0 required size in any case
+    virtual bool controlGeometry(int controlClass, int controlCode, void *dataValue, int *dataSize);
+
+    virtual void computeAABB()=0;
+    // compute the AABB for this object and put it in aabb. this function
+    // always performs a fresh computation, it does not inspect the
+    // GEOM_AABB_BAD flag.
+
+    virtual int AABBTest (dxGeom *o, dReal aabb[6]);
+    // test whether the given AABB object intersects with this object, return
+    // 1=yes, 0=no. this is used as an early-exit test in the space collision
+    // functions. the default implementation returns 1, which is the correct
+    // behavior if no more detailed implementation can be provided.
+
+    // utility functions
+
+    // compute the AABB only if it is not current. this function manipulates
+    // the GEOM_AABB_BAD flag.
+
+    void recomputeAABB() {
+        if (gflags & GEOM_AABB_BAD) {
+            // our aabb functions assume final_posr is up to date
+            recomputePosr(); 
+            computeAABB();
+            gflags &= ~GEOM_AABB_BAD;
+        }
+    }
+    // set GEOM_DIRTY and GEOM_AABB_BAD (defined after dxSpace, below)
+    inline void markAABBBad();
+
+    // add and remove this geom from a linked list maintained by a space.
+
+    void spaceAdd (dxGeom **first_ptr) {  // push this geom at the front of the list headed by *first_ptr
+        next = *first_ptr;
+        tome = first_ptr;
+        if (*first_ptr) (*first_ptr)->tome = &next;
+        *first_ptr = this;
+    }
+    void spaceRemove() {  // unlink through the back pointer; next and tome themselves are left unchanged
+        if (next) next->tome = tome;
+        *tome = next;
+    }
+
+    // add and remove this geom from a linked list maintained by a body.
+
+    void bodyAdd (dxBody *b) {  // push this geom at the front of b's geom list
+        body = b;
+        body_next = b->geom;
+        b->geom = this;
+    }
+    void bodyRemove();
+};
+
+//****************************************************************************
+// the base space class
+//
+// the contained geoms are divided into two kinds: clean and dirty.
+// the clean geoms have not moved since they were put in the list,
+// and their AABBs are valid. the dirty geoms have changed position, and
+// their AABBs may not be valid. the two types are distinguished by the
+// GEOM_DIRTY flag. all dirty geoms come *before* all clean geoms in the list.
+
+#if dTLS_ENABLED  // TLS builds: map the two space TLS kinds onto the OTK_* values
+#define dSPACE_TLS_KIND_INIT_VALUE OTK__DEFAULT
+#define dSPACE_TLS_KIND_MANUAL_VALUE OTK_MANUALCLEANUP
+#else  // non-TLS builds: both kinds collapse to 0
+#define dSPACE_TLS_KIND_INIT_VALUE 0
+#define dSPACE_TLS_KIND_MANUAL_VALUE 0
+#endif
+
+struct dxSpace : public dxGeom {
+    int count;			// number of geoms in this space
+    dxGeom *first;		// first geom in list
+    int cleanup;			// cleanup mode, 1=destroy geoms on exit
+    int sublevel;         // space sublevel (used in dSpaceCollide2). NOT TRACKED AUTOMATICALLY!!!
+    unsigned tls_kind;	// space TLS kind to be used for global caches retrieval
+
+    // cached state for getGeom()
+    int current_index;		// only valid if current_geom != 0
+    dxGeom *current_geom;		// if 0 then there is no information
+
+    // locking stuff. the space is locked when it is currently traversing its
+    // internal data structures, e.g. in collide() and collide2(). operations
+    // that modify the contents of the space are not permitted when the space
+    // is locked.
+    int lock_count;
+
+    dxSpace (dSpaceID _space);
+    ~dxSpace();
+
+    void computeAABB();
+    // simple accessors; setCleanup() normalizes any nonzero mode to 1
+    void setCleanup (int mode) { cleanup = (mode != 0); }
+    int getCleanup() const { return cleanup; }
+    void setSublevel(int value) { sublevel = value; }
+    int getSublevel() const { return sublevel; }
+    void setManulCleanup(int value) { tls_kind = (value ? dSPACE_TLS_KIND_MANUAL_VALUE : dSPACE_TLS_KIND_INIT_VALUE); }  // name is spelled 'Manul'; kept as is for callers
+    int getManualCleanup() const { return (tls_kind == dSPACE_TLS_KIND_MANUAL_VALUE) ? 1 : 0; }
+    int query (dxGeom *geom) const { dAASSERT(geom); return (geom->parent_space == this); }  // nonzero when geom is directly contained in this space
+    int getNumGeoms() const { return count; }
+
+    virtual dxGeom *getGeom (int i);
+
+    virtual void add (dxGeom *);
+    virtual void remove (dxGeom *);
+    virtual void dirty (dxGeom *);
+
+    virtual void cleanGeoms()=0;
+    // turn all dirty geoms into clean geoms by computing their AABBs and any
+    // other space data structures that are required. this should clear the
+    // GEOM_DIRTY and GEOM_AABB_BAD flags of all geoms.
+
+    virtual void collide (void *data, dNearCallback *callback)=0;
+    virtual void collide2 (void *data, dxGeom *geom, dNearCallback *callback)=0;
+};
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*inline */
+void dxGeom::markAABBBad() {  // declared inline in the class; defined here because CHECK_NOT_LOCKED needs the complete dxSpace type
+    gflags |= (GEOM_DIRTY | GEOM_AABB_BAD);
+    CHECK_NOT_LOCKED(parent_space);
+}
+
+
+//****************************************************************************
+// Initialization and finalization functions
+
+void dInitColliders();   // defined outside this header
+void dFinitColliders();  // defined outside this header
+
+void dClearPosrCache(void);  // defined outside this header
+void dFinitUserClasses();    // resets the count of registered user geom classes to 0
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/collision_libccd.cpp b/libs/ode-0.16.1/ode/src/collision_libccd.cpp
new file mode 100644
index 0000000..ba15e83
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_libccd.cpp
@@ -0,0 +1,1080 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/collision.h>
+#include <ccd/ccd.h>
+#include "ccdcustom/vec3.h"
+#include "ccdcustom/quat.h"
+#include "config.h"
+#include "odemath.h"
+#include "collision_libccd.h"
+#include "collision_trimesh_internal.h"
+#include "collision_std.h"
+#include "collision_util.h"
+#include "error.h"
+
+
+struct _ccd_obj_t {
+    ccd_vec3_t pos;
+    ccd_quat_t rot, rot_inv;
+};
+typedef struct _ccd_obj_t ccd_obj_t;
+
+struct _ccd_box_t {
+    ccd_obj_t o;
+    ccd_real_t dim[3];
+};
+typedef struct _ccd_box_t ccd_box_t;
+
+struct _ccd_cap_t {
+    ccd_obj_t o;
+    ccd_real_t radius;
+    ccd_vec3_t axis;
+    ccd_vec3_t p1;
+    ccd_vec3_t p2;
+};
+typedef struct _ccd_cap_t ccd_cap_t;
+
+struct _ccd_cyl_t {
+    ccd_obj_t o;
+    ccd_real_t radius;
+    ccd_vec3_t axis;
+    ccd_vec3_t p1;
+    ccd_vec3_t p2;
+};
+typedef struct _ccd_cyl_t ccd_cyl_t;
+
+struct _ccd_sphere_t {
+    ccd_obj_t o;
+    ccd_real_t radius;
+};
+typedef struct _ccd_sphere_t ccd_sphere_t;
+
+struct _ccd_convex_t {
+    ccd_obj_t o;
+    dxConvex *convex;
+};
+typedef struct _ccd_convex_t ccd_convex_t;
+
+struct _ccd_triangle_t {
+    ccd_obj_t o;
+    ccd_vec3_t vertices[3];
+};
+typedef struct _ccd_triangle_t ccd_triangle_t;
+
+/** Converters that fill a ccd shape struct from an ODE geom */
+static void ccdGeomToObj(const dGeomID g, ccd_obj_t *);
+static void ccdGeomToBox(const dGeomID g, ccd_box_t *);
+static void ccdGeomToCap(const dGeomID g, ccd_cap_t *);
+static void ccdGeomToCyl(const dGeomID g, ccd_cyl_t *);
+static void ccdGeomToSphere(const dGeomID g, ccd_sphere_t *);
+static void ccdGeomToConvex(const dGeomID g, ccd_convex_t *);
+
+/** Support functions: write into v the shape's support point for direction _dir */
+static void ccdSupportBox(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+static void ccdSupportCap(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+static void ccdSupportCyl(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+static void ccdSupportSphere(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+static void ccdSupportConvex(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+
+/** Center function shared by all shapes (copies the pose position) */
+static void ccdCenter(const void *obj, ccd_vec3_t *c);
+
+/** General collide function: runs libccd MPR on two wrapped shapes */
+static int ccdCollide(dGeomID o1, dGeomID o2, int flags,
+    dContactGeom *contact, int skip,
+    void *obj1, ccd_support_fn supp1, ccd_center_fn cen1,
+    void *obj2, ccd_support_fn supp2, ccd_center_fn cen2);
+
+// Cylinder-cylinder special case for (nearly) parallel axes; returns a negative
+// value when it does not apply and the caller should fall back to ccdCollide.
+static int collideCylCyl(dxGeom *o1, dxGeom *o2, ccd_cyl_t* cyl1, ccd_cyl_t* cyl2, int flags, dContactGeom *contacts, int skip);
+// Builds a candidate rim contact point for the given angle; returns false when its depth is negative.
+static bool testAndPrepareDiscContactForAngle(dReal angle, dReal radius, dReal length, dReal lSum, ccd_cyl_t *priCyl, ccd_cyl_t *secCyl, ccd_vec3_t &p, dReal &out_depth);
+// Adds a contact between 2 cylinders
+static int addCylCylContact(dxGeom *o1, dxGeom *o2, ccd_vec3_t* axis, dContactGeom *contacts, ccd_vec3_t* p, dReal normaldir, dReal depth, int j, int flags, int skip);
+
+// Helpers of the convex-vs-trimesh-triangles collider.
+static unsigned addTrianglePerturbedContacts(dxGeom *o1, dxGeom *o2, IFaceAngleStorageView *meshFaceAngleView, 
+    const int *indices, unsigned numIndices, int flags, dContactGeom *contacts, int skip,
+    ccd_convex_t *c1, ccd_triangle_t *c2, dVector3 *triangle, dContactGeom *contact, unsigned contacCount);
+static bool correctTriangleContactNormal(ccd_triangle_t *t, dContactGeom *contact, IFaceAngleStorageView *meshFaceAngleView, const int *indices, unsigned numIndices);
+static unsigned addUniqueContact(dContactGeom *contacts, dContactGeom *c, unsigned contactcount, unsigned maxcontacts, int flags, int skip);
+static void setObjPosToTriangleCenter(ccd_triangle_t *t); 
+static void ccdSupportTriangle(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v);
+
+
+/**
+ * Copies the position and orientation of geom g into the pose header o and
+ * precomputes the inverse orientation.
+ */
+static
+void ccdGeomToObj(const dGeomID g, ccd_obj_t *o)
+{
+    const dReal *position = dGeomGetPosition(g);
+
+    dQuaternion orientation;
+    dGeomGetQuaternion(g, orientation);
+
+    // The ODE quaternion is handed over as elements 1, 2, 3 and then 0
+    // (presumably ODE is w-first while ccdQuatSet takes x, y, z, w).
+    ccdQuatSet(&o->rot, orientation[1], orientation[2], orientation[3], orientation[0]);
+    ccdQuatInvert2(&o->rot_inv, &o->rot);
+
+    ccdVec3Set(&o->pos, position[0], position[1], position[2]);
+}
+
+/**
+ * Fills a ccd_box_t from an ODE box geom: pose plus half of each side length.
+ */
+static
+void ccdGeomToBox(const dGeomID g, ccd_box_t *box)
+{
+    ccdGeomToObj(g, &box->o);
+
+    dVector3 lengths;
+    dGeomBoxGetLengths(g, lengths);
+
+    // The support function works with half extents.
+    for (int axis = 0; axis != 3; ++axis) {
+        box->dim[axis] = (ccd_real_t)(lengths[axis] * 0.5);
+    }
+}
+
+/**
+ * Fills a ccd_cap_t from an ODE capsule geom. The axis member is left as the
+ * rotated half-length vector (not normalized); p1 and p2 are the segment end
+ * points pos + axis and pos - axis.
+ */
+static
+void ccdGeomToCap(const dGeomID g, ccd_cap_t *cap)
+{
+    ccdGeomToObj(g, &cap->o);
+
+    dReal capRadius, capLength;
+    dGeomCapsuleGetParams(g, &capRadius, &capLength);
+    cap->radius = capRadius;
+
+    // Half-length vector along local Z, rotated by the geom orientation.
+    ccdVec3Set(&cap->axis, 0.0, 0.0, capLength / 2);
+    ccdQuatRotVec(&cap->axis, &cap->o.rot);
+
+    // First end point.
+    ccdVec3Copy(&cap->p1, &cap->axis);
+    ccdVec3Add(&cap->p1, &cap->o.pos);
+
+    // Opposite end point.
+    ccdVec3Copy(&cap->p2, &cap->axis);
+    ccdVec3Scale(&cap->p2, -1.0);
+    ccdVec3Add(&cap->p2, &cap->o.pos);
+}
+
+/**
+ * Fills a ccd_cyl_t from an ODE cylinder geom. p1 and p2 become the cap
+ * centers (pos +/- rotated half-length vector) and axis ends up normalized.
+ * A cylinder whose axis cannot be normalized is reported through dUVERIFY.
+ */
+static
+void ccdGeomToCyl(const dGeomID g, ccd_cyl_t *cyl)
+{
+    ccdGeomToObj(g, &cyl->o);
+
+    dReal cylRadius, cylLength;
+    dGeomCylinderGetParams(g, &cylRadius, &cylLength);
+    cyl->radius = cylRadius;
+
+    // Half-length vector along local Z, rotated by the geom orientation.
+    ccdVec3Set(&cyl->axis, 0.0, 0.0, cylLength / 2);
+    ccdQuatRotVec(&cyl->axis, &cyl->o.rot);
+
+    // The cap centers must be derived before the axis is normalized below.
+    ccdVec3Copy(&cyl->p1, &cyl->axis);
+    ccdVec3Add(&cyl->p1, &cyl->o.pos);
+
+    ccdVec3Copy(&cyl->p2, &cyl->axis);
+    ccdVec3Scale(&cyl->p2, -1.0);
+    ccdVec3Add(&cyl->p2, &cyl->o.pos);
+
+    int cylAxisNormalizationResult = ccdVec3SafeNormalize(&cyl->axis);
+    dUVERIFY(cylAxisNormalizationResult == 0, "Invalid cylinder has been passed");
+}
+
+/** Fills a ccd_sphere_t from an ODE sphere geom: pose plus radius. */
+static
+void ccdGeomToSphere(const dGeomID g, ccd_sphere_t *s)
+{
+    s->radius = dGeomSphereGetRadius(g);
+    ccdGeomToObj(g, &s->o);
+}
+
+/**
+ * Fills a ccd_convex_t from an ODE convex geom: pose plus the geom itself
+ * reinterpreted as dxConvex (the caller must pass a convex geom).
+ */
+static
+void ccdGeomToConvex(const dGeomID g, ccd_convex_t *c)
+{
+    c->convex = (dxConvex *)g;
+    ccdGeomToObj(g, &c->o);
+}
+
+
+/**
+ * Support function of a box: writes into v the world-space corner selected by
+ * the signs of the query direction expressed in the box frame.
+ */
+static
+void ccdSupportBox(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    const ccd_box_t *box = (const ccd_box_t *)obj;
+
+    // Bring the query direction into the box's local frame.
+    ccd_vec3_t localDir;
+    ccdVec3Copy(&localDir, _dir);
+    ccdQuatRotVec(&localDir, &box->o.rot_inv);
+
+    // Each half extent is signed by ccdSign of the matching direction component.
+    const ccd_real_t cornerX = ccdSign(ccdVec3X(&localDir)) * box->dim[0];
+    const ccd_real_t cornerY = ccdSign(ccdVec3Y(&localDir)) * box->dim[1];
+    const ccd_real_t cornerZ = ccdSign(ccdVec3Z(&localDir)) * box->dim[2];
+    ccdVec3Set(v, cornerX, cornerY, cornerZ);
+
+    // Back to world space.
+    ccdQuatRotVec(v, &box->o.rot);
+    ccdVec3Add(v, &box->o.pos);
+}
+
+/**
+ * Support function of a capsule: the segment end point on the side the
+ * direction faces, pushed out by radius * _dir. _dir is used unnormalized
+ * as given.
+ */
+static
+void ccdSupportCap(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    const ccd_cap_t *cap = (const ccd_cap_t *)obj;
+
+    // p1 lies on the +axis side, p2 on the -axis side.
+    const ccd_vec3_t *endPoint = (ccdVec3Dot(_dir, &cap->axis) > 0.0) ? &cap->p1 : &cap->p2;
+
+    ccdVec3Copy(v, _dir);
+    ccdVec3Scale(v, cap->radius);
+    ccdVec3Add(v, endPoint);
+}
+
+/**
+ * Support function of a cylinder: starts from the cap center on the side the
+ * direction faces and, unless the direction is parallel to the axis, moves
+ * out to the rim along the direction's component perpendicular to the axis.
+ */
+static
+void ccdSupportCyl(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    const ccd_cyl_t *cyl = (const ccd_cyl_t *)obj;
+
+    const ccd_real_t axial = ccdVec3Dot(_dir, &cyl->axis);
+    ccdVec3Copy(v, (axial > 0.0) ? &cyl->p1 : &cyl->p2);
+
+    // radial = _dir - axial * axis, i.e. _dir projected onto the cap plane
+    ccd_vec3_t radial;
+    ccdVec3Copy(&radial, &cyl->axis);
+    ccdVec3Scale(&radial, -axial);
+    ccdVec3Add(&radial, _dir);
+
+    const ccd_real_t radialLen = CCD_SQRT(ccdVec3Len2(&radial));
+    if (ccdIsZero(radialLen)) {
+        // Direction is along the axis: the cap center is the answer.
+        return;
+    }
+
+    ccdVec3Scale(&radial, cyl->radius / radialLen);
+    ccdVec3Add(v, &radial);
+}
+
+/**
+ * Support function of a sphere: center + radius * _dir. The direction is
+ * expected to be unit length already (checked by an internal assertion
+ * instead of being renormalized).
+ */
+static
+void ccdSupportSphere(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    const ccd_sphere_t *sphere = (const ccd_sphere_t *)obj;
+
+    dIASSERT(dFabs(CCD_SQRT(ccdVec3Len2(_dir)) - REAL(1.0)) < 1e-6); // ccdVec3Scale(v, CCD_ONE / CCD_SQRT(ccdVec3Len2(_dir)));
+
+    ccdVec3Copy(v, _dir);
+    ccdVec3Scale(v, sphere->radius);
+    ccdVec3Add(v, &sphere->o.pos);
+}
+
+/**
+ * Support function of a convex hull: scans all hull points (stored as
+ * consecutive x, y, z triples in the hull's local frame) and writes the one
+ * with the largest projection onto the query direction, in world space, to v.
+ *
+ * If no point is selected (the hull has no points, or every dot product
+ * compares false against the running maximum, e.g. a NaN direction) the
+ * result is the hull's position rather than an uninitialized vector.
+ */
+static 
+void ccdSupportConvex(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    const ccd_convex_t *c = (const ccd_convex_t *)obj;
+    ccd_vec3_t dir, p;
+    ccd_real_t maxdot, dot;
+    sizeint i;
+    const dReal *curp;
+
+    // Query direction in the hull's local frame
+    ccdVec3Copy(&dir, _dir);
+    ccdQuatRotVec(&dir, &c->o.rot_inv);
+
+    // Deterministic fallback: the local origin. Without this, *v would be
+    // read uninitialized by the transform below when the loop selects nothing.
+    ccdVec3Set(v, 0.0, 0.0, 0.0);
+
+    maxdot = -CCD_REAL_MAX;
+    curp = c->convex->points;
+    for (i = 0; i < c->convex->pointcount; i++, curp += 3){
+        ccdVec3Set(&p, curp[0], curp[1], curp[2]);
+        dot = ccdVec3Dot(&dir, &p);
+        if (dot > maxdot){
+            ccdVec3Copy(v, &p);
+            maxdot = dot;
+        }
+    }
+
+    // transform support vertex
+    ccdQuatRotVec(v, &c->o.rot);
+    ccdVec3Add(v, &c->o.pos);
+}
+
+/**
+ * Center callback shared by all shapes: every wrapper starts with a
+ * ccd_obj_t, so the pose position is read through that header.
+ */
+static
+void ccdCenter(const void *obj, ccd_vec3_t *c)
+{
+    ccdVec3Copy(c, &((const ccd_obj_t *)obj)->pos);
+}
+
+/**
+ * Generic libccd (MPR) collider used by all shape-pair wrappers in this file.
+ *
+ * @param o1, o2    geoms recorded in the produced contact (g1, g2)
+ * @param flags     dCollide flags; the low NUMC_MASK bits give the capacity of
+ *                  the contact buffer
+ * @param contact   receives the single contact when one is found
+ * @param skip      unused here (at most one contact is written)
+ * @param obj1/2    shape wrappers passed through to the support/center callbacks
+ * @param supp1/2   support callbacks for the two shapes
+ * @param cen1/2    center callbacks for the two shapes
+ * @return 1 if the shapes penetrate (and *contact has been filled), 0 otherwise.
+ *
+ * The penetration query is run even when CONTACTS_UNIMPORTANT is set. The
+ * previous intersect-only shortcut returned 1 while leaving *contact
+ * untouched, so callers that read the contact afterwards (for example the
+ * convex/trimesh collider, which inspects the normal and depth) saw
+ * uninitialized data.
+ */
+static 
+int ccdCollide(
+    dGeomID o1, dGeomID o2, int flags, dContactGeom *contact, int skip,
+    void *obj1, ccd_support_fn supp1, ccd_center_fn cen1,
+    void *obj2, ccd_support_fn supp2, ccd_center_fn cen2)
+{
+    ccd_t ccd;
+    int res;
+    ccd_real_t depth;
+    ccd_vec3_t dir, pos;
+    int max_contacts = (flags & NUMC_MASK);
+
+    (void)skip;
+
+    if (max_contacts < 1)
+        return 0;
+
+    CCD_INIT(&ccd);
+    ccd.support1 = supp1;
+    ccd.support2 = supp2;
+    ccd.center1  = cen1;
+    ccd.center2  = cen2;
+    ccd.max_iterations = 500;
+    ccd.mpr_tolerance = (ccd_real_t)1E-6;
+
+    res = ccdMPRPenetration(obj1, obj2, &ccd, &depth, &dir, &pos);
+    if (res != 0)
+        return 0;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+
+    contact->side1 = contact->side2 = -1;
+
+    contact->depth = depth;
+
+    contact->pos[0] = ccdVec3X(&pos);
+    contact->pos[1] = ccdVec3Y(&pos);
+    contact->pos[2] = ccdVec3Z(&pos);
+
+    // The stored normal is the reverse of the direction reported by libccd.
+    ccdVec3Scale(&dir, -1.);
+    contact->normal[0] = ccdVec3X(&dir);
+    contact->normal[1] = ccdVec3Y(&dir);
+    contact->normal[2] = ccdVec3Z(&dir);
+
+    return 1;
+}
+
+/*extern */
+// Box (o1) against cylinder (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideBoxCylinderCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_box_t boxShape;
+    ccdGeomToBox(o1, &boxShape);
+
+    ccd_cyl_t cylShape;
+    ccdGeomToCyl(o2, &cylShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &boxShape, ccdSupportBox, ccdCenter,
+        &cylShape, ccdSupportCyl, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Capsule (o1) against cylinder (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideCapsuleCylinder(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_cap_t capShape;
+    ccdGeomToCap(o1, &capShape);
+
+    ccd_cyl_t cylShape;
+    ccdGeomToCyl(o2, &cylShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &capShape, ccdSupportCap, ccdCenter,
+        &cylShape, ccdSupportCyl, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Convex hull (o1) against box (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideConvexBoxCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_convex_t hullShape;
+    ccdGeomToConvex(o1, &hullShape);
+
+    ccd_box_t boxShape;
+    ccdGeomToBox(o2, &boxShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &hullShape, ccdSupportConvex, ccdCenter,
+        &boxShape, ccdSupportBox, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Convex hull (o1) against capsule (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideConvexCapsuleCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_convex_t hullShape;
+    ccdGeomToConvex(o1, &hullShape);
+
+    ccd_cap_t capShape;
+    ccdGeomToCap(o2, &capShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &hullShape, ccdSupportConvex, ccdCenter,
+        &capShape, ccdSupportCap, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Convex hull (o1) against sphere (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideConvexSphereCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_convex_t hullShape;
+    ccdGeomToConvex(o1, &hullShape);
+
+    ccd_sphere_t sphereShape;
+    ccdGeomToSphere(o2, &sphereShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &hullShape, ccdSupportConvex, ccdCenter,
+        &sphereShape, ccdSupportSphere, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Convex hull (o1) against cylinder (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideConvexCylinderCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_convex_t hullShape;
+    ccdGeomToConvex(o1, &hullShape);
+
+    ccd_cyl_t cylShape;
+    ccdGeomToCyl(o2, &cylShape);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &hullShape, ccdSupportConvex, ccdCenter,
+        &cylShape, ccdSupportCyl, ccdCenter);
+    return contactCount;
+}
+
+/*extern */
+// Convex hull (o1) against convex hull (o2) through the generic libccd collider.
+// Returns the number of contacts written (0 or 1).
+int dCollideConvexConvexCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_convex_t firstHull;
+    ccdGeomToConvex(o1, &firstHull);
+
+    ccd_convex_t secondHull;
+    ccdGeomToConvex(o2, &secondHull);
+
+    const int contactCount = ccdCollide(o1, o2, flags, contact, skip,
+        &firstHull, ccdSupportConvex, ccdCenter,
+        &secondHull, ccdSupportConvex, ccdCenter);
+    return contactCount;
+}
+
+
+/*extern */
+// Cylinder against cylinder. The dedicated parallel-axes collider is tried
+// first; a negative result from it means "not applicable" and the generic
+// libccd collider is used instead.
+int dCollideCylinderCylinder(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    ccd_cyl_t firstCyl, secondCyl;
+    ccdGeomToCyl(o1, &firstCyl);
+    ccdGeomToCyl(o2, &secondCyl);
+
+    const int specialCaseCount = collideCylCyl(o1, o2, &firstCyl, &secondCyl, flags, contact, skip);
+    if (specialCaseCount >= 0) {
+        return specialCaseCount;
+    }
+
+    return ccdCollide(o1, o2, flags, contact, skip,
+                      &firstCyl, ccdSupportCyl, ccdCenter,
+                      &secondCyl, ccdSupportCyl, ccdCenter);
+}
+
+/**
+ * Special-case cylinder/cylinder collider for (nearly) parallel axes where the
+ * end caps are the touching features.
+ *
+ * @param o1, o2        the cylinder geoms (recorded in the contacts)
+ * @param cyl1, cyl2    their libccd wrappers (axis must be normalized)
+ * @param flags         dCollide flags; at most min(8, flags & NUMC_MASK)
+ *                      contacts are generated
+ * @param contacts      output contact buffer
+ * @param skip          byte stride between contacts in the buffer
+ * @return the number of contacts written (possibly 0), or -1 when the
+ *         configuration is not handled here and the caller must fall back to
+ *         the generic collider.
+ *
+ * The square roots used for the circle/circle intersection are clamped at
+ * zero: when the two rims barely touch, rounding can make r^2 - l^2 slightly
+ * negative, which would otherwise produce NaN contact angles.
+ */
+static 
+int collideCylCyl(dxGeom *o1, dxGeom *o2, ccd_cyl_t* cyl1, ccd_cyl_t* cyl2, int flags, dContactGeom *contacts, int skip) 
+{
+    int maxContacts = (flags & NUMC_MASK);
+    dAASSERT(maxContacts != 0);
+
+    maxContacts = maxContacts > 8 ? 8 : maxContacts;
+    
+    dReal axesProd = dFabs(ccdVec3Dot(&cyl1->axis, &cyl2->axis));
+    // Check if cylinders' axes are in line
+    if (REAL(1.0) - axesProd < 1e-3f) {
+        ccd_vec3_t p, proj;
+        dReal r1, l1;
+        dReal r2, l2;
+        dGeomCylinderGetParams(o1, &r1, &l1);
+        dGeomCylinderGetParams(o2, &r2, &l2);
+        // From here on l1/l2 are half lengths
+        l1 *= 0.5f;
+        l2 *= 0.5f;
+       
+        // Determine the cylinder with smaller radius (minCyl) and bigger radius (maxCyl) and their respective properties: radius, length
+        bool r1IsMin;
+        dReal rmin, rmax;
+        ccd_cyl_t *minCyl, *maxCyl;
+        if (r1 <= r2) {
+            rmin = r1; rmax = r2;
+            minCyl = cyl1; maxCyl = cyl2;
+            r1IsMin = true;
+        }
+        else {
+            rmin = r2; rmax = r1;
+            minCyl = cyl2; maxCyl = cyl1;
+            r1IsMin = false;
+        }
+
+        dReal lSum = l1 + l2;
+
+        // p = vector from the larger cylinder's center to the smaller one's
+        ccdVec3Copy(&p, &minCyl->o.pos);
+        ccdVec3Sub(&p, &maxCyl->o.pos);
+        dReal dot = ccdVec3Dot(&p, &maxCyl->axis);
+        
+        // Maximum possible contact depth
+        dReal depth_v = lSum - dFabs(dot) + dSqrt(dMax(0, REAL(1.0) - axesProd * axesProd)) * rmin;
+        if (depth_v < 0) {
+            return 0;
+        }
+
+        // Project the smaller cylinder's center onto the larger cylinder's plane
+        ccdVec3Copy(&proj, &maxCyl->axis);
+        ccdVec3Scale(&proj, -dot);
+        ccdVec3Add(&proj, &p);
+        // Despite its name this is the distance between the two axes
+        dReal radiiDiff = (dReal)sqrt(ccdVec3Len2(&proj));
+        dReal depth_h = r1 + r2 - radiiDiff;
+
+        // Check the distance between cylinders' centers
+        if (depth_h < 0) {
+            return 0;
+        }
+
+        // Check if "vertical" contact depth is less than "horizontal" contact depth
+        if (depth_v < depth_h) {
+            int contactCount = 0;
+            dReal dot2 = -ccdVec3Dot(&p, &minCyl->axis);
+            // lmin, lmax - distances from cylinders' centers to potential contact points relative to cylinders' axes
+            dReal lmax = r1IsMin ? l2 : l1;
+            dReal lmin = r1IsMin ? l1 : l2;
+            lmin = dot2 < 0 ? -lmin : lmin;
+            lmax = dot < 0 ? -lmax : lmax;
+            // Contact normal direction, relative to o1's axis
+            dReal normaldir = (dot < 0) != r1IsMin ? REAL(1.0) : -REAL(1.0);
+            
+            if (rmin + radiiDiff <= rmax) {
+                // Case 1: The smaller disc is fully contained within the larger one
+                // Simply generate N points on the rim of the smaller disc
+                dReal maxContactsRecip = (dReal)(0 < maxContacts ? (2.0 * M_PI / maxContacts) : (2.0 * M_PI)); // The 'else' value does not matter. Just try helping the optimizer.
+                for (int i = 0; i < maxContacts; i++) {
+                    dReal depth;
+                    dReal a = maxContactsRecip * i;
+                    if (testAndPrepareDiscContactForAngle(a, rmin, lmin, lSum, minCyl, maxCyl, p, depth)) {
+                        contactCount = addCylCylContact(o1, o2, &maxCyl->axis, contacts, &p, normaldir, depth, contactCount, flags, skip);
+                        if ((flags & CONTACTS_UNIMPORTANT) != 0) {
+                            dIASSERT(contactCount != 0);
+                            break;
+                        }
+                    }
+                }
+                return contactCount;
+
+            } else {
+                // Case 2: Discs intersect
+                // Firstly, find intersections assuming the larger cylinder is placed at (0,0,0)
+                // http://math.stackexchange.com/questions/256100/how-can-i-find-the-points-at-which-two-circles-intersect
+                ccd_vec3_t proj2;
+                ccdVec3Copy(&proj2, &proj);
+                ccdQuatRotVec(&proj, &maxCyl->o.rot_inv);
+                dReal d = dSqrt(ccdVec3X(&proj) * ccdVec3X(&proj) + ccdVec3Y(&proj) * ccdVec3Y(&proj));
+                dIASSERT(d != REAL(0.0));
+                
+                dReal dRecip = REAL(1.0) / d;
+                dReal rmaxSquare = rmax * rmax, rminSquare = rmin * rmin, dSquare = d * d;
+
+                dReal minA, diffA, minB, diffB;
+
+                {
+                    dReal l = (rmaxSquare - rminSquare + dSquare) * (REAL(0.5) * dRecip);
+                    // Clamp: rounding may push the argument just below zero for touching rims
+                    dReal h = dSqrt(dMax(REAL(0.0), rmaxSquare - l * l));
+                    dReal divLbyD = l * dRecip, divHbyD = h * dRecip;
+                    dReal x1 = divLbyD * ccdVec3X(&proj) + divHbyD * ccdVec3Y(&proj);
+                    dReal y1 = divLbyD * ccdVec3Y(&proj) - divHbyD * ccdVec3X(&proj);
+                    dReal x2 = divLbyD * ccdVec3X(&proj) - divHbyD * ccdVec3Y(&proj);
+                    dReal y2 = divLbyD * ccdVec3Y(&proj) + divHbyD * ccdVec3X(&proj);
+                    // Map the intersection points to angles
+                    dReal ap1 = dAtan2(y1, x1);
+                    dReal ap2 = dAtan2(y2, x2);
+                    minA = dMin(ap1, ap2);
+                    dReal maxA = dMax(ap1, ap2);
+                    // If the segment connecting cylinders' centers does not intersect the arc, change the angles
+                    dReal a = dAtan2(ccdVec3Y(&proj), ccdVec3X(&proj));
+                    if (a < minA || a > maxA) {
+                        a = maxA;
+                        maxA = (dReal)(minA + M_PI * 2.0);
+                        minA = a;
+                    }
+                    diffA = maxA - minA;
+                }
+                
+                // Do the same for the smaller cylinder assuming it is placed at (0,0,0) now
+                ccdVec3Copy(&proj, &proj2);
+                ccdVec3Scale(&proj, -1);
+                ccdQuatRotVec(&proj, &minCyl->o.rot_inv);
+                
+                {
+                    dReal l = (rminSquare - rmaxSquare + dSquare) * (REAL(0.5) * dRecip);
+                    // Clamp: see above
+                    dReal h = dSqrt(dMax(REAL(0.0), rminSquare - l * l));
+                    dReal divLbyD = l * dRecip, divHbyD = h * dRecip;
+                    dReal x1 = divLbyD * ccdVec3X(&proj) + divHbyD * ccdVec3Y(&proj);
+                    dReal y1 = divLbyD * ccdVec3Y(&proj) - divHbyD * ccdVec3X(&proj);
+                    dReal x2 = divLbyD * ccdVec3X(&proj) - divHbyD * ccdVec3Y(&proj);
+                    dReal y2 = divLbyD * ccdVec3Y(&proj) + divHbyD * ccdVec3X(&proj);
+                    dReal ap1 = dAtan2(y1, x1);
+                    dReal ap2 = dAtan2(y2, x2);
+                    minB = dMin(ap1, ap2);
+                    dReal maxB = dMax(ap1, ap2);
+                    dReal a = dAtan2(ccdVec3Y(&proj), ccdVec3X(&proj));
+                    if (a < minB || a > maxB) {
+                        a = maxB;
+                        maxB = (dReal)(minB + M_PI * 2.0);
+                        minB = a;
+                    }
+                    diffB = maxB - minB;
+                }
+
+                // Find contact point distribution ratio based on arcs lengths.
+                // If both arcs have collapsed to a point, split evenly instead of dividing 0 by 0.
+                dReal arcLengthSum = diffA * rmax + diffB * rmin;
+                dReal ratio = arcLengthSum > 0 ? diffA * rmax / arcLengthSum : REAL(0.5);
+                dIASSERT(ratio <= REAL(1.0)); 
+                dIASSERT(ratio >= REAL(0.0));
+
+                int nMax = (int)dFloor(ratio * maxContacts + REAL(0.5));
+                int nMin = maxContacts - nMax;
+                dIASSERT(nMax <= maxContacts);
+
+                // Make sure there is at least one point on the smaller radius rim
+                if (nMin < 1) {
+                    nMin = 1; nMax -= 1;
+                }
+                // Otherwise transfer one point to the larger radius rim as it is going to fill the rim intersection points
+                else if (nMin > 1) {
+                    nMin -= 1; nMax += 1;
+                }
+
+                // Smaller disc first, skipping the overlapping points
+                dReal nMinRecip = 0 < nMin ? diffB / (nMin + 1) : diffB; // The 'else' value does not matter. Just try helping the optimizer.
+                for (int i = 1; i <= nMin; i++) {
+                    dReal depth;
+                    dReal a = minB + nMinRecip * i;
+                    if (testAndPrepareDiscContactForAngle(a, rmin, lmin, lSum, minCyl, maxCyl, p, depth)) {
+                        contactCount = addCylCylContact(o1, o2, &maxCyl->axis, contacts, &p, normaldir, depth, contactCount, flags, skip);
+                        if ((flags & CONTACTS_UNIMPORTANT) != 0) {
+                            dIASSERT(contactCount != 0);
+                            break;
+                        }
+                    }
+                }
+
+                if (contactCount == 0 || (flags & CONTACTS_UNIMPORTANT) == 0) {
+                    // Then the larger disc, + additional point as the start/end points of arcs overlap
+                    // (or a single contact at the arc middle point if just one is required)
+                    dReal nMaxRecip = nMax > 1 ? diffA / (nMax - 1) : diffA; // The 'else' value does not matter. Just try helping the optimizer.
+                    dReal adjustedMinA = nMax == 1 ? minA + REAL(0.5) * diffA : minA;
+
+                    for (int i = 0; i < nMax; i++) {
+                        dReal depth;
+                        dReal a = adjustedMinA + nMaxRecip * i;
+                        if (testAndPrepareDiscContactForAngle(a, rmax, lmax, lSum, maxCyl, minCyl, p, depth)) {
+                            contactCount = addCylCylContact(o1, o2, &maxCyl->axis, contacts, &p, normaldir, depth, contactCount, flags, skip);
+                            if ((flags & CONTACTS_UNIMPORTANT) != 0) {
+                                dIASSERT(contactCount != 0);
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                return contactCount;
+            }
+        }
+    }
+    // Not a cap-to-cap configuration: let the caller use the generic collider
+    return -1;
+}
+
+/**
+ * Builds the candidate contact point at the given angle on the rim circle of
+ * priCyl (radius in priCyl's local XY plane, through its center) and measures
+ * its depth against secCyl as lSum minus the absolute axial distance from
+ * secCyl's center.
+ *
+ * On success (depth >= 0) the point is shifted by length along priCyl's axis,
+ * stored in p, the depth is stored in out_depth and true is returned.
+ * Otherwise false is returned, out_depth is untouched and p holds the
+ * unshifted rim point.
+ */
+static
+bool testAndPrepareDiscContactForAngle(dReal angle, dReal radius, dReal length, dReal lSum, ccd_cyl_t *priCyl, ccd_cyl_t *secCyl, ccd_vec3_t &p, dReal &out_depth)
+{
+    // Rim point in priCyl's frame, then moved to world space.
+    ccdVec3Set(&p, dCos(angle) * radius, dSin(angle) * radius, 0);
+    ccdQuatRotVec(&p, &priCyl->o.rot);
+    ccdVec3Add(&p, &priCyl->o.pos);
+
+    // Axial distance of that point from secCyl's center.
+    ccd_vec3_t scratch;
+    ccdVec3Copy(&scratch, &p);
+    ccdVec3Sub(&scratch, &secCyl->o.pos);
+    const dReal candidateDepth = lSum - dFabs(ccdVec3Dot(&scratch, &secCyl->axis));
+
+    // Written as a negated ">=" so that a NaN depth is rejected as well.
+    if (!(candidateDepth >= 0)) {
+        return false;
+    }
+
+    // Shift the point along priCyl's axis by the signed length.
+    ccdVec3Copy(&scratch, &priCyl->axis);
+    ccdVec3Scale(&scratch, length);
+    ccdVec3Add(&p, &scratch);
+
+    out_depth = candidateDepth;
+    return true;
+}
+
+/**
+ * Writes contact number j of a cylinder/cylinder collision into the contact
+ * buffer: position p, normal = normaldir * axis, the given depth, and both
+ * side indices set to -1. Returns the new contact count, j + 1.
+ */
+static
+int addCylCylContact(dxGeom *o1, dxGeom *o2, ccd_vec3_t* axis, dContactGeom *contacts,
+               ccd_vec3_t* p, dReal normaldir, dReal depth, int j, int flags, int skip)
+{
+    dIASSERT(depth >= 0);
+
+    dContactGeom *slot = SAFECONTACT(flags, contacts, j, skip);
+
+    slot->pos[0] = ccdVec3X(p);
+    slot->pos[1] = ccdVec3Y(p);
+    slot->pos[2] = ccdVec3Z(p);
+
+    slot->normal[0] = normaldir * ccdVec3X(axis);
+    slot->normal[1] = normaldir * ccdVec3Y(axis);
+    slot->normal[2] = normaldir * ccdVec3Z(axis);
+
+    slot->depth = depth;
+
+    slot->g1 = o1;
+    slot->g2 = o2;
+    slot->side1 = -1;
+    slot->side2 = -1;
+
+    const int newCount = j + 1;
+    return newCount;
+}
+
+
+#if dTRIMESH_ENABLED
+
+// File-local tuning constants of the convex/trimesh collider.
+// CONTACT_PERTURBATION_ANGLE is the rotation angle applied by
+// addTrianglePerturbedContacts; NOTE(review): the three epsilons are
+// presumably comparison tolerances used further down the file - confirm there.
+static const float CONTACT_DEPTH_EPSILON = 0.0001f;
+static const float CONTACT_POS_EPSILON = 0.0001f;
+static const float CONTACT_PERTURBATION_ANGLE = 0.001f;
+static const float NORMAL_PROJ_EPSILON = 0.0001f;
+
+
+/*extern */
+/**
+ * Collides a convex geom (o1) with a selection of triangles of a trimesh (o2).
+ *
+ * @param indices, numIndices   triangle indices of o2 to test
+ * @param flags                 dCollide flags (NUMC_MASK bits = buffer capacity)
+ * @param contacts, skip        output buffer and its byte stride
+ * @return the number of contacts stored in the buffer.
+ *
+ * Each triangle is tested with the generic libccd collider. When exactly one
+ * contact results (and contacts are not flagged unimportant) the triangle is
+ * re-tested in slightly rotated poses to obtain extra contacts. Finally the
+ * normals of merged contacts are renormalized and contacts whose normals
+ * cancelled out are dropped.
+ *
+ * Fixes relative to the previous revision:
+ *  - side2 now stores the triangle index (indices[i]) instead of the loop
+ *    counter; it is later passed to dGeomTriMeshGetTriangle, which expects a
+ *    triangle index.
+ *  - CONTACTS_UNIMPORTANT is masked out for the per-triangle query so that
+ *    tempContact is always filled before its normal/depth are examined.
+ */
+unsigned dCollideConvexTrimeshTrianglesCCD(dxGeom *o1, dxGeom *o2, const int *indices, unsigned numIndices, int flags, dContactGeom *contacts, int skip)
+{
+    ccd_convex_t c1;
+    ccd_triangle_t c2;
+    dVector3 triangle[dMTV__MAX];
+    unsigned maxContacts = (flags & NUMC_MASK);
+    unsigned contactCount = 0;
+    // Flags for the per-triangle query: full contact data is always required here
+    const int triangleQueryFlags = (int)(flags & ~CONTACTS_UNIMPORTANT);
+    ccdGeomToConvex(o1, &c1);
+    ccdGeomToObj(o2, (ccd_obj_t *)&c2);
+
+    IFaceAngleStorageView *meshFaceAngleView = dxGeomTriMeshGetFaceAngleView(o2);
+    dUASSERT(meshFaceAngleView != NULL, "Please preprocess the trimesh data with dTRIDATAPREPROCESS_BUILD_FACE_ANGLES");
+
+    for (unsigned i = 0; i != numIndices; ++i) {
+        dContactGeom tempContact;
+        dGeomTriMeshGetTriangle(o2, indices[i], &triangle[dMTV_FIRST], &triangle[dMTV_SECOND], &triangle[dMTV_THIRD]);
+
+        for (unsigned j = dMTV__MIN; j != dMTV__MAX; ++j) {
+            ccdVec3Set(&c2.vertices[j], (ccd_real_t)triangle[j][dV3E_X], (ccd_real_t)triangle[j][dV3E_Y], (ccd_real_t)triangle[j][dV3E_Z]);
+        }
+
+        setObjPosToTriangleCenter(&c2);
+
+        if (ccdCollide(o1, o2, triangleQueryFlags, &tempContact, skip, &c1, &ccdSupportConvex, &ccdCenter, &c2, &ccdSupportTriangle, &ccdCenter) == 1) {
+            // Record which mesh triangle produced the contact
+            tempContact.side2 = indices[i];
+            
+            if (meshFaceAngleView == NULL || correctTriangleContactNormal(&c2, &tempContact, meshFaceAngleView, indices, numIndices)) {
+                contactCount = addUniqueContact(contacts, &tempContact, contactCount, maxContacts, flags, skip);
+
+                if ((flags & CONTACTS_UNIMPORTANT) != 0) {
+                    break;
+                }
+            }
+        }
+    }
+
+    // A lone contact is unstable: probe slightly rotated copies of its triangle for more
+    if ((flags & CONTACTS_UNIMPORTANT) == 0 && contactCount == 1) {
+        dContactGeom *contact = SAFECONTACT(flags, contacts, 0, skip);
+        dGeomTriMeshGetTriangle(o2, contact->side2, &triangle[dMTV_FIRST], &triangle[dMTV_SECOND], &triangle[dMTV_THIRD]);
+        contactCount = addTrianglePerturbedContacts(o1, o2, meshFaceAngleView, indices, numIndices, flags, contacts, skip, &c1, &c2, triangle, contact, contactCount);
+    }
+
+    // Normalize accumulated normals, if necessary
+    for (unsigned k = 0; k != contactCount; ) {
+        dContactGeom *contact = SAFECONTACT(flags, contacts, k, skip);
+        bool stayWithinThisIndex = false;
+
+        // Only the merged contact normals need to be normalized
+        // (the padding element of the normal is read as a "merged" marker)
+        if (*_const_type_cast_union<bool>(&contact->normal[dV3E_PAD])) {
+        
+            if (!dxSafeNormalize3(contact->normal)) {
+                // If the contact normals have added up to zero, erase the contact
+                // Normally the time step is to be shorter so that the objects do not get into each other that deep
+                --contactCount;
+
+                if (k != contactCount) {
+                    // Fill the hole with the last contact and re-examine this slot
+                    dContactGeom *lastContact = SAFECONTACT(flags, contacts, contactCount, skip);
+                    *contact = *lastContact;
+                }
+
+                stayWithinThisIndex = true;
+            }
+        }
+
+        if (!stayWithinThisIndex) {
+            ++k;
+        }
+    }
+
+    return contactCount;
+}
+
+/**
+ * Tries to add further contacts for a triangle that so far produced a single
+ * contact. The triangle is rotated about the contact position by
+ * +/-CONTACT_PERTURBATION_ANGLE around two axes perpendicular to the contact
+ * normal (four combinations), and each rotated triangle is collided with the
+ * convex again. Every hit inherits side2 from the original contact, has its
+ * normal corrected (when face angle data is available) and is merged into the
+ * buffer through addUniqueContact.
+ *
+ * If no perpendicular axis pair can be built from the normal, nothing is
+ * added. Returns the updated contact count. Note that c2's vertices are left
+ * holding the last perturbed triangle.
+ */
+static
+unsigned addTrianglePerturbedContacts(dxGeom *o1, dxGeom *o2, IFaceAngleStorageView *meshFaceAngleView, 
+    const int *indices, unsigned numIndices, int flags, dContactGeom *contacts, int skip,
+    ccd_convex_t *c1, ccd_triangle_t *c2, dVector3 *triangle, dContactGeom *contact, unsigned contacCount)
+{
+    const unsigned maxContacts = (flags & NUMC_MASK);
+
+    // Keep a private copy of the pivot: the contact buffer may be modified below.
+    dVector3 pivot;
+    dCopyVector3(pivot, contact->pos);
+
+    // Seed axis that is not (nearly) parallel to the contact normal.
+    dVector3 upAxis;
+    if (fabs(contact->normal[dV3E_Y]) > 0.7) {
+        dAssignVector3(upAxis, 0, 0, 1);
+    }
+    else {
+        dAssignVector3(upAxis, 0, 1, 0);
+    }
+
+    // Build two unit axes perpendicular to the normal; give up if either degenerates.
+    dVector3 cross;
+    dCalcVectorCross3(cross, contact->normal, upAxis);
+    if (!dSafeNormalize3(cross)) {
+        return contacCount;
+    }
+
+    dCalcVectorCross3(upAxis, cross, contact->normal);
+    if (!dSafeNormalize3(upAxis)) {
+        return contacCount;
+    }
+
+    // Index 0 holds the positive rotation, index 1 the negative one.
+    dQuaternion aboutUp[2], aboutCross[2];
+    dReal signedAngle = CONTACT_PERTURBATION_ANGLE;
+    for (unsigned s = 0; s != 2; ++s) {
+        dQFromAxisAndAngle(aboutUp[s], upAxis[dV3E_X], upAxis[dV3E_Y], upAxis[dV3E_Z], signedAngle);
+        dQFromAxisAndAngle(aboutCross[s], cross[dV3E_X], cross[dV3E_Y], cross[dV3E_Z], signedAngle);
+        signedAngle = -signedAngle;
+    }
+
+    // All four sign combinations, cross-axis index outermost.
+    for (unsigned crossIdx = 0; crossIdx != 2; ++crossIdx) {
+        for (unsigned upIdx = 0; upIdx != 2; ++upIdx) {
+            dQuaternion combined;
+            dQMultiply0(combined, aboutUp[upIdx], aboutCross[crossIdx]);
+
+            // Rotate every vertex about the pivot and load it into the ccd triangle.
+            for (unsigned vtx = dMTV__MIN; vtx != dMTV__MAX; ++vtx) {
+                dVector3 relative, rotated;
+                dSubtractVectors3(relative, triangle[vtx], pivot);
+                dQuatTransform(combined, relative, rotated);
+                dAddVectors3(rotated, rotated, pivot);
+
+                ccdVec3Set(&c2->vertices[vtx], (ccd_real_t)rotated[dV3E_X], (ccd_real_t)rotated[dV3E_Y], (ccd_real_t)rotated[dV3E_Z]);
+            }
+
+            dContactGeom probe;
+            setObjPosToTriangleCenter(c2);
+
+            if (ccdCollide(o1, o2, flags, &probe, skip, c1, &ccdSupportConvex, &ccdCenter, c2, &ccdSupportTriangle, &ccdCenter) != 1) {
+                continue;
+            }
+
+            probe.side2 = contact->side2;
+
+            if (meshFaceAngleView == NULL || correctTriangleContactNormal(c2, &probe, meshFaceAngleView, indices, numIndices)) {
+                contacCount = addUniqueContact(contacts, &probe, contacCount, maxContacts, flags, skip);
+            }
+        }
+    }
+
+    return contacCount;
+}
+
+static 
+bool correctTriangleContactNormal(ccd_triangle_t *t, dContactGeom *contact, // t: triangle being tested; contact: corrected in place
+    IFaceAngleStorageView *meshFaceAngleView, const int *indices, unsigned numIndices) // indices/numIndices are not read in this body
+{
+    dIASSERT(meshFaceAngleView != NULL);
+    // Returns false if the triangle normal or an edge axis cannot be normalized (the contact is then left untouched), true otherwise.
+    bool anyFault = false;
+
+    ccd_vec3_t cntOrigNormal, cntNormal;
+    ccdVec3Set(&cntNormal, contact->normal[0], contact->normal[1], contact->normal[2]);
+    ccdVec3Copy(&cntOrigNormal, &cntNormal);
+
+    // The contact normal scaled by the contact depth: the offset by which the contact point
+    // is moved back and forth when it is tested against the edge planes below
+    ccd_vec3_t cntScaledNormal;
+    ccdVec3CopyScaled(&cntScaledNormal, &cntNormal, contact->depth);
+    // Edge vectors (normalized later, inside the loop). NOTE(review): assuming ccdVec3Sub2(d, a, b) yields a - b, edges[i] starts at vertex i - confirm
+    ccd_vec3_t edges[dMTV__MAX];
+    ccdVec3Sub2(&edges[dMTV_THIRD], &t->vertices[0], &t->vertices[2]);
+    ccdVec3Sub2(&edges[dMTV_SECOND], &t->vertices[2], &t->vertices[1]);
+    ccdVec3Sub2(&edges[dMTV_FIRST], &t->vertices[1], &t->vertices[0]);
+    dSASSERT(dMTV__MAX == 3);
+
+    bool contactGenerated = false, contactPreserved = false;
+    // Triangle face normal
+    ccd_vec3_t triNormal;
+    ccdVec3Cross(&triNormal, &edges[dMTV_FIRST], &edges[dMTV_SECOND]);
+    if (ccdVec3SafeNormalize(&triNormal) != 0) {
+        anyFault = true;
+    }
+
+    // Check the edges to see if one of them is involved (no edge is visited if the face normal could not be computed)
+    for (unsigned testEdgeIndex = !anyFault ? dMTV__MIN : dMTV__MAX; testEdgeIndex != dMTV__MAX; ++testEdgeIndex) {
+        ccd_vec3_t edgeNormal, vertexToPos, v;
+        ccd_vec3_t &edgeAxis = edges[testEdgeIndex]; 
+        
+        // Edge axis
+        if (ccdVec3SafeNormalize(&edgeAxis) != 0) {
+            // This should not happen normally, as in the case one of the edges is degenerate
+            // the triangle normal calculation would have to fail above. If for some
+            // reason the above calculation succeeds and this one does not, it is
+            // OK to break at this point as well.
+            anyFault = true;
+            break;
+        }
+        
+        // Edge normal: the cross product of the edge axis and the triangle normal
+        ccdVec3Cross(&edgeNormal, &edgeAxis, &triNormal);
+        // ccdVec3Normalize(&edgeNormal); -- the two vectors above were already normalized and perpendicular
+
+        // Check if the contact point is located close to this edge - move it back and forth
+        // along the scaled normal and check the resulting segment for intersection with the edge plane
+        ccdVec3Set(&vertexToPos, contact->pos[0], contact->pos[1], contact->pos[2]);
+        ccdVec3Sub(&vertexToPos, &t->vertices[testEdgeIndex]);
+        ccdVec3Sub2(&v, &vertexToPos, &cntScaledNormal);
+        
+        if (ccdVec3Dot(&edgeNormal, &v) < 0) { // the "back" end of the segment is on the negative side of the edge plane
+            ccdVec3Add2(&v, &vertexToPos, &cntScaledNormal);
+            
+            if (ccdVec3Dot(&edgeNormal, &v) > 0) { // ...and the "forth" end is on the positive side
+                // This is an edge contact
+                // x/y below are the contact normal's components along the triangle normal and the edge normal
+                ccd_real_t x = ccdVec3Dot(&triNormal, &cntNormal);
+                ccd_real_t y = ccdVec3Dot(&edgeNormal, &cntNormal);
+                ccd_real_t contactNormalToTriangleNormalAngle = CCD_ATAN2(y, x);
+
+                dReal angleValueAsDRead;
+                FaceAngleDomain angleDomain = meshFaceAngleView->retrieveFacesAngleFromStorage(angleValueAsDRead, contact->side2, (dMeshTriangleVertex)testEdgeIndex);
+                ccd_real_t angleValue = (ccd_real_t)angleValueAsDRead;
+
+                ccd_real_t targetAngle;
+                contactGenerated = false, contactPreserved = false; // re-assign to make optimizer's task easier
+
+                if (angleDomain != FAD_CONCAVE) {
+                    // Convex or flat - ensure the contact normal is within the allowed range
+                    // formed by the two triangles' normals.
+                    if (contactNormalToTriangleNormalAngle < CCD_ZERO) {
+                        targetAngle = CCD_ZERO;
+                    }
+                    else if (contactNormalToTriangleNormalAngle > angleValue) {
+                        targetAngle = angleValue;
+                    }
+                    else {
+                        contactPreserved = true;
+                    }
+                }
+                else {
+                    // Concave - rotate the contact normal to the face angle bisect plane
+                    // (or to triangle normal-edge plane if negative angles are not stored)
+                    targetAngle = angleValue != 0 ? CCD_REAL(0.5) * angleValue : CCD_ZERO;
+                    // There is little chance the normal will initially match the correct plane, but still, a small check could save lots of calculations
+                    if (contactNormalToTriangleNormalAngle == targetAngle) {
+                        contactPreserved = true;
+                    }
+                }
+
+                if (!contactPreserved) {
+                    ccd_quat_t q;
+                    ccdQuatSetAngleAxis(&q, targetAngle - contactNormalToTriangleNormalAngle, &edgeAxis); // rotation about the edge by the angle still missing to the target
+                    ccdQuatRotVec2(&cntNormal, &cntNormal, &q);
+                    contactGenerated = true;
+                }
+
+                // Calculated successfully
+                break;
+            }
+        }
+    }
+
+    if (!anyFault && !contactPreserved) {
+        // Either no edge contact was detected (use the triangle normal) or the normal was rotated above (use the rotated normal)
+        const ccd_vec3_t &cntNormalToUse = !contactGenerated ? triNormal : cntNormal;
+
+        contact->normal[dV3E_X] = ccdVec3X(&cntNormalToUse);
+        contact->normal[dV3E_Y] = ccdVec3Y(&cntNormalToUse);
+        contact->normal[dV3E_Z] = ccdVec3Z(&cntNormalToUse);
+        contact->depth *= CCD_FMAX(0.0, ccdVec3Dot(&cntOrigNormal, &cntNormalToUse)); // scale the depth by the dot product of the original and the new normal, clamped at zero
+    }
+
+    bool result = !anyFault;
+    return result;
+}
+
+
+static 
+unsigned addUniqueContact(dContactGeom *contacts, dContactGeom *c, unsigned contactcount, unsigned maxcontacts, int flags, int skip)
+{
+    // Adds contact c to the contacts array and returns the new number of stored contacts.
+    // A stored contact lying within CONTACT_POS_EPSILON of c on every axis absorbs it; else c
+    // is appended, or, once the array is full, overwrites the shallowest contact shallower than c.
+    dSASSERT(dV3E__AXES_MAX - dV3E__AXES_MIN == 3);
+    const bool arrayIsFull = contactcount == maxcontacts;
+    unsigned targetSlot = contactcount;
+    dReal shallowestDepth = c->depth;
+
+    for (unsigned i = 0; i != contactcount; ++i) {
+        dContactGeom *stored = SAFECONTACT(flags, contacts, i, skip);
+        const bool samePosition = fabs(c->pos[dV3E_X] - stored->pos[dV3E_X]) < CONTACT_POS_EPSILON
+            && fabs(c->pos[dV3E_Y] - stored->pos[dV3E_Y]) < CONTACT_POS_EPSILON
+            && fabs(c->pos[dV3E_Z] - stored->pos[dV3E_Z]) < CONTACT_POS_EPSILON;
+
+        if (samePosition) {
+            // Accumulate similar contacts: sum the normals and keep the larger depth
+            dAddVectors3(stored->normal, stored->normal, c->normal);
+            stored->depth = dMax(stored->depth, c->depth);
+            *_type_cast_union<bool>(&stored->normal[dV3E_PAD]) = true; // Mark the contact as a merged one
+            return contactcount;
+        }
+
+        if (arrayIsFull && stored->depth < shallowestDepth) {
+            shallowestDepth = stored->depth;
+            targetSlot = i;
+        }
+    }
+
+    if (targetSlot < maxcontacts) {
+        dContactGeom *slot = SAFECONTACT(flags, contacts, targetSlot, skip);
+        slot->g1 = c->g1;
+        slot->g2 = c->g2;
+        slot->depth = c->depth;
+        slot->side1 = c->side1;
+        slot->side2 = c->side2;
+        dCopyVector3(slot->pos, c->pos);
+        dCopyVector3(slot->normal, c->normal);
+        *_type_cast_union<bool>(&slot->normal[dV3E_PAD]) = false; // A freshly stored contact starts out unmerged
+        if (targetSlot == contactcount) { ++contactcount; }
+    }
+
+    return contactcount;
+}
+
+static 
+void setObjPosToTriangleCenter(ccd_triangle_t *t) 
+{
+    // Stores (v0 + v1 + v2) / 3 in the triangle's object position, accumulating in place
+    ccd_vec3_t *center = &t->o.pos;
+    ccdVec3Set(center, 0, 0, 0);
+    for (const ccd_vec3_t *vertex = t->vertices; vertex != t->vertices + 3; ++vertex) ccdVec3Add(center, vertex);
+    ccdVec3Scale(center, 1.0f / 3.0f);
+}
+
+static 
+void ccdSupportTriangle(const void *obj, const ccd_vec3_t *_dir, ccd_vec3_t *v)
+{
+    // Support function of a triangle: writes into v the vertex having the greatest
+    // dot product with _dir (on ties the earliest such vertex is kept).
+    const ccd_triangle_t *triangle = (ccd_triangle_t *) obj;
+    ccd_real_t bestDot = -CCD_REAL_MAX;
+    for (const ccd_vec3_t *vertex = triangle->vertices; vertex != triangle->vertices + 3; ++vertex) {
+        const ccd_real_t candidateDot = ccdVec3Dot(_dir, vertex);
+        if (!(candidateDot > bestDot)) continue;
+        ccdVec3Copy(v, vertex);
+        bestDot = candidateDot;
+    }
+}
+
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_libccd.h b/libs/ode-0.16.1/ode/src/collision_libccd.h
new file mode 100644
index 0000000..13c67ba
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_libccd.h
@@ -0,0 +1,44 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _LIBCCD_COLLISION_H_
+#define _LIBCCD_COLLISION_H_
+
+int dCollideCylinderCylinder(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideBoxCylinderCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideCapsuleCylinder(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideConvexBoxCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideConvexCapsuleCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideConvexCylinderCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideConvexSphereCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideConvexConvexCCD(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+unsigned dCollideConvexTrimeshTrianglesCCD(dxGeom *o1, dxGeom *o2, const int *indices, unsigned numIndices, int flags, dContactGeom *contacts, int skip);
+
+#endif /* _LIBCCD_COLLISION_H_ */
diff --git a/libs/ode-0.16.1/ode/src/collision_quadtreespace.cpp b/libs/ode-0.16.1/ode/src/collision_quadtreespace.cpp
new file mode 100644
index 0000000..200b20f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_quadtreespace.cpp
@@ -0,0 +1,609 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// QuadTreeSpace by Erwin de Vries.
+// With math corrections by Oleh Derevenko. ;)
+
+#include <ode/common.h>
+#include <ode/collision_space.h>
+#include <ode/collision.h>
+#include "config.h"
+#include "matrix.h"
+#include "collision_kernel.h"
+
+#include "collision_space_internal.h"
+
+
+#define AXIS0 0
+#define AXIS1 1
+#define UP 2
+// AXIS0/AXIS1 select the two coordinates the blocks are split along (aabb slots AXISn*2 and AXISn*2+1); in this file UP is only used by the DRAWBLOCKS debug drawing.
+//#define DRAWBLOCKS
+// Each block is divided SPLITAXIS times along both axes, which gives SPLITS children per block.
+const int SPLITAXIS = 2;
+const int SPLITS = SPLITAXIS * SPLITAXIS;
+// True when the geom's gflags, masked with GEOM_ENABLE_TEST_MASK, equal GEOM_ENABLE_TEST_VALUE.
+#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)
+
+class Block{ // One node of the quadtree; covers [mMinX, mMaxX) x [mMinZ, mMaxZ) as tested by Inside()
+public:
+    dReal mMinX, mMaxX; // bounds compared against the AABB's AXIS0 range
+    dReal mMinZ, mMaxZ; // bounds compared against the AABB's AXIS1 range
+    // Geoms stored directly in this block form a singly linked list through dxGeom::next_ex
+    dGeomID mFirst;
+    int mGeomCount; // maintained by AddObject/DelObject: geoms in this block plus all of its descendants
+    // mParent is 0 for the root; mChildren points at SPLITS consecutive child blocks, or is 0 for a leaf
+    Block* mParent;
+    Block* mChildren;
+    // Initializes this block and, while Depth > 0, its children taken from Blocks (which is advanced)
+    void Create(const dReal MinX, const dReal MaxX, const dReal MinZ, const dReal MaxZ, Block* Parent, int Depth, Block*& Blocks);
+    // Tests the geoms of this subtree against each other
+    void Collide(void* UserData, dNearCallback* Callback);
+    void Collide(dGeomID g1, dGeomID g2, void* UserData, dNearCallback* Callback); // g1 against the list starting at g2, then against the child blocks
+    // Tests g2 against the geoms stored directly in this block only
+    void CollideLocal(dGeomID g2, void* UserData, dNearCallback* Callback);
+    // List maintenance; both update mGeomCount of this block and of every ancestor
+    void AddObject(dGeomID Object);
+    void DelObject(dGeomID Object);
+    void Traverse(dGeomID Object); // moves Object to the block GetBlock(Object->aabb) returns, if that is not this block
+    // True if the AABB's AXIS0 and AXIS1 ranges lie within this block's bounds
+    bool Inside(const dReal* AABB);
+    // Block lookup for an AABB: GetBlock may climb to ancestors first, GetBlockChild only descends
+    Block* GetBlock(const dReal* AABB);
+    Block* GetBlockChild(const dReal* AABB);
+};
+
+
+#ifdef DRAWBLOCKS
+#include "..\..\Include\drawstuff\\drawstuff.h"
+// Debug helper (compiled only with DRAWBLOCKS): draws the block's footprint as a wireframe box spanning UP = -1 .. +1
+static void DrawBlock(Block* Block){
+    dVector3 v[8]; // v[0..3]: the four corners at UP = -1, v[4..7]: the same corners at UP = +1
+    v[0][AXIS0] = Block->mMinX;
+    v[0][UP] = REAL(-1.0);
+    v[0][AXIS1] = Block->mMinZ;
+
+    v[1][AXIS0] = Block->mMinX;
+    v[1][UP] = REAL(-1.0);
+    v[1][AXIS1] = Block->mMaxZ;
+
+    v[2][AXIS0] = Block->mMaxX;
+    v[2][UP] = REAL(-1.0);
+    v[2][AXIS1] = Block->mMinZ;
+
+    v[3][AXIS0] = Block->mMaxX;
+    v[3][UP] = REAL(-1.0);
+    v[3][AXIS1] = Block->mMaxZ;
+
+    v[4][AXIS0] = Block->mMinX;
+    v[4][UP] = REAL(1.0);
+    v[4][AXIS1] = Block->mMinZ;
+
+    v[5][AXIS0] = Block->mMinX;
+    v[5][UP] = REAL(1.0);
+    v[5][AXIS1] = Block->mMaxZ;
+
+    v[6][AXIS0] = Block->mMaxX;
+    v[6][UP] = REAL(1.0);
+    v[6][AXIS1] = Block->mMinZ;
+
+    v[7][AXIS0] = Block->mMaxX;
+    v[7][UP] = REAL(1.0);
+    v[7][AXIS1] = Block->mMaxZ;
+
+    // Bottom rectangle (UP = -1)
+    dsDrawLine(v[0], v[1]);
+    dsDrawLine(v[1], v[3]);
+    dsDrawLine(v[3], v[2]);
+    dsDrawLine(v[2], v[0]);
+
+    // Top rectangle (UP = +1)
+    dsDrawLine(v[4], v[5]);
+    dsDrawLine(v[5], v[7]);
+    dsDrawLine(v[7], v[6]);
+    dsDrawLine(v[6], v[4]);
+
+    // Sides: each bottom corner joined to the top corner above it
+    dsDrawLine(v[0], v[4]);
+    dsDrawLine(v[1], v[5]);
+    dsDrawLine(v[2], v[6]);
+    dsDrawLine(v[3], v[7]);
+}
+#endif	//DRAWBLOCKS
+
+
+void Block::Create(const dReal MinX, const dReal MaxX, const dReal MinZ, const dReal MaxZ, Block* Parent, int Depth, Block*& Blocks){
+    // Sets up this block for the given bounds and recursively builds Depth more levels
+    // below it. Child storage is taken from the Blocks pool, SPLITS entries at a time;
+    // Blocks is advanced past whatever this subtree consumed.
+    dIASSERT(MinX <= MaxX);
+    dIASSERT(MinZ <= MaxZ);
+    mMinX = MinX;
+    mMaxX = MaxX;
+    mMinZ = MinZ;
+    mMaxZ = MaxZ;
+    mParent = Parent;
+    mFirst = 0;
+    mGeomCount = 0;
+
+    if (Depth <= 0){
+        // Leaf block
+        mChildren = 0;
+        return;
+    }
+
+    mChildren = Blocks;
+    Blocks += SPLITS;
+
+    const dReal StepX = (MaxX - MinX) / SPLITAXIS;
+    const dReal StepZ = (MaxZ - MinZ) / SPLITAXIS;
+
+    // Children are created with the Z index varying fastest. The last child along each
+    // axis takes the parent's maximum exactly rather than a computed sum.
+    Block* Child = mChildren;
+    dReal LeftX = MinX;
+    for (int Column = 0; Column != SPLITAXIS; ++Column){
+        const dReal RightX = (Column != SPLITAXIS - 1) ? LeftX + StepX : MaxX;
+
+        dReal LeftZ = MinZ;
+        for (int Row = 0; Row != SPLITAXIS; ++Row){
+            const dReal RightZ = (Row != SPLITAXIS - 1) ? LeftZ + StepZ : MaxZ;
+            (Child++)->Create(LeftX, RightX, LeftZ, RightZ, this, Depth - 1, Blocks);
+            LeftZ = RightZ;
+        }
+        LeftX = RightX;
+    }
+}
+
+void Block::Collide(void* UserData, dNearCallback* Callback){
+#ifdef DRAWBLOCKS
+    DrawBlock(this);
+#endif
+    // Every enabled geom stored here is tested against the geoms that follow it in the
+    // local list and, inside the two-geom overload, against the child blocks.
+    for (dxGeom* Geom = mFirst; Geom; Geom = Geom->next_ex){
+        if (!GEOM_ENABLED(Geom)){
+            continue;
+        }
+        Collide(Geom, Geom->next_ex, UserData, Callback);
+    }
+
+    // Pairs located entirely below one child are found by recursing into that child.
+    // Children whose subtree holds fewer than two geoms are skipped.
+    if (!mChildren){
+        return;
+    }
+    for (Block* Child = mChildren; Child != mChildren + SPLITS; ++Child){
+        if (Child->mGeomCount > 1){
+            Child->Collide(UserData, Callback);
+        }
+    }
+}
+
+// Note: g2 is assumed to be in this Block
+void Block::Collide(dxGeom* g1, dxGeom* g2, void* UserData, dNearCallback* Callback){
+#ifdef DRAWBLOCKS
+    DrawBlock(this);
+#endif
+    // Test g1 against the enabled geoms of the local list, starting at g2
+    for (dxGeom* Other = g2; Other; Other = Other->next_ex){
+        if (GEOM_ENABLED(Other)){
+            collideAABBs (g1, Other, UserData, Callback);
+        }
+    }
+
+    if (!mChildren){
+        return;
+    }
+
+    // Descend into the child blocks
+    const dReal* Bounds = g1->aabb;
+    for (int i = 0; i != SPLITS; ++i){
+        Block &Child = mChildren[i];
+
+        // Nothing to test in an empty subtree
+        if (Child.mGeomCount == 0){
+            continue;
+        }
+
+        // A subtree holding a single geom is visited without the block test. Otherwise
+        // subtrees that g1's AABB does not overlap on AXIS0/AXIS1 are skipped.
+        if (Child.mGeomCount != 1){
+            const bool MissesX = Bounds[AXIS0 * 2 + 0] >= Child.mMaxX || Bounds[AXIS0 * 2 + 1] < Child.mMinX;
+            const bool MissesZ = Bounds[AXIS1 * 2 + 0] >= Child.mMaxZ || Bounds[AXIS1 * 2 + 1] < Child.mMinZ;
+            if (MissesX || MissesZ){ continue; }
+        }
+
+        Child.Collide(g1, Child.mFirst, UserData, Callback);
+    }
+}
+
+void Block::CollideLocal(dxGeom* g2, void* UserData, dNearCallback* Callback){
+    // Tests g2 against the enabled geoms stored directly in this block; child blocks
+    // are not visited.
+    for (dxGeom* Local = mFirst; Local; Local = Local->next_ex){
+        if (!GEOM_ENABLED(Local)){
+            continue;
+        }
+        collideAABBs (Local, g2, UserData, Callback);
+    }
+}
+
+void Block::AddObject(dGeomID Object){
+    // Links Object at the head of this block's geom list and records this block as
+    // its owner in tome_ex (stored through a dxGeom** cast).
+    Object->next_ex = mFirst;
+    mFirst = Object;
+    Object->tome_ex = (dxGeom**)this;
+
+    // This block and each of its ancestors now account for one more geom.
+    // 'this' is never null, so the loop body runs at least once, exactly like
+    // a do/while walking the parent chain.
+    for (Block* Node = this; Node; Node = Node->mParent){
+        ++Node->mGeomCount;
+    }
+}
+
+void Block::DelObject(dGeomID Object){
+    // Removes Object from this block:
+    //  - unlinks it from the list of geoms stored directly here (nothing is unlinked if it is not in the list),
+    //  - clears both of its list fields,
+    //  - decrements mGeomCount on this block and on every ancestor.
+
+    // Follow the chain of links, starting with mFirst, until the link that refers to
+    // Object (or the terminating null) is reached.
+    dxGeom** Link = &mFirst;
+    while (*Link && *Link != Object){
+        Link = &(*Link)->next_ex;
+    }
+    if (*Link){
+        *Link = Object->next_ex;
+    }
+
+    // tome_ex and next_ex are reset together. dxQuadTreeSpace::add() asserts that both are
+    // zero; a next_ex still pointing at the former successor would trip that assertion when
+    // a removed geom is added again, unless some other code happened to clear it first.
+    Object->tome_ex = 0;
+    Object->next_ex = 0;
+
+    // Now traverse upwards to tell that we have lost a geom
+    for (Block* Node = this; Node; Node = Node->mParent){
+        Node->mGeomCount--;
+    }
+}
+
+void Block::Traverse(dGeomID Object){
+    // Re-files the geom when GetBlock() picks a different block for its current AABB.
+    // This could be more optimal, but the loss should be very small.
+    Block* const Target = GetBlock(Object->aabb);
+    if (Target == this){
+        return;
+    }
+    DelObject(Object);
+    Target->AddObject(Object);
+}
+
+bool Block::Inside(const dReal* AABB){ // minimum bounds are inclusive, maximum bounds exclusive
+    return mMinX <= AABB[AXIS0 * 2 + 0] && mMaxX > AABB[AXIS0 * 2 + 1] && mMinZ <= AABB[AXIS1 * 2 + 0] && mMaxZ > AABB[AXIS1 * 2 + 1];
+}
+
+Block* Block::GetBlock(const dReal* AABB){
+    // Climb until a block contains the AABB, then descend from it; at the root there is little choice but to return it
+    Block* Candidate = this;
+    while (!Candidate->Inside(AABB)){
+        if (!Candidate->mParent) return Candidate;
+        Candidate = Candidate->mParent;
+    }
+    return Candidate->GetBlockChild(AABB);
+}
+
+Block* Block::GetBlockChild(const dReal* AABB){
+    // Descends from this block, each time entering the first child that contains the
+    // AABB; stops at a leaf or when none of the children contains it.
+    Block* Best = this;
+    int i = 0;
+    while (Best->mChildren && i < SPLITS){
+        Block* Child = &Best->mChildren[i++];
+        if (Child->Inside(AABB)){ Best = Child; i = 0; }
+    }
+    return Best;	// This is the best block
+}
+
+//****************************************************************************
+// quadtree space
+
+struct dxQuadTreeSpace : public dxSpace{
+    Block* Blocks;	// Blocks[0] is the root; the whole tree lives in this one dAlloc'ed array
+    // Geoms pushed by add() and dirty(); drained by cleanGeoms(). A geom may appear more than once.
+    dArray<dxGeom*> DirtyList;
+    // The constructor builds a tree with Depth levels below the root over Center +/- Extents (AXIS0/AXIS1 only)
+    dxQuadTreeSpace(dSpaceID _space, const dVector3 Center, const dVector3 Extents, int Depth);
+    ~dxQuadTreeSpace();
+    // Not implemented: reports via dDebug and returns 0
+    dxGeom* getGeom(int i);
+    // Membership changes and dirty notification
+    void add(dxGeom* g);
+    void remove(dxGeom* g);
+    void dirty(dxGeom* g);
+    // Empty body; aabb is set to infinite bounds in the constructor
+    void computeAABB();
+    // cleanGeoms() refreshes the dirty geoms; collide() tests the space against itself, collide2() against one geom
+    void cleanGeoms();
+    void collide(void* UserData, dNearCallback* Callback);
+    void collide2(void* UserData, dxGeom* g1, dNearCallback* Callback);
+
+    // Temp data (initialized by the constructor; otherwise referenced only by the disabled code in getGeom())
+    Block* CurrentBlock;	// Only used while enumerating
+    int* CurrentChild;	// Only used while enumerating
+    int CurrentLevel;	// Only used while enumerating
+    dxGeom* CurrentObject;	// Only used while enumerating
+    int CurrentIndex;
+};
+
+namespace {
+
+    // Number of blocks in a complete 4-ary tree with levels 0..depth:
+    // 1 + 4 + ... + 4^depth = (4^(depth+1) - 1) / 3
+    inline
+    sizeint numNodes(int depth) 
+    {
+        const int shift = 2 * (depth + 1); // 4^(depth+1) == 2^shift
+        const sizeint geometricSumNumerator = ((sizeint)1 << shift) - 1;
+        return geometricSumNumerator / 3;
+    }
+
+}
+
+
+
+dxQuadTreeSpace::dxQuadTreeSpace(dSpaceID _space, const dVector3 Center, const dVector3 Extents, int Depth) : dxSpace(_space){
+    type = dQuadTreeSpaceClass;
+    // One contiguous pool holds every block of the tree; the root takes the first slot
+    const sizeint PoolSize = numNodes(Depth);
+    Blocks = (Block*)dAlloc(PoolSize * sizeof(Block));
+    Block* const Root = &Blocks[0];
+    Block* NextFree = Root + 1;	// This pointer gets modified by Create() as child blocks are handed out
+    // The upper bounds are moved to the next representable value towards +infinity,
+    // presumably because Block::Inside() treats a block's maximum as exclusive
+    const dReal MinX = Center[AXIS0] - Extents[AXIS0];
+    const dReal MaxX = dNextAfter((Center[AXIS0] + Extents[AXIS0]), (dReal)dInfinity);
+    const dReal MinZ = Center[AXIS1] - Extents[AXIS1];
+    const dReal MaxZ = dNextAfter((Center[AXIS1] + Extents[AXIS1]), (dReal)dInfinity);
+    Root->Create(MinX, MaxX, MinZ, MaxZ, 0, Depth, NextFree);
+
+    // Enumeration state
+    CurrentBlock = 0;
+    CurrentChild = (int*)dAlloc((Depth + 1) * sizeof(int));
+    CurrentLevel = 0;
+    CurrentObject = 0;
+    CurrentIndex = -1;
+
+    // Init AABB. We initialize to infinity because it is not illegal for an object to be outside of the tree. It is simply inserted in the root block
+    for (int Axis = 0; Axis != 3; ++Axis){
+        aabb[Axis * 2 + 0] = -dInfinity;
+        aabb[Axis * 2 + 1] = dInfinity;
+    }
+}
+
+dxQuadTreeSpace::~dxQuadTreeSpace(){
+    // Recover the Depth given to the constructor: it equals the number of child links
+    // below the root. Counting the root as well would overstate it by one and make both
+    // sizes handed to dFree larger than the sizes that were passed to dAlloc.
+    int Depth = 0;
+    for (Block* Current = Blocks[0].mChildren; Current; Current = Current->mChildren){
+        Depth++;
+    }
+
+    const sizeint BlockCount = numNodes(Depth);
+    dFree(Blocks, BlockCount * sizeof(Block));
+    dFree(CurrentChild, (Depth + 1) * sizeof(int));
+}
+
+dxGeom* dxQuadTreeSpace::getGeom(int Index){
+    dUASSERT(Index >= 0 && Index < count, "index out of range");
+    // Enumeration by index is not implemented for this space: report it through dDebug and return 0
+    //@@@
+    dDebug (0,"dxQuadTreeSpace::getGeom() not yet implemented");
+
+    return 0;
+
+    // This doesn't work - disabled draft of an enumeration, kept for reference (it uses member names the Block class no longer has)
+/*
+    if (CurrentIndex == Index){
+        // Loop through all objects in the local list
+CHILDRECURSE:
+        if (CurrentObject){
+            dGeomID g = CurrentObject;
+            CurrentObject = CurrentObject->next_ex;
+            CurrentIndex++;
+
+#ifdef DRAWBLOCKS
+            DrawBlock(CurrentBlock);
+#endif	//DRAWBLOCKS
+            return g;
+        }
+        else{
+            // Now lets loop through our children. Starting at index 0.
+            if (CurrentBlock->Children){
+                CurrentChild[CurrentLevel] = 0;
+PARENTRECURSE:
+                for (int& i = CurrentChild[CurrentLevel]; i < SPLITS; i++){
+                    if (CurrentBlock->Children[i].GeomCount == 0){
+                        continue;
+                    }
+                    CurrentBlock = &CurrentBlock->Children[i];
+                    CurrentObject = CurrentBlock->First;
+
+                    i++;
+
+                    CurrentLevel++;
+                    goto CHILDRECURSE;
+                }
+            }
+        }
+
+        // Now lets go back to the parent so it can continue processing its other children.
+        if (CurrentBlock->Parent){
+            CurrentBlock = CurrentBlock->Parent;
+            CurrentLevel--;
+            goto PARENTRECURSE;
+        }
+    }
+    else{
+        CurrentBlock = &Blocks[0];
+        CurrentLevel = 0;
+        CurrentObject = CurrentObject;
+        CurrentIndex = 0;
+
+        // Other states are already set
+        CurrentObject = CurrentBlock->First;
+    }
+
+
+    if (current_geom && current_index == Index - 1){
+        //current_geom = current_geom->next_ex; // next
+        current_index = Index;
+        return current_geom;
+    }
+    else for (int i = 0; i < Index; i++){	// this will be verrrrrrry slow
+        getGeom(i);
+    }
+*/
+
+    return 0; // never reached: the function has already returned above
+}
+
+void dxQuadTreeSpace::add(dxGeom* g){
+    CHECK_NOT_LOCKED (this);
+    dAASSERT(g);
+    dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");
+    // Queue the geom for cleanGeoms(), file it under the best block for the AABB it carries now, then let the base class register it
+    DirtyList.push(g);
+    Block* const Home = Blocks->GetBlock(g->aabb);
+    Home->AddObject(g);
+    dxSpace::add(g);
+}
+
+void dxQuadTreeSpace::remove(dxGeom* g){
+    CHECK_NOT_LOCKED(this);
+    dAASSERT(g);
+    dUASSERT(g->parent_space == this,"object is not in this space");
+
+    // Take the geom out of the block recorded in its tome_ex back-pointer
+    Block* const Home = (Block*)g->tome_ex;
+    Home->DelObject(g);
+
+    // (mg) a geom can be queued in the dirty list more than once, so every entry is
+    // purged; the index only advances when nothing was removed at that position
+    int i = 0;
+    while (i < DirtyList.size()){
+        if (DirtyList[i] == g) DirtyList.remove(i);
+        else ++i;
+    }
+    dxSpace::remove(g);
+}
+
+void dxQuadTreeSpace::dirty(dxGeom* g){ // queues g in DirtyList, which cleanGeoms() processes; no check for duplicates is made
+    DirtyList.push(g);
+}
+
+void dxQuadTreeSpace::computeAABB(){
+    // Empty body: this space's aabb is set to infinite bounds once, in the constructor
+}
+
+void dxQuadTreeSpace::cleanGeoms(){
+    // Processes every geom queued in DirtyList: nested spaces are cleaned first, then the
+    // AABB is recomputed, the dirty flag is cleared and the geom is re-filed in the tree.
+    ++lock_count;
+
+    int Index = 0;
+    while (Index < DirtyList.size()){
+        dxGeom* const Dirty = DirtyList[Index++];
+        if (IS_SPACE(Dirty)){
+            ((dxSpace*)Dirty)->cleanGeoms();
+        }
+        Dirty->recomputeAABB();
+        dIASSERT((Dirty->gflags & GEOM_AABB_BAD) == 0);
+        Dirty->gflags &= ~GEOM_DIRTY;
+        Block* const Home = (Block*)Dirty->tome_ex;
+        Home->Traverse(Dirty);
+    }
+    DirtyList.setSize(0);
+
+    --lock_count;
+}
+
+void dxQuadTreeSpace::collide(void* UserData, dNearCallback* Callback){
+    dAASSERT(Callback);
+    // lock_count is raised for the whole pass and restored before returning
+    lock_count++;
+    cleanGeoms();
+    // Bring the dirty geoms up to date above, then start the recursive pair search at the root block
+    Blocks[0].Collide(UserData, Callback);
+
+    lock_count--;
+}
+
+
+struct DataCallback { // pairs a near callback with the data pointer it must be called with
+    void *data;
+    dNearCallback *callback;
+};
+// Adapter with the dNearCallback shape: forwards to the wrapped callback with the two geoms exchanged
+static void swap_callback(void *data, dxGeom *g1, dxGeom *g2)
+{
+    const DataCallback &wrapped = *static_cast<DataCallback*>(data);
+    (*wrapped.callback)(wrapped.data, g2, g1);
+}
+
+
+void dxQuadTreeSpace::collide2(void* UserData, dxGeom* g2, dNearCallback* Callback){
+    dAASSERT(g2 && Callback);
+
+    lock_count++;
+    cleanGeoms();
+    g2->recomputeAABB();
+
+    // Block::Collide() is handed g2 as its first geom, so its reports go through
+    // swap_callback to put g2 second, as in the CollideLocal() calls below.
+    DataCallback Swapped = {UserData, Callback};
+    if (g2->parent_space != this){
+        // The geom is not a direct member of this space: search from the root
+        Blocks[0].Collide(g2, Blocks[0].mFirst, &Swapped, swap_callback);
+    }
+    else {
+        // The geom is filed in this space: cover its own block and everything below it...
+        Block* Home = (Block*)g2->tome_ex;
+        Home->Collide(g2, Home->mFirst, &Swapped, swap_callback);
+
+        // ...then the geoms stored directly in each ancestor block
+        for (Block* Ancestor = Home->mParent; Ancestor; Ancestor = Ancestor->mParent){
+            Ancestor->CollideLocal(g2, UserData, Callback);
+        }
+    }
+
+    lock_count--;
+}
+
+dSpaceID dQuadTreeSpaceCreate(dxSpace* space, const dVector3 Center, const dVector3 Extents, int Depth){ // factory: forwards all arguments to the dxQuadTreeSpace constructor
+    return new dxQuadTreeSpace(space, Center, Extents, Depth);
+}
diff --git a/libs/ode-0.16.1/ode/src/collision_sapspace.cpp b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
new file mode 100644
index 0000000..76258bf
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_sapspace.cpp
@@ -0,0 +1,853 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Sweep and Prune adaptation/tweaks for ODE by Aras Pranckevicius.
+ *  Additional work by David Walters
+ *  Original code:
+ *    OPCODE - Optimized Collision Detection
+ *    Copyright (C) 2001 Pierre Terdiman
+ *    Homepage: http://www.codercorner.com/Opcode.htm
+ *
+ *  This version does complete radix sort, not "classical" SAP. So, we
+ *  have no temporal coherence, but are able to handle any movement
+ *  velocities equally well.
+ */
+
+#include <ode/common.h>
+#include <ode/collision_space.h>
+#include <ode/collision.h>
+
+#include "config.h"
+#include "matrix.h"
+#include "collision_kernel.h"
+#include "collision_space_internal.h"
+
+// Reference counting helper for radix sort global data.
+//static void RadixSortRef();
+//static void RadixSortDeref();
+
+
+// --------------------------------------------------------------------------
+//  Radix Sort Context
+// --------------------------------------------------------------------------
+// Scratch state for RadixSort(): owns a single heap block holding two equally sized rank (index) lists that the sort passes alternate between.
+struct RaixSortContext
+{
+public:
+    RaixSortContext(): mCurrentSize(0), mCurrentUtilization(0), mRanksValid(false), mRanksBuffer(NULL), mPrimaryRanks(NULL) {}
+    ~RaixSortContext() { FreeRanks(); }
+
+    // OPCODE's Radix Sorting, returns a list of indices in sorted order
+    const uint32* RadixSort( const float* input2, uint32 nb );
+
+private:
+    // Copying is disabled (declared, never defined): a copy would alias mRanksBuffer and both destructors would delete[] it.
+    RaixSortContext(const RaixSortContext&);
+    RaixSortContext& operator=(const RaixSortContext&);
+
+    void FreeRanks();
+    void AllocateRanks(sizeint nNewSize);
+    void ReallocateRanksIfNecessary(sizeint nNewSize);
+    void SetCurrentSize(sizeint nValue) { mCurrentSize = nValue; }
+    sizeint GetCurrentSize() const { return mCurrentSize; }
+    void SetCurrentUtilization(sizeint nValue) { mCurrentUtilization = nValue; }
+    sizeint GetCurrentUtilization() const { return mCurrentUtilization; }
+
+    uint32 *GetRanks1() const { return mPrimaryRanks; }		// the primary list
+    uint32 *GetRanks2() const { return mRanksBuffer + ((mRanksBuffer + mCurrentSize) - mPrimaryRanks); }	// whichever half of the block is not primary
+    void SwapRanks() { mPrimaryRanks = GetRanks2(); }		// the other half becomes primary
+
+    bool AreRanksValid() const { return mRanksValid; }
+    void InvalidateRanks() { mRanksValid = false; }
+    void ValidateRanks() { mRanksValid = true; }
+
+private:
+    sizeint mCurrentSize;						//!< Number of entries in each of the two lists (the block holds twice as many)
+    sizeint mCurrentUtilization;					//!< Entry count requested by the latest ReallocateRanksIfNecessary() call
+    bool mRanksValid;						//!< Cleared when the utilization changes; set by RadixSort() once a pass has written ranks
+    uint32* mRanksBuffer;						//!< Two lists allocated sequentially in a single block (owned, freed by FreeRanks)
+    uint32* mPrimaryRanks;						//!< Start of the list returned by GetRanks1(); points into mRanksBuffer
+};
+// Allocates one block of 2*nNewSize ranks and makes its first half the primary list; expects the current size to be 0.
+void RaixSortContext::AllocateRanks(sizeint nNewSize)
+{
+    dIASSERT(GetCurrentSize() == 0);
+
+    uint32* const block = new uint32[2 * nNewSize];
+    mRanksBuffer = block;
+    mPrimaryRanks = block;
+    SetCurrentSize(nNewSize);
+}
+// Releases the rank block and resets the size to 0. Both pointers are cleared so a later FreeRanks() (e.g. from the destructor) cannot delete[] the block twice.
+void RaixSortContext::FreeRanks()
+{
+    delete[] mRanksBuffer;                  // delete[] on a null pointer is a no-op, so this is safe before any allocation
+    mRanksBuffer = mPrimaryRanks = NULL;    // otherwise a throwing 'new' in AllocateRanks() would leave both dangling
+    SetCurrentSize(0);
+}
+// Prepares the rank storage for nNewSize entries: grows the block when it is too small and invalidates the stored ranks.
+void RaixSortContext::ReallocateRanksIfNecessary(sizeint nNewSize)
+{
+    // Same entry count as the previous request: buffer and ranks are left as they are.
+    if (nNewSize == GetCurrentUtilization())
+    {
+        return;
+    }
+
+    // The block is only ever replaced by a bigger one; a smaller request reuses it.
+    const bool needsBiggerBlock = nNewSize > GetCurrentSize();
+    if (needsBiggerBlock)
+    {
+        // Drop the old block first, then take a fresh one.
+        FreeRanks();
+        AllocateRanks(nNewSize);
+    }
+
+    // A different entry count means the stored ranks cannot be reused.
+    InvalidateRanks();
+    SetCurrentUtilization(nNewSize);
+}
+
+// --------------------------------------------------------------------------
+//  SAP space code
+// --------------------------------------------------------------------------
+// Sweep-and-prune space: keeps its geoms in a dirty list and a clean list, and finds candidate pairs by sorting AABBs along one axis (see BoxPruning).
+struct dxSAPSpace : public dxSpace
+{
+    // Constructor / Destructor
+    dxSAPSpace( dSpaceID _space, int sortaxis );	// sortaxis: packed axis order, decoded two bits per axis by the constructor
+    ~dxSAPSpace();
+
+    // dxSpace virtual interface
+    virtual dxGeom* getGeom(int i);
+    virtual void add(dxGeom* g);
+    virtual void remove(dxGeom* g);
+    virtual void dirty(dxGeom* g);
+    virtual void computeAABB();
+    virtual void cleanGeoms();
+    virtual void collide( void *data, dNearCallback *callback );
+    virtual void collide2( void *data, dxGeom *geom, dNearCallback *callback );
+
+private:
+
+    //--------------------------------------------------------------------------
+    // Local Declarations
+    //--------------------------------------------------------------------------
+
+    //! A generic couple structure; BoxPruning fills it with two indices into its 'geoms' array
+    struct Pair
+    {
+        uint32 id0;	//!< First index of the pair
+        uint32 id1;	//!< Second index of the pair
+
+        // Default and Value Constructor (the default one leaves both indices uninitialized)
+        Pair() {}
+        Pair( uint32 i0, uint32 i1 ) : id0( i0 ), id1( i1 ) {}
+    };
+
+    //--------------------------------------------------------------------------
+    // Helpers
+    //--------------------------------------------------------------------------
+
+    /**
+    *	Complete box pruning.
+    *  Finds the overlapping pairs of boxes, each box of the pair
+    *  belongs to the same set.
+    *
+    *	@param	count	[in] number of boxes.
+    *	@param	geoms	[in] geoms of boxes.
+    *	@param	pairs	[out] overlapping pairs are appended here; id0/id1 are indices into geoms.
+    */
+    void BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs );
+
+
+    //--------------------------------------------------------------------------
+    // Implementation Data
+    //--------------------------------------------------------------------------
+
+    // We have two lists (arrays of pointers) to dirty and clean
+    // geoms. Each geom knows its index into the corresponding list
+    // (see the GEOM_*_IDX macros defined after this struct).
+    dArray<dxGeom*> DirtyList; // dirty geoms
+    dArray<dxGeom*> GeomList;	// clean geoms
+
+    // For SAP, we ultimately separate "normal" geoms and the ones that have
+    // infinite AABBs. No point doing SAP on infinite ones (and it doesn't handle
+    // infinite geoms anyway). Both lists are rebuilt by every collide() call.
+    dArray<dxGeom*> TmpGeomList;	// temporary for normal geoms
+    dArray<dxGeom*> TmpInfGeomList;	// temporary for geoms with infinite AABBs
+
+    // Our sorting axes. (X,Z,Y is often best). Stored *2 for minor speedup
+    // Axis indices into geom's aabb are: min=idx, max=idx+1
+    uint32 ax0idx;
+    uint32 ax1idx;
+    uint32 ax2idx;
+
+    // pruning position array scratch pad (one sort key per geom, filled by BoxPruning)
+    // NOTE: this is float not dReal because of the OPCODE radix sorter
+    dArray< float > poslist;
+    RaixSortContext	sortContext;
+};
+
+// Creation: allocates a dxSAPSpace with 'new'; 'axisorder' is passed through to the dxSAPSpace constructor, which decodes it.
+dSpaceID dSweepAndPruneSpaceCreate( dxSpace* space, int axisorder ) {
+    return new dxSAPSpace( space, axisorder );
+}
+
+
+//==============================================================================
+// True when the geom's gflags, masked with GEOM_ENABLE_TEST_MASK, equal GEOM_ENABLE_TEST_VALUE.
+#define GEOM_ENABLED(g) (((g)->gflags & GEOM_ENABLE_TEST_MASK) == GEOM_ENABLE_TEST_VALUE)
+
+// HACK: We abuse the 'next_ex' and 'tome_ex' pointer members of dxGeom to store integer indices into the dirty/geom lists.
+#define GEOM_SET_DIRTY_IDX(g,idx) { (g)->next_ex = (dxGeom*)(sizeint)(idx); }
+#define GEOM_SET_GEOM_IDX(g,idx) { (g)->tome_ex = (dxGeom**)(sizeint)(idx); }
+#define GEOM_GET_DIRTY_IDX(g) ((int)(sizeint)(g)->next_ex)
+#define GEOM_GET_GEOM_IDX(g) ((int)(sizeint)(g)->tome_ex)
+#define GEOM_INVALID_IDX (-1) // stored as the index for the list a geom is currently not in
+
+
+/*
+*  Final filter for a candidate pair - similar to collideAABBs, but doesn't check
+*  if AABBs intersect (because SAP returns pairs with overlapping AABBs).
+*/
+static void collideGeomsNoAABBs( dxGeom *g1, dxGeom *g2, void *data, dNearCallback *callback )
+{
+    dIASSERT( (g1->gflags & GEOM_AABB_BAD)==0 );
+    dIASSERT( (g2->gflags & GEOM_AABB_BAD)==0 );
+
+    // Two geoms attached to one and the same body never produce contacts
+    // (a null body on g1 means this filter does not apply).
+    const bool shareBody = g1->body && g1->body == g2->body;
+    if (shareBody) return;
+
+    // The pair is only interesting when at least one geom's category bits
+    // intersect the other geom's collide bits.
+    const bool firstHitsSecond = (g1->category_bits & g2->collide_bits) != 0;
+    const bool secondHitsFirst = (g2->category_bits & g1->collide_bits) != 0;
+    if (!firstHitsSecond && !secondHitsFirst) return;
+
+    // Let each geom try to prove that it cannot touch the other one's AABB.
+    // g1 is asked first; g2 is only consulted when g1 did not reject the pair.
+    if (g1->AABBTest (g2, g2->aabb) == 0) return;
+    if (g2->AABBTest (g1, g1->aabb) == 0) return;
+
+    // Nothing ruled the pair out, so it goes to the space callback, which
+    // receives the geoms in the order they were passed in.
+    callback (data,g1,g2);
+}
+
+
+dxSAPSpace::dxSAPSpace( dSpaceID _space, int axisorder ) : dxSpace( _space )
+{
+    type = dSweepAndPruneSpaceClass;
+
+    // The space's own AABB is unbounded: [-inf, +inf] on each of the three axes.
+    for ( int axis = 0; axis < 3; ++axis ) {
+        aabb[2 * axis]     = -dInfinity;
+        aabb[2 * axis + 1] = dInfinity;
+    }
+
+    // axisorder packs three 2-bit axis numbers (bits 0-1, 2-3 and 4-5). Each is
+    // doubled so that it indexes the min slot of that axis in a geom's aabb.
+    ax0idx = ( axisorder & 3 ) * 2;
+    ax1idx = ( ( axisorder >> 2 ) & 3 ) * 2;
+    ax2idx = ( ( axisorder >> 4 ) & 3 ) * 2;
+}
+
+dxSAPSpace::~dxSAPSpace()
+{
+    CHECK_NOT_LOCKED(this);
+    if ( !cleanup ) {
+        // cleanup is off: just unhook the geoms, front entry first, until both lists are empty
+        while ( DirtyList.size() ) remove( DirtyList[ 0 ] );
+        while ( GeomList.size() ) remove( GeomList[ 0 ] );
+    }
+    else {
+        // cleanup is on: note that destroying each geom will call remove()
+        while ( DirtyList.size() ) dGeomDestroy( DirtyList[ 0 ] );
+        while ( GeomList.size() ) dGeomDestroy( GeomList[ 0 ] );
+    }
+}
+
+dxGeom* dxSAPSpace::getGeom( int i )
+{
+    dUASSERT( i >= 0 && i < count, "index out of range" );
+    // Indices enumerate the dirty list first and continue into the clean list.
+    const int dirtyCount = DirtyList.size();
+    if ( i >= dirtyCount )
+        return GeomList[i - dirtyCount];
+    return DirtyList[i];
+}
+
+void dxSAPSpace::add( dxGeom* g )
+{
+    CHECK_NOT_LOCKED (this);
+    dAASSERT(g);
+    dUASSERT(g->tome_ex == 0 && g->next_ex == 0, "geom is already in a space");
+    // A newly added geom goes to the end of DirtyList: its dirty index is the
+    // list's current size, and it has no clean-list index yet.
+    const int slot = DirtyList.size();
+    GEOM_SET_DIRTY_IDX( g, slot );
+    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+    DirtyList.push( g );
+
+    dxSpace::add(g);
+}
+
+void dxSAPSpace::remove( dxGeom* g )
+{
+    CHECK_NOT_LOCKED(this);
+    dAASSERT(g);
+    dUASSERT(g->parent_space == this,"object is not in this space");
+    // A geom sits in exactly one of the two lists; its index for the other
+    // list is GEOM_INVALID_IDX.
+    int dirtyIdx = GEOM_GET_DIRTY_IDX(g);
+    int geomIdx = GEOM_GET_GEOM_IDX(g);
+    dUASSERT(
+        (dirtyIdx==GEOM_INVALID_IDX && geomIdx>=0 && geomIdx<GeomList.size()) ||
+        (geomIdx==GEOM_INVALID_IDX && dirtyIdx>=0 && dirtyIdx<DirtyList.size()),
+        "geom indices messed up" );
+    // Either way the list's last entry is moved into the vacated slot (and
+    // told its new index) before the list shrinks by one.
+    if( dirtyIdx == GEOM_INVALID_IDX ) {
+        // the geom is in the clean list
+        const int last = GeomList.size() - 1;
+        if (geomIdx != last) {
+            dxGeom* const moved = GeomList[last];
+            GeomList[geomIdx] = moved;
+            GEOM_SET_GEOM_IDX(moved,geomIdx);
+        }
+        GEOM_SET_GEOM_IDX(g,GEOM_INVALID_IDX);
+        GeomList.setSize( last );
+    } else {
+        // the geom is in the dirty list
+        const int last = DirtyList.size() - 1;
+        if (dirtyIdx != last) {
+            dxGeom* const moved = DirtyList[last];
+            DirtyList[dirtyIdx] = moved;
+            GEOM_SET_DIRTY_IDX(moved,dirtyIdx);
+        }
+        GEOM_SET_DIRTY_IDX(g,GEOM_INVALID_IDX);
+        DirtyList.setSize( last );
+    }
+    dxSpace::remove(g);
+}
+
+void dxSAPSpace::dirty( dxGeom* g )
+{
+    dAASSERT(g);
+    dUASSERT(g->parent_space == this, "object is not in this space");
+
+    // Nothing to do when the geom already has a slot in the dirty list.
+    if( GEOM_GET_DIRTY_IDX(g) != GEOM_INVALID_IDX )
+        return;
+
+    int geomIdx = GEOM_GET_GEOM_IDX(g);
+    dUASSERT( geomIdx>=0 && geomIdx<GeomList.size(), "geom indices messed up" );
+
+    // Take it out of the clean list by moving that list's last entry into its slot.
+    const int last = GeomList.size() - 1;
+    if (geomIdx != last) {
+        dxGeom* const moved = GeomList[last];
+        GeomList[geomIdx] = moved;
+        GEOM_SET_GEOM_IDX(moved,geomIdx);
+    }
+    GeomList.setSize( last );
+
+    // Append it to the dirty list and record its position there.
+    const int slot = DirtyList.size();
+    GEOM_SET_GEOM_IDX( g, GEOM_INVALID_IDX );
+    GEOM_SET_DIRTY_IDX( g, slot );
+    DirtyList.push( g );
+}
+
+void dxSAPSpace::computeAABB()
+{
+    // TODO? Currently a no-op: this override leaves aabb untouched (the constructor initialises it to infinite bounds).
+}
+
+void dxSAPSpace::cleanGeoms()
+{
+    const int pending = DirtyList.size();
+    if( pending == 0 )
+        return;
+
+    // Every dirty geom gets a fresh AABB, loses its dirty flag and is moved
+    // to the tail of GeomList; the space stays locked while that happens.
+    ++lock_count;
+
+    const int firstSlot = GeomList.size();
+    GeomList.setSize( firstSlot + pending ); // room for all of them up front
+
+    for( int k = 0, slot = firstSlot; k < pending; ++k, ++slot ) {
+        dxGeom* const g = DirtyList[k];
+
+        // a geom that is itself a space cleans its own geoms first
+        if( IS_SPACE(g) )
+            ((dxSpace*)g)->cleanGeoms();
+        g->recomputeAABB();
+        dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
+
+        g->gflags &= ~GEOM_DIRTY;
+
+        // hand over: no dirty index any more, clean index = its new slot
+        GEOM_SET_DIRTY_IDX( g, GEOM_INVALID_IDX );
+        GEOM_SET_GEOM_IDX( g, slot );
+        GeomList[slot] = g;
+    }
+    // every entry has been moved out
+    DirtyList.setSize( 0 );
+
+    --lock_count;
+}
+
+void dxSAPSpace::collide( void *data, dNearCallback *callback )
+{
+    dAASSERT (callback);
+
+    // Outline:
+    //   1. refresh the dirty geoms so that every AABB is current,
+    //   2. split the enabled geoms into a "finite" and an "infinite" set,
+    //   3. sweep-and-prune the finite set and report its overlapping pairs,
+    //   4. brute-force every pair that involves an infinite geom.
+    // The space stays locked from start to finish.
+    ++lock_count;
+
+    cleanGeoms();
+
+    // cleanGeoms() has emptied DirtyList, so GeomList must hold every geom
+    const int geom_count = GeomList.size();
+    dUASSERT( geom_count == count, "geom counts messed up" );
+
+    // A geom counts as infinite when its AABB maximum on the primary sort
+    // axis is +infinity; disabled geoms land in neither list.
+    TmpGeomList.setSize(0);
+    TmpInfGeomList.setSize(0);
+    const int primaryMaxSlot = ax0idx + 1;
+    for( int i = 0; i < geom_count; ++i ) {
+        dxGeom* const g = GeomList[i];
+        if( GEOM_ENABLED(g) ) {
+            const bool unbounded = ( g->aabb[primaryMaxSlot] == dInfinity );
+            if( unbounded )
+                TmpInfGeomList.push( g );
+            else
+                TmpGeomList.push( g );
+        }
+    }
+
+    // Sweep-and-prune the finite geoms; the resulting pairs are indices into TmpGeomList.
+    dArray< Pair > overlapBoxes;
+    const int finiteCount = TmpGeomList.size();
+    if ( finiteCount > 0 )
+    {
+        BoxPruning( finiteCount, (const dxGeom**)TmpGeomList.data(), overlapBoxes );
+    }
+
+    // Report the pruned pairs.
+    const int overlapCount = overlapBoxes.size();
+    for( int j = 0; j < overlapCount; ++j )
+    {
+        const Pair& hit = overlapBoxes[ j ];
+        collideGeomsNoAABBs( TmpGeomList[ hit.id0 ], TmpGeomList[ hit.id1 ], data, callback );
+    }
+
+    // Infinite geoms were kept out of the sweep, so pair them up by hand.
+    const int infSize = TmpInfGeomList.size();
+    const int normSize = TmpGeomList.size();
+
+    for ( int m = 0; m < infSize; ++m )
+    {
+        dxGeom* const unboundedGeom = TmpInfGeomList[ m ];
+
+        // against each infinite geom that comes after it in the list
+        for( int n = m+1; n < infSize; ++n )
+            collideGeomsNoAABBs( unboundedGeom, TmpInfGeomList[n], data, callback );
+
+        // against every finite geom
+        for( int n = 0; n < normSize; ++n )
+            collideGeomsNoAABBs( unboundedGeom, TmpGeomList[n], data, callback );
+    }
+
+    --lock_count;
+}
+
+void dxSAPSpace::collide2( void *data, dxGeom *geom, dNearCallback *callback )
+{
+    dAASSERT (geom && callback);
+
+    // TODO: This is just a simple N^2 implementation
+    // (no sort-axis pruning here: 'geom' is tested against each enabled geom in turn)
+
+    ++lock_count;
+
+    // refresh this space's AABBs and the incoming geom's AABB before comparing
+    cleanGeoms();
+    geom->recomputeAABB();
+
+    for ( int i = 0, n = GeomList.size(); i != n; ++i ) {
+        dxGeom* const candidate = GeomList[i];
+        if ( !GEOM_ENABLED(candidate) )
+            continue;
+        collideAABBs (candidate,geom,data,callback);
+    }
+    --lock_count;
+}
+
+// Sweep-and-prune over 'count' geoms: sorts them by AABB minimum on the primary axis and appends to 'pairs' every (id0, id1) index pair into 'geoms' whose AABBs overlap on all three axes.
+void dxSAPSpace::BoxPruning( int count, const dxGeom** geoms, dArray< Pair >& pairs )
+{
+    // Size the poslist: one sort key per geom. No end-cap entry is stored; the loops below stop at LastSorted instead.
+    poslist.setSize( count );
+
+    // 1) Build main list using the primary axis (AABB minimum of every geom passed in 'geoms')
+    //  NOTE: uses floats instead of dReals because that's what radix sort wants
+    for( int i = 0; i < count; ++i )
+        poslist[ i ] = (float)geoms[i]->aabb[ ax0idx ];
+
+    // 2) Sort the list: Sorted points at 'count' geom indices ordered by their key
+    const uint32* Sorted = sortContext.RadixSort( poslist.data(), count );
+
+    // 3) Prune the list
+    const uint32* const LastSorted = Sorted + count;
+    const uint32* RunningAddress = Sorted;
+
+    bool bExitLoop;
+    Pair IndexPair;
+    while ( Sorted < LastSorted )
+    {
+        IndexPair.id0 = *Sorted++;
+
+        // empty, this loop just advances RunningAddress; it stops one past the first entry whose key is not below id0's key
+        for (bExitLoop = false; poslist[*RunningAddress++] < poslist[IndexPair.id0]; )
+        {
+            if (RunningAddress == LastSorted)
+            {
+                bExitLoop = true;
+                break;
+            }
+        }
+
+        if ( bExitLoop || RunningAddress == LastSorted) // Not a bug!!! The post-increment in the loop condition can reach LastSorted without setting bExitLoop
+        {
+            break;
+        }
+
+        const float idx0ax0max = (float)geoms[IndexPair.id0]->aabb[ax0idx+1]; // To avoid wrong decisions caused by rounding errors, cast the AABB element to float similarly as we did at the function beginning
+        const dReal idx0ax1max = geoms[IndexPair.id0]->aabb[ax1idx+1];
+        const dReal idx0ax2max = geoms[IndexPair.id0]->aabb[ax2idx+1];
+
+        // Candidates are the following entries whose primary-axis minimum does not exceed id0's primary-axis maximum
+        for (const uint32* RunningAddress2 = RunningAddress; poslist[ IndexPair.id1 = *RunningAddress2++ ] <= idx0ax0max; )
+        {
+            const dReal* aabb0 = geoms[ IndexPair.id0 ]->aabb;
+            const dReal* aabb1 = geoms[ IndexPair.id1 ]->aabb;
+
+            // Intersection on the two remaining axes?
+            if ( idx0ax1max >= aabb1[ax1idx] && aabb1[ax1idx+1] >= aabb0[ax1idx] 
+                && idx0ax2max >= aabb1[ax2idx] && aabb1[ax2idx+1] >= aabb0[ax2idx] )
+            {
+                pairs.push( IndexPair );
+            }
+
+            // Checked before the loop condition runs again, so it never reads past the end of the sorted list
+            if (RunningAddress2 == LastSorted)
+            {
+                break;
+            }
+        }
+
+    } // while ( Sorted < LastSorted )
+}
+
+
+//==============================================================================
+
+//------------------------------------------------------------------------------
+// Radix Sort
+//------------------------------------------------------------------------------
+
+
+// Per-pass setup for RadixSort: declares CurCount, PerformPass and UniqueVal for pass 'pass'; needs mHistogram, input and nb in scope. 'pass' is parenthesized so any expression can be passed.
+#define CHECK_PASS_VALIDITY(pass) \
+    /* Shortcut to current counters (each pass owns 256 consecutive entries) */ \
+    const uint32* CurCount = &mHistogram[(pass)<<8]; \
+    \
+    /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
+    bool PerformPass = true; \
+    \
+    /* Check pass validity */ \
+    \
+    /* If all values have the same byte, sorting is useless. */ \
+    /* It may happen when sorting bytes or words instead of dwords. */ \
+    /* This routine actually sorts words faster than dwords, and bytes */ \
+    /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
+    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
+    \
+    /* Get the byte at offset 'pass' of the first input value */ \
+    uint8 UniqueVal = *(((const uint8*)input)+(pass)); \
+    \
+    /* If that byte's counter equals nb, every value has the same byte here */ \
+    if(CurCount[UniqueVal]==nb)	PerformPass=false;
+
+// WARNING ONLY SORTS IEEE FLOATING-POINT VALUES. Returns nb indices into input2 in sorted order; the array lives in this context's rank buffer.
+const uint32* RaixSortContext::RadixSort( const float* input2, uint32 nb )
+{
+    union _type_cast_union
+    {
+        _type_cast_union(const float *floats): asFloats(floats) {}
+        _type_cast_union(const uint32 *uints32): asUInts32(uints32) {}
+
+        const float *asFloats;
+        const uint32 *asUInts32;
+        const uint8 *asUInts8;
+    };
+
+    const uint32* input = _type_cast_union(input2).asUInts32;
+    // NOTE(review): nb == 0 is not handled - the pre-check below dereferences input2 unconditionally. The caller in this file only sorts when count > 0.
+    // Resize lists if needed
+    ReallocateRanksIfNecessary(nb);
+
+    // Allocate histograms & offsets on the stack
+    uint32 mHistogram[256*4];
+    uint32* mLink[256];
+
+    // Create histograms (counters). Counters for all passes are created in one run.
+    // Pros:	read input buffer once instead of four times
+    // Cons:	mHistogram is 4Kb instead of 1Kb
+    // Floating-point values are always supposed to be signed values, so there's only one code path there.
+    // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
+    // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
+    // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
+    // wouldn't work with mixed positive/negative values....
+    {
+        /* Clear counters/histograms */
+        memset(mHistogram, 0, 256*4*sizeof(uint32));
+
+        /* Prepare to count */
+        const uint8* p = _type_cast_union(input).asUInts8;
+        const uint8* pe = &p[nb*4];
+        uint32* h0= &mHistogram[0];		/* Histogram for first pass (LSB)	*/
+        uint32* h1= &mHistogram[256];	/* Histogram for second pass		*/
+        uint32* h2= &mHistogram[512];	/* Histogram for third pass			*/
+        uint32* h3= &mHistogram[768];	/* Histogram for last pass (MSB)	*/
+
+        bool AlreadySorted = true;	/* Optimism... */
+
+        if (!AreRanksValid())
+        {
+            /* No usable ranks from a previous call: check the input in its natural order */
+            const float* Running = input2;
+            float PrevVal = *Running;
+
+            while(p!=pe)
+            {
+                /* Read input2 in index order */
+                float Val = *Running++;
+                /* Check whether already sorted or not */
+                if(Val<PrevVal)	{ AlreadySorted = false; break; } /* Early out */
+                /* Update for next iteration */
+                PrevVal = Val;
+
+                /* Create histograms */
+                h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;
+            }
+
+            /* If all input values are already sorted in index order, the result is simply  */
+            /* the identity ranks 0..nb-1, so we fill those in and return. The ranks are    */
+            /* not marked valid on this path.                                               */
+            if(AlreadySorted)
+            {
+                uint32* const Ranks1 = GetRanks1();
+                for(uint32 i=0;i<nb;i++)	Ranks1[i] = i;
+                return Ranks1;
+            }
+        }
+        else
+        {
+            /* Prepare for temporal coherence */
+            uint32* const Ranks1 = GetRanks1();
+
+            uint32* Indices = Ranks1;
+            float PrevVal = input2[*Indices];
+
+            while(p!=pe)
+            {
+                /* Read input2 in previous sorted order */
+                float Val = input2[*Indices++];
+                /* Check whether already sorted or not */
+                if(Val<PrevVal)	{ AlreadySorted = false; break; } /* Early out */
+                /* Update for next iteration */
+                PrevVal = Val;
+
+                /* Create histograms */
+                h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;
+            }
+
+            /* If all input values are already sorted, we just have to return and leave the */
+            /* previous list unchanged. That way the routine may take advantage of temporal */
+            /* coherence, for example when used to sort transparent faces.					*/
+            if(AlreadySorted)	{ return Ranks1;	}
+        }
+
+        /* Else there has been an early out and we must finish computing the histograms */
+        while(p!=pe)
+        {
+            /* Create histograms without the previous overhead */
+            h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;
+        }
+    }
+
+    // Compute #negative values involved if needed
+    uint32 NbNegativeValues = 0;
+
+    // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128
+    // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte,
+    // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers.
+    uint32* h3= &mHistogram[768];
+    for(uint32 i=128;i<256;i++)	NbNegativeValues += h3[i];	// 768 for last histogram, 128 for negative part
+
+    // Radix sort, j is the pass number (0=LSB, 3=MSB)
+    for(uint32 j=0;j<4;j++)
+    {
+        // Should we care about negative values?
+        if(j!=3)
+        {
+            // Here we deal with positive values only
+            CHECK_PASS_VALIDITY(j);
+
+            if(PerformPass)
+            {
+                uint32* const Ranks2 = GetRanks2();
+                // Create offsets: mLink[b] points at the slot in Ranks2 where the next index with byte value b goes
+                mLink[0] = Ranks2;
+                for(uint32 i=1;i<256;i++)		mLink[i] = mLink[i-1] + CurCount[i-1];
+
+                // Perform Radix Sort
+                const uint8* InputBytes = _type_cast_union(input).asUInts8;
+                InputBytes += j;
+                if (!AreRanksValid())
+                {
+                    // No ranks yet: distribute the indices 0..nb-1 directly
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        *mLink[InputBytes[i<<2]]++ = i;
+                    }
+
+                    ValidateRanks();
+                }
+                else
+                {
+                    uint32* const Ranks1 = GetRanks1();
+
+                    uint32* Indices				= Ranks1;
+                    uint32* const IndicesEnd	= Ranks1 + nb;
+                    while(Indices!=IndicesEnd)
+                    {
+                        uint32 id = *Indices++;
+                        *mLink[InputBytes[id<<2]]++ = id;
+                    }
+                }
+
+                // Swap pointers for next pass. Valid indices - the most recent ones - are returned by GetRanks1() after the swap.
+                SwapRanks();
+            }
+        }
+        else
+        {
+            // This is a special case to correctly handle negative values
+            CHECK_PASS_VALIDITY(j);
+
+            if(PerformPass)
+            {
+                uint32* const Ranks2 = GetRanks2();
+
+                // Create biased offsets, in order for negative numbers to be sorted as well
+                mLink[0] = Ranks2 + NbNegativeValues;										// First positive number takes place after the negative ones
+                for(uint32 i=1;i<128;i++)		mLink[i] = mLink[i-1] + CurCount[i-1];		// 1 to 128 for positive numbers
+
+                // We must reverse the sorting order for negative numbers!
+                mLink[255] = Ranks2;
+                for(uint32 i=0;i<127;i++)	mLink[254-i] = mLink[255-i] + CurCount[255-i];		// Fixing the wrong order for negative values
+                for(uint32 i=128;i<256;i++)	mLink[i] += CurCount[i];							// Fixing the wrong place for negative values
+
+                // Perform Radix Sort
+                if (!AreRanksValid())
+                {
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        uint32 Radix = input[i]>>24;							// Radix byte, same as above. AND is useless here (uint32).
+                        // ### cmp to be killed. Not good. Later.
+                        if(Radix<128)		*mLink[Radix]++ = i;		// Number is positive, same as above
+                        else				*(--mLink[Radix]) = i;		// Number is negative, flip the sorting order
+                    }
+
+                    ValidateRanks();
+                }
+                else
+                {
+                    uint32* const Ranks1 = GetRanks1();
+
+                    for(uint32 i=0;i<nb;i++)
+                    {
+                        uint32 Radix = input[Ranks1[i]]>>24;							// Radix byte, same as above. AND is useless here (uint32).
+                        // ### cmp to be killed. Not good. Later.
+                        if(Radix<128)		*mLink[Radix]++ = Ranks1[i];		// Number is positive, same as above
+                        else				*(--mLink[Radix]) = Ranks1[i];		// Number is negative, flip the sorting order
+                    }
+                }
+                // Swap pointers for next pass. Valid indices - the most recent ones - are returned by GetRanks1() after the swap.
+                SwapRanks();
+            }
+            else
+            {
+                // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
+                if(UniqueVal>=128)
+                {
+                    if (!AreRanksValid())
+                    {
+                        uint32* const Ranks2 = GetRanks2();
+                        // ###Possible? (no earlier pass has written ranks, so the reversed identity order is used)
+                        for(uint32 i=0;i<nb;i++)
+                        {
+                            Ranks2[i] = nb-i-1;
+                        }
+
+                        ValidateRanks();
+                    }
+                    else
+                    {
+                        uint32* const Ranks1 = GetRanks1();
+                        uint32* const Ranks2 = GetRanks2();
+                        for(uint32 i=0;i<nb;i++)	Ranks2[i] = Ranks1[nb-i-1];
+                    }
+
+                    // Swap pointers for next pass. Valid indices - the most recent ones - are returned by GetRanks1() after the swap.
+                    SwapRanks();
+                }
+            }
+        }
+    }
+
+    // Return indices (the primary list after the final swap)
+    uint32* const Ranks1 = GetRanks1();
+    return Ranks1;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/collision_space.cpp b/libs/ode-0.16.1/ode/src/collision_space.cpp
new file mode 100644
index 0000000..2ec4247
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_space.cpp
@@ -0,0 +1,864 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+spaces
+
+*/
+
+#include <vector>
+
+#include <ode/common.h>
+#include <ode/collision_space.h>
+#include <ode/collision.h>
+#include "config.h"
+#include "matrix.h"
+#include "collision_kernel.h"
+#include "collision_space_internal.h"
+#include "util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// make the geom dirty by setting the GEOM_DIRTY and GEOM_AABB_BAD flags
+// and moving it to the front of the space's list. all the parents of a
+// dirty geom also become dirty.
+
+void dGeomMoved (dxGeom *geom)
+{
+    dAASSERT (geom);
+
+    // a geom that carries an offset must have its final position/rotation
+    // recalculated
+    if (geom->offset_posr) {
+        geom->gflags |= GEOM_POSR_BAD;
+    }
+
+    // walk up the space hierarchy starting at the geom itself. every node
+    // that is still clean has its AABB marked bad and is handed to its
+    // parent space's dirty() method. the walk stops at the first node that
+    // is already dirty or that has no parent space.
+    dxGeom *node = geom;
+    for (dxSpace *owner = node->parent_space;
+         owner && (node->gflags & GEOM_DIRTY) == 0;
+         owner = owner->parent_space) {
+        node->markAABBBad();
+        owner->dirty (node);
+        node = owner;
+    }
+
+    // everything from here up to the root only needs its AABB marked bad so
+    // that it gets recomputed
+    for (; node; node = node->parent_space) {
+        node->markAABBBad();
+    }
+}
+
+// true when the geom's flags, masked with GEOM_ENABLE_TEST_MASK, equal GEOM_ENABLE_TEST_VALUE
+#define GEOM_ENABLED(g) (GEOM_ENABLE_TEST_VALUE == ((g)->gflags & GEOM_ENABLE_TEST_MASK))
+
+//****************************************************************************
+// dxSpace
+
+dxSpace::dxSpace (dSpaceID _space) : dxGeom (_space,0)
+{
+    // the geom list starts out empty
+    first = 0;
+    count = 0;
+
+    // no enumeration position is cached for getGeom() yet
+    current_geom = 0;
+    current_index = 0;
+
+    // the space is not locked
+    lock_count = 0;
+
+    // default settings
+    cleanup = 1;
+    sublevel = 0;
+    tls_kind = dSPACE_TLS_KIND_INIT_VALUE;
+}
+
+
+dxSpace::~dxSpace()
+{
+    CHECK_NOT_LOCKED (this);
+
+    // with cleanup enabled the contained geoms are destroyed (which in turn
+    // calls remove() on each of them); otherwise they are only detached
+    const bool destroy_members = (cleanup != 0);
+
+    dxGeom *member = first;
+    while (member) {
+        // fetch the successor first: the current geom leaves the list below
+        dxGeom *const following = member->next;
+        if (destroy_members) {
+            dGeomDestroy (member);
+        }
+        else {
+            remove (member);
+        }
+        member = following;
+    }
+}
+
+
+void dxSpace::computeAABB()
+{
+    // an empty space gets an all-zero bounding box
+    if (!first) {
+        dSetZero (aabb,6);
+        return;
+    }
+
+    // start from an inverted (empty) box: minima at +infinity, maxima at
+    // -infinity. even slots hold minima, odd slots hold maxima.
+    dReal merged[6];
+    for (int axis = 0; axis < 6; axis += 2) {
+        merged[axis] = dInfinity;
+        merged[axis+1] = -dInfinity;
+    }
+
+    // grow the box so that it encloses the AABB of every contained geom
+    for (dxGeom *member = first; member; member = member->next) {
+        member->recomputeAABB();
+        for (int axis = 0; axis < 6; axis += 2) {
+            if (member->aabb[axis] < merged[axis]) merged[axis] = member->aabb[axis];
+            if (member->aabb[axis+1] > merged[axis+1]) merged[axis+1] = member->aabb[axis+1];
+        }
+    }
+
+    memcpy (aabb,merged,6*sizeof(dReal));
+}
+
+
+// the dirty geoms are numbered 0..k, the clean geoms are numbered k+1..count-1
+
+dxGeom *dxSpace::getGeom (int i)
+{
+    dUASSERT (i >= 0 && i < count,"index out of range");
+
+    dxGeom *found;
+    if (current_geom && current_index == i-1) {
+        // sequential access: step on from the cached position
+        found = current_geom->next;
+    }
+    else {
+        // random access: walk i links from the head of the list. running off
+        // the end returns 0 and leaves the cached position untouched.
+        found = first;
+        for (int steps = i; steps > 0; --steps) {
+            if (!found) return 0;
+            found = found->next;
+        }
+    }
+
+    // remember where we are so that a following getGeom(i+1) is cheap
+    current_geom = found;
+    current_index = i;
+    return found;
+}
+
+
+void dxSpace::add (dxGeom *geom)
+{
+    CHECK_NOT_LOCKED (this);
+    dAASSERT (geom);
+    dUASSERT (geom->parent_space == 0 && geom->next == 0,
+        "geom is already in a space");
+
+    // link the geom into this space's list
+    geom->parent_space = this;
+    geom->spaceAdd (&first);
+
+    // the cached getGeom() position is no longer valid
+    current_geom = 0;
+    ++count;
+
+    // report this space as moved, since its contents have changed
+    dGeomMoved (this);
+}
+
+
+void dxSpace::remove (dxGeom *geom)
+{
+    CHECK_NOT_LOCKED (this);
+    dAASSERT (geom);
+    dUASSERT (geom->parent_space == this,"object is not in this space");
+
+    // unlink the geom from the list
+    geom->spaceRemove();
+    --count;
+
+    // safeguard: clear the geom's list links and its parent pointer
+    geom->parent_space = 0;
+    geom->tome = 0;
+    geom->next = 0;
+
+    // the cached getGeom() position is no longer valid
+    current_geom = 0;
+
+    // removing a geom may have changed the bounding box of this space and
+    // of all its parents
+    dGeomMoved (this);
+}
+
+
+void dxSpace::dirty (dxGeom *geom)
+{
+    // unlink the geom and re-insert it through the list head pointer
+    geom->spaceRemove();
+    geom->spaceAdd (&this->first);
+}
+
+//****************************************************************************
+// simple space - reports all n^2 object intersections
+
+struct dxSimpleSpace : public dxSpace {
+    dxSimpleSpace (dSpaceID _space);
+
+    // collision queries
+    void collide (void *data, dNearCallback *callback);
+    void collide2 (void *data, dxGeom *geom, dNearCallback *callback);
+
+    // recompute the AABBs of dirty geoms and clear their dirty flags
+    void cleanGeoms();
+};
+
+
+dxSimpleSpace::dxSimpleSpace (dSpaceID _space) : dxSpace (_space)
+{
+    // tag the geom with the simple space class id
+    this->type = dSimpleSpaceClass;
+}
+
+
+void dxSimpleSpace::cleanGeoms()
+{
+    // hold the lock count up while the list is being walked
+    lock_count++;
+
+    // only the leading run of dirty geoms is visited: the walk stops at the
+    // first geom whose GEOM_DIRTY flag is clear
+    dxGeom *g = first;
+    while (g && (g->gflags & GEOM_DIRTY) != 0) {
+        // nested spaces clean their own contents first
+        if (IS_SPACE(g)) {
+            dxSpace *const nested = (dxSpace*)g;
+            nested->cleanGeoms();
+        }
+
+        g->recomputeAABB();
+        dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
+
+        g->gflags &= ~GEOM_DIRTY;
+        g = g->next;
+    }
+
+    lock_count--;
+}
+
+
+void dxSimpleSpace::collide (void *data, dNearCallback *callback)
+{
+    dAASSERT (callback);
+
+    lock_count++;
+    cleanGeoms();
+
+    // test every unordered pair of enabled geoms exactly once
+    for (dxGeom *outer = first; outer; outer = outer->next) {
+        if (!GEOM_ENABLED(outer)) continue;
+
+        for (dxGeom *inner = outer->next; inner; inner = inner->next) {
+            if (!GEOM_ENABLED(inner)) continue;
+            collideAABBs (outer,inner,data,callback);
+        }
+    }
+
+    lock_count--;
+}
+
+
+void dxSimpleSpace::collide2 (void *data, dxGeom *geom,
+                              dNearCallback *callback)
+{
+    dAASSERT (geom && callback);
+
+    lock_count++;
+
+    // bring the AABBs of the space contents and of the outside geom up to date
+    cleanGeoms();
+    geom->recomputeAABB();
+
+    // test the outside geom against every enabled geom in this space
+    for (dxGeom *member = first; member; member = member->next) {
+        if (!GEOM_ENABLED(member)) continue;
+        collideAABBs (member,geom,data,callback);
+    }
+
+    lock_count--;
+}
+
+//****************************************************************************
+// utility stuff for hash table space
+
+// fallback definition of MAXINT: all bits of an unsigned int set, shifted
+// right by one, converted to int
+#if !defined(MAXINT)
+#define MAXINT ((int)(~0U >> 1))
+#endif
+
+
+// table of hash table sizes. for i >= 2, prime[i] is the largest prime
+// smaller than 2^i; the first two entries (1 and 2) do not follow that rule.
+#define NUM_PRIMES 31
+static const unsigned long int prime[NUM_PRIMES] = {
+    1L, 2L, 3L, 7L, 13L, 31L, 61L, 127L,
+    251L, 509L, 1021L, 2039L, 4093L, 8191L, 16381L, 32749L,
+    65521L, 131071L, 262139L, 524287L, 1048573L, 2097143L, 4194301L, 8388593L,
+    16777213L, 33554393L, 67108859L, 134217689L, 268435399L, 536870909L, 1073741789L
+};
+
+
+// bookkeeping record for one geom's axis aligned bounding box while it is
+// being processed by the hash space
+struct dxAABB {
+    // hash table level the box is stored at (cell size = 2^level)
+    int level;
+    // AABB bounds, discretized to the cell size
+    int dbounds[6];
+    // geometry object this record belongs to (the real AABB is stored there)
+    dxGeom *geom;
+    // index of this AABB, starting from 0
+    sizeint index;
+};
+
+
+// one hash table entry: records that an AABB intersects a particular cell
+// at a particular level
+struct Node {
+    // next node in the same hash bucket's chain, 0 if none
+    Node *next;
+    // cell position in space, discretized to the cell size
+    int x;
+    int y;
+    int z;
+    // axis aligned bounding box that intersects this cell
+    dxAABB *aabb;
+};
+
+
+// return the `level' of an AABB. the AABB will be put into cells at this
+// level - the cell size will be 2^level. the level is the binary exponent
+// that frexp() reports for q, the largest AABB dimension. frexp() yields a
+// mantissa in [0.5,1), so for q > 0:
+//	2^(level-1) <= q < 2^level
+// and q == 0 gives level 0. an AABB with an infinite bound on any side
+// gets MAXINT.
+
+static int findLevel (dReal bounds[6])
+{
+    // an AABB that is unbounded on any side gets the largest possible level
+    for (int axis = 0; axis < 6; axis += 2) {
+        if (bounds[axis] <= -dInfinity || bounds[axis+1] >= dInfinity) {
+            return MAXINT;
+        }
+    }
+
+    // q = largest extent over the x, y and z axes
+    dReal extent = bounds[1] - bounds[0];
+    for (int axis = 2; axis < 6; axis += 2) {
+        const dReal span = bounds[axis+1] - bounds[axis];
+        if (span > extent) extent = span;
+    }
+
+    // only the exponent is of interest; the mantissa is discarded
+    int exponent;
+    frexp (extent,&exponent);
+    return exponent;
+}
+
+
+// find a virtual memory address for a cell at the given level and x,y,z
+// position.
+// @@@ currently this is not very sophisticated, e.g. the scaling
+// factors could be better designed to avoid collisions, and they should
+// probably depend on the hash table physical size.
+
+static unsigned long getVirtualAddressBase (unsigned int level, unsigned int x, unsigned int y)
+{
+    // weighted sum of the three inputs, accumulated in unsigned long
+    // arithmetic (which wraps around on overflow)
+    unsigned long address = level * 1000UL;
+    address += x * 100UL;
+    address += y * 10UL;
+    return address;
+}
+
+//****************************************************************************
+// hash space
+
+struct dxHashSpace : public dxSpace {
+    // smallest hash table level to put AABBs in
+    int global_minlevel;
+    // objects that need a level larger than this are put in a "big objects"
+    // list instead of the hash table
+    int global_maxlevel;
+
+    dxHashSpace (dSpaceID _space);
+
+    // level range accessors
+    void getLevels (int *minlevel, int *maxlevel);
+    void setLevels (int minlevel, int maxlevel);
+
+    // collision queries
+    void collide (void *data, dNearCallback *callback);
+    void collide2 (void *data, dxGeom *geom, dNearCallback *callback);
+
+    // recompute the AABBs of dirty geoms and clear their dirty flags
+    void cleanGeoms();
+};
+
+
+dxHashSpace::dxHashSpace (dSpaceID _space) : dxSpace (_space)
+{
+    // default level range: cell sizes from 2^-3 up to 2^10
+    global_maxlevel = 10;
+    global_minlevel = -3;
+
+    // tag the geom with the hash space class id
+    this->type = dHashSpaceClass;
+}
+
+
+void dxHashSpace::setLevels (int minlevel, int maxlevel)
+{
+    // the range must not be inverted
+    dAASSERT (minlevel <= maxlevel);
+
+    global_maxlevel = maxlevel;
+    global_minlevel = minlevel;
+}
+
+
+void dxHashSpace::getLevels (int *minlevel, int *maxlevel)
+{
+    // either output pointer may be null, in which case that value is skipped
+    if (minlevel != 0) {
+        *minlevel = global_minlevel;
+    }
+    if (maxlevel != 0) {
+        *maxlevel = global_maxlevel;
+    }
+}
+
+
+void dxHashSpace::cleanGeoms()
+{
+    // hold the lock count up while the list is being walked
+    lock_count++;
+
+    // only the leading run of dirty geoms is visited: the walk stops at the
+    // first geom whose GEOM_DIRTY flag is clear
+    dxGeom *g = first;
+    while (g && (g->gflags & GEOM_DIRTY) != 0) {
+        // nested spaces clean their own contents first
+        if (IS_SPACE(g)) {
+            dxSpace *const nested = (dxSpace*)g;
+            nested->cleanGeoms();
+        }
+
+        g->recomputeAABB();
+        dIASSERT((g->gflags & GEOM_AABB_BAD) == 0);
+
+        g->gflags &= ~GEOM_DIRTY;
+        g = g->next;
+    }
+
+    lock_count--;
+}
+
+
+// Report potentially colliding pairs of enabled geoms in this space.
+//
+//   data     - opaque pointer handed through to the callback
+//   callback - invoked (via collideAABBs) for each candidate pair; must not
+//              be null
+//
+// Geoms whose level lies within global_maxlevel are discretized to cells and
+// entered into a chained hash table; each one is then tested against the
+// geoms sharing its cells at its own level and at every higher level up to
+// the largest level in use. Geoms that need a larger level go into a
+// separate "big boxes" list that is tested exhaustively against everything.
+void dxHashSpace::collide (void *data, dNearCallback *callback)
+{
+    // `this' is never null inside a member function of a valid object, so
+    // only the callback is checked
+    dAASSERT(callback);
+    dxGeom *geom;
+    int i,maxlevel;
+
+    // 0 or 1 geoms can't collide with anything
+    if (count < 2) return;
+
+    lock_count++;
+    cleanGeoms();
+
+    // create a list of auxiliary information for all geom axis aligned bounding
+    // boxes. set the level for all AABBs. put AABBs larger than the space's
+    // global_maxlevel in the big_boxes list, check everything else against
+    // that list at the end. for AABBs that are not too big, record the maximum
+    // level that we need.
+
+    typedef std::vector<dxAABB> AABBlist;
+    AABBlist hash_boxes; // list of AABBs in hash table
+    AABBlist big_boxes; // list of AABBs too big for hash table
+    maxlevel = global_minlevel - 1;
+    for (geom = first; geom; geom=geom->next) {
+        if (!GEOM_ENABLED(geom)){
+            continue;
+        }
+        dxAABB aabb;
+        aabb.geom = geom;
+        // compute level, but prevent cells from getting too small
+        int level = findLevel (geom->aabb);
+        if (level < global_minlevel) level = global_minlevel;
+        if (level <= global_maxlevel) {
+            aabb.level = level;
+            if (level > maxlevel) maxlevel = level;
+            // cellsize = 2^level
+            dReal cellSizeRecip = dRecip(ldexp(REAL(1.0), level)); // No computational errors here!
+            // discretize AABB position to cell size
+            for (i=0; i < 6; i++) {
+                dReal aabbBound = geom->aabb[i] * cellSizeRecip; // No computational errors so far!
+                dICHECK(aabbBound >= dMinIntExact && aabbBound </*=*/ dMaxIntExact); // Otherwise the scene is too large for integer types used 
+
+                aabb.dbounds[i] = (int) dFloor(aabbBound);
+            }
+            // set AABB index
+            aabb.index = hash_boxes.size();
+            // aabb goes in main list
+            hash_boxes.push_back(aabb);
+        }
+        else {
+            // aabb is too big, put it in the big_boxes list. we don't care about
+            // setting level, dbounds, index, or the maxlevel
+            big_boxes.push_back(aabb);
+        }
+    }
+
+    sizeint n = hash_boxes.size(); // number of AABBs in main list
+
+    // for `n' objects, an n*n array of bits is used to record if those objects
+    // have been intersection-tested against each other yet. this array can
+    // grow large with high n, but oh well...
+    const sizeint tested_rowsize = (n+7) >> 3;	// number of bytes needed for n bits
+    std::vector<uint8> tested(n * tested_rowsize);
+
+    // create a hash table to store all AABBs. each AABB may take up to 8 cells.
+    // we use chaining to resolve collisions, but we use a relatively large table
+    // to reduce the chance of collisions.
+
+    // compute hash table size sz to be the first table prime >= 8*n
+    for (i=0; i<NUM_PRIMES; i++) {
+        if ((sizeint)prime[i] >= (8*n)) break;
+    }
+    if (i >= NUM_PRIMES) {
+        i = NUM_PRIMES-1;	// probably pointless
+    }
+
+    const unsigned long sz = prime[i];
+
+    // allocate and initialize hash table node pointers
+    typedef std::vector<Node*> HashTable;
+    HashTable table(sz);
+
+    // add each AABB to the hash table (may need to add it to up to 8 cells)
+    const AABBlist::iterator hashend = hash_boxes.end();
+    for (AABBlist::iterator aabb = hash_boxes.begin(); aabb != hashend; ++aabb) {
+        const int *dbounds = aabb->dbounds;
+        const int xend = dbounds[1];
+        for (int xi = dbounds[0]; xi <= xend; xi++) {
+            const int yend = dbounds[3];
+            for (int yi = dbounds[2]; yi <= yend; yi++) {
+                int zbegin = dbounds[4];
+                // hash index of the first z cell; successive z cells use
+                // successive indices, wrapping around at sz
+                unsigned long hi = (getVirtualAddressBase(aabb->level,xi,yi) + zbegin) % sz;
+                const int zend = dbounds[5];
+                for (int zi = zbegin; zi <= zend; (hi = hi + 1U != sz ? hi + 1U : 0UL), zi++) {
+                    // add a new node to the hash table
+                    Node *node = new Node;
+                    node->x = xi;
+                    node->y = yi;
+                    node->z = zi;
+                    node->aabb = &*aabb;
+                    node->next = table[hi];
+                    table[hi] = node;
+                }
+            }
+        }
+    }
+
+    // now that all AABBs are loaded into the hash table, we do the actual
+    // collision detection. for all AABBs, check for other AABBs in the
+    // same cells for collisions, and then check for other AABBs in all
+    // intersecting higher level cells.
+
+    int db[6];			// discrete bounds at current level
+    for (AABBlist::iterator aabb = hash_boxes.begin(); aabb != hashend; ++aabb) {
+        // we are searching for collisions with aabb
+        for (i=0; i<6; i++) db[i] = aabb->dbounds[i];
+        for (int level = aabb->level; ; ) {
+            dIASSERT(level <= maxlevel);
+            const int xend = db[1];
+            for (int xi = db[0]; xi <= xend; xi++) {
+                const int yend = db[3];
+                for (int yi = db[2]; yi <= yend; yi++) {
+                    int zbegin = db[4];
+                    // get the hash index
+                    unsigned long hi = (getVirtualAddressBase(level, xi, yi) + zbegin) % sz;
+                    const int zend = db[5];
+                    for (int zi = zbegin; zi <= zend; (hi = hi + 1U != sz ? hi + 1U : 0UL), zi++) {
+                        // search all nodes at this index
+                        for (Node* node = table[hi]; node; node=node->next) {
+                            // node points to an AABB that may intersect aabb
+                            if (node->aabb == &*aabb)
+                                continue;
+                            if (node->aabb->level == level &&
+                                node->x == xi && node->y == yi && node->z == zi) {
+                                    // see if aabb and node->aabb have already been tested
+                                    // against each other. the pair's bit lives in the row of
+                                    // the smaller index, at the column of the larger index.
+                                    // the byte offset is kept in a sizeint of its own so that
+                                    // it is not truncated to int.
+                                    unsigned char mask;
+                                    sizeint testedIndex;
+                                    if (aabb->index <= node->aabb->index) {
+                                        testedIndex = (aabb->index * tested_rowsize)+(node->aabb->index >> 3);
+                                        mask = 1 << (node->aabb->index & 7);
+                                    }
+                                    else {
+                                        testedIndex = (node->aabb->index * tested_rowsize)+(aabb->index >> 3);
+                                        mask = 1 << (aabb->index & 7);
+                                    }
+                                    dIASSERT (testedIndex < (tested_rowsize*n));
+                                    if ((tested[testedIndex] & mask)==0) {
+                                        tested[testedIndex] |= mask;
+                                        collideAABBs (aabb->geom,node->aabb->geom,data,callback);
+                                    }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (level == maxlevel) {
+                break;
+            }
+            ++level;
+            // get the discrete bounds for the next level up
+            for (i=0; i<6; i++) db[i] >>= 1;
+        }
+    }
+
+    // every AABB in the normal list must now be intersected against every
+    // AABB in the big_boxes list. so let's hope there are not too many objects
+    // in the big_boxes list.
+    const AABBlist::iterator bigend = big_boxes.end();
+    for (AABBlist::iterator aabb = hash_boxes.begin(); aabb != hashend; ++aabb) {
+        for (AABBlist::iterator aabb2 = big_boxes.begin(); aabb2 != bigend; ++aabb2) {
+            collideAABBs (aabb->geom, aabb2->geom, data, callback);
+        }
+    }
+
+    // intersect all AABBs in the big_boxes list with each other
+    for (AABBlist::iterator aabb = big_boxes.begin(); aabb != bigend; ++aabb) {
+        AABBlist::iterator aabb2 = aabb;
+        while (++aabb2 != bigend) {
+            collideAABBs (aabb->geom, aabb2->geom, data, callback);
+        }
+    }
+
+    // deallocate table
+    const HashTable::iterator tableend = table.end();
+    for (HashTable::iterator el = table.begin(); el != tableend; ++el)
+        for (Node* node = *el; node; ) {
+            Node* next = node->next;
+            delete node;
+            node = next;
+        }
+
+    lock_count--;
+}
+
+
+void dxHashSpace::collide2 (void *data, dxGeom *geom,
+                            dNearCallback *callback)
+{
+    dAASSERT (geom && callback);
+
+    // the hash structure is not used here: the outside geom is simply tested
+    // against every enabled geom in the list.
+
+    lock_count++;
+
+    // bring the AABBs of the space contents and of the outside geom up to date
+    cleanGeoms();
+    geom->recomputeAABB();
+
+    for (dxGeom *member = first; member; member = member->next) {
+        if (!GEOM_ENABLED(member)) continue;
+        collideAABBs (member,geom,data,callback);
+    }
+
+    lock_count--;
+}
+
+//****************************************************************************
+// space functions
+
+dxSpace *dSimpleSpaceCreate (dxSpace *space)
+{
+    // `space' is handed straight to the dxSimpleSpace constructor
+    dxSimpleSpace *const created = new dxSimpleSpace (space);
+    return created;
+}
+
+
+dxSpace *dHashSpaceCreate (dxSpace *space)
+{
+    // `space' is handed straight to the dxHashSpace constructor
+    dxHashSpace *const created = new dxHashSpace (space);
+    return created;
+}
+
+
+void dHashSpaceSetLevels (dxSpace *space, int minlevel, int maxlevel)
+{
+    dAASSERT (space);
+    dUASSERT (minlevel <= maxlevel,"must have minlevel <= maxlevel");
+    dUASSERT (space->type == dHashSpaceClass,"argument must be a hash space");
+
+    // the class check above justifies the downcast
+    ((dxHashSpace*) space)->setLevels (minlevel,maxlevel);
+}
+
+
+void dHashSpaceGetLevels (dxSpace *space, int *minlevel, int *maxlevel)
+{
+    dAASSERT (space);
+    dUASSERT (space->type == dHashSpaceClass,"argument must be a hash space");
+
+    // the class check above justifies the downcast
+    ((dxHashSpace*) space)->getLevels (minlevel,maxlevel);
+}
+
+
+void dSpaceDestroy (dxSpace *space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // a space is destroyed through the generic geom destruction path
+    dGeomDestroy (space);
+}
+
+
+void dSpaceSetCleanup (dxSpace *space, int mode)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // forward the cleanup mode to the space object
+    space->setCleanup (mode);
+}
+
+
+int dSpaceGetCleanup (dxSpace *space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    const int mode = space->getCleanup();
+    return mode;
+}
+
+
+void dSpaceSetSublevel (dSpaceID space, int sublevel)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // forward the sublevel to the space object
+    space->setSublevel (sublevel);
+}
+
+
+int dSpaceGetSublevel (dSpaceID space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    const int sublevel = space->getSublevel();
+    return sublevel;
+}
+
+void dSpaceSetManualCleanup (dSpaceID space, int mode)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // forward the manual cleanup mode to the space object
+    space->setManulCleanup(mode);
+}
+
+int dSpaceGetManualCleanup (dSpaceID space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    const int mode = space->getManualCleanup();
+    return mode;
+}
+
+void dSpaceAdd (dxSpace *space, dxGeom *g)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+    CHECK_NOT_LOCKED (space);
+
+    // the space object does the actual insertion
+    space->add (g);
+}
+
+
+void dSpaceRemove (dxSpace *space, dxGeom *g)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+    CHECK_NOT_LOCKED (space);
+
+    // the space object does the actual removal
+    space->remove (g);
+}
+
+
+int dSpaceQuery (dxSpace *space, dxGeom *g)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    const int result = space->query (g);
+    return result;
+}
+
+void dSpaceClean (dxSpace *space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // have the space recompute the AABBs of its dirty geoms
+    space->cleanGeoms();
+}
+
+int dSpaceGetNumGeoms (dxSpace *space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    const int total = space->getNumGeoms();
+    return total;
+}
+
+
+dGeomID dSpaceGetGeom (dxSpace *space, int i)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // the index is range-checked inside dxSpace::getGeom()
+    dxGeom *const found = space->getGeom (i);
+    return found;
+}
+
+int dSpaceGetClass (dxSpace *space)
+{
+    dAASSERT (space);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // the class id is the value stored in the geom's `type' field
+    const int class_id = space->type;
+    return class_id;
+}
+
+
+void dSpaceCollide (dxSpace *space, void *data, dNearCallback *callback)
+{
+    dAASSERT (space && callback);
+    dUASSERT (dGeomIsSpace(space),"argument not a space");
+
+    // dispatch to the collide() implementation of the concrete space type
+    space->collide (data,callback);
+}
+
+
+// bundles a near callback with its user data pointer so that both can be
+// passed through a single `void *data' argument
+struct DataCallback {
+    // user data to hand to `callback'
+    void *data;
+    // the callback to invoke
+    dNearCallback *callback;
+};
+// Near callback adapter: `data' must point to a DataCallback. The wrapped
+// callback is invoked with its own data pointer and the two geoms exchanged.
+static void swap_callback(void *data, dxGeom *g1, dxGeom *g2)
+{
+    DataCallback *const wrapped = static_cast<DataCallback*>(data);
+    wrapped->callback(wrapped->data, g2, g1);
+}
+
+
+void dSpaceCollide2 (dxGeom *g1, dxGeom *g2, void *data,
+                     dNearCallback *callback)
+{
+    dAASSERT (g1 && g2 && callback);
+
+    // view each argument as a space if it is one, otherwise 0
+    dxSpace *s1 = IS_SPACE(g1) ? (dxSpace*) g1 : 0;
+    dxSpace *s2 = IS_SPACE(g2) ? (dxSpace*) g2 : 0;
+
+    // two spaces with different sublevels: only the one with the larger
+    // sublevel is treated as a space, the other is handled as a plain geom
+    if (s1 && s2) {
+        const int l1 = s1->getSublevel();
+        const int l2 = s2->getSublevel();
+        if (l1 > l2) {
+            s2 = 0;
+        }
+        else if (l1 < l2) {
+            s1 = 0;
+        }
+    }
+
+    // geom against geom: make sure both have valid AABBs, then test them
+    if (!s1 && !s2) {
+        g1->recomputeAABB();
+        g2->recomputeAABB();
+        collideAABBs(g1,g2, data, callback);
+        return;
+    }
+
+    // space against geom
+    if (!s2) {
+        s1->collide2 (data,g2,callback);
+        return;
+    }
+
+    // geom against space: go through swap_callback so that the user callback
+    // still receives the pair in (g1 side, g2 side) order
+    if (!s1) {
+        DataCallback dc = {data, callback};
+        s2->collide2 (&dc,g1,swap_callback);
+        return;
+    }
+
+    // a space against itself --> interior collision
+    if (s1==s2) {
+        s1->collide (data,callback);
+        return;
+    }
+
+    // two distinct spaces: iterate through the one with the fewest geoms,
+    // calling collide2 in the other space for each of them
+    if (s1->count < s2->count) {
+        DataCallback dc = {data, callback};
+        for (dxGeom *g = s1->first; g; g=g->next) {
+            s2->collide2 (&dc,g,swap_callback);
+        }
+    }
+    else {
+        for (dxGeom *g = s2->first; g; g=g->next) {
+            s1->collide2 (data,g,callback);
+        }
+    }
+}
diff --git a/libs/ode-0.16.1/ode/src/collision_space_internal.h b/libs/ode-0.16.1/ode/src/collision_space_internal.h
new file mode 100644
index 0000000..be69b81
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_space_internal.h
@@ -0,0 +1,80 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+stuff common to all spaces
+
+*/
+
+#ifndef _ODE_COLLISION_SPACE_INTERNAL_H_
+#define _ODE_COLLISION_SPACE_INTERNAL_H_
+
+// shorthand used by the space implementations; expands to dALLOCA16
+#define ALLOCA(size) dALLOCA16(size)
+
+
+// collide two geoms together. for the hash table space, this is
+// called if the two AABBs inhabit the same hash table cells.
+// this only calls the callback function if the AABBs actually
+// intersect. if a geom has an AABB test function, that is called to
+// provide a further refinement of the intersection.
+//
+// NOTE: this assumes that the geom AABBs are valid on entry
+// and that both geoms are enabled.
+
+static inline void collideAABBs (dxGeom *g1, dxGeom *g2,
+                                 void *data, dNearCallback *callback)
+{
+    dIASSERT((g1->gflags & GEOM_AABB_BAD)==0);
+    dIASSERT((g2->gflags & GEOM_AABB_BAD)==0);
+
+    // two geoms attached to the same (non-zero) body never produce contacts
+    if (g1->body && g1->body == g2->body) return;
+
+    // at least one geom's category bits must match the other's collide bits
+    const bool bits_match =
+        (g1->category_bits & g2->collide_bits) != 0 ||
+        (g2->category_bits & g1->collide_bits) != 0;
+    if (!bits_match) return;
+
+    // nothing to do if the bounding boxes are separated along any axis.
+    // even slots hold the minimum, odd slots the maximum of an axis.
+    dReal *bounds1 = g1->aabb;
+    dReal *bounds2 = g2->aabb;
+    for (int axis = 0; axis < 6; axis += 2) {
+        if (bounds1[axis] > bounds2[axis+1] ||
+            bounds1[axis+1] < bounds2[axis]) {
+                return;
+        }
+    }
+
+    // give each geom the chance to prove that it doesn't intersect the AABB
+    // of the other
+    if (g1->AABBTest (g2,bounds2) == 0 ||
+        g2->AABBTest (g1,bounds1) == 0) {
+            return;
+    }
+
+    // the objects might actually intersect - call the space callback function
+    callback (data,g1,g2);
+}
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/collision_std.h b/libs/ode-0.16.1/ode/src/collision_std.h
new file mode 100644
index 0000000..710e580
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_std.h
@@ -0,0 +1,238 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+the standard ODE geometry primitives.
+
+*/
+
+#ifndef _ODE_COLLISION_STD_H_
+#define _ODE_COLLISION_STD_H_
+
+#include <ode/common.h>
+#include "collision_kernel.h"
+
+
+// primitive collision functions - each of these has the dColliderFn
+// interface, i.e. the same signature as dCollide(). the first and second
+// geom arguments must have the types that the function name specifies.
+
+int dCollideSphereSphere (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip);
+int dCollideSphereBox (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip);
+int dCollideSpherePlane (dxGeom *o1, dxGeom *o2, int flags,
+                         dContactGeom *contact, int skip);
+int dCollideBoxBox (dxGeom *o1, dxGeom *o2, int flags,
+                    dContactGeom *contact, int skip);
+int dCollideBoxPlane (dxGeom *o1, dxGeom *o2,
+                      int flags, dContactGeom *contact, int skip);
+int dCollideCapsuleSphere (dxGeom *o1, dxGeom *o2, int flags,
+                           dContactGeom *contact, int skip);
+int dCollideCapsuleBox (dxGeom *o1, dxGeom *o2, int flags,
+                        dContactGeom *contact, int skip);
+int dCollideCapsuleCapsule (dxGeom *o1, dxGeom *o2,
+                            int flags, dContactGeom *contact, int skip);
+int dCollideCapsulePlane (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip);
+int dCollideRaySphere (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip);
+int dCollideRayBox (dxGeom *o1, dxGeom *o2, int flags,
+                    dContactGeom *contact, int skip);
+int dCollideRayCapsule (dxGeom *o1, dxGeom *o2,
+                        int flags, dContactGeom *contact, int skip);
+int dCollideRayPlane (dxGeom *o1, dxGeom *o2, int flags,
+                      dContactGeom *contact, int skip);
+int dCollideRayCylinder (dxGeom *o1, dxGeom *o2, int flags,
+                         dContactGeom *contact, int skip);
+
+// Cylinder colliders. Cylinder - Box/Sphere by (C) CroTeam
+// Ported by Nguyen Binh
+int dCollideCylinderBox(dxGeom *o1, dxGeom *o2, 
+                        int flags, dContactGeom *contact, int skip);
+int dCollideCylinderSphere(dxGeom *gCylinder, dxGeom *gSphere, 
+                           int flags, dContactGeom *contact, int skip); 
+int dCollideCylinderPlane(dxGeom *gCylinder, dxGeom *gPlane, 
+                          int flags, dContactGeom *contact, int skip); 
+
+//--> Convex Collision (colliders in which at least one geom is a convex)
+int dCollideConvexPlane (dxGeom *o1, dxGeom *o2, int flags,
+                         dContactGeom *contact, int skip);
+int dCollideSphereConvex (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip);
+int dCollideConvexBox (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip);
+int dCollideConvexCapsule (dxGeom *o1, dxGeom *o2,
+                           int flags, dContactGeom *contact, int skip);
+int dCollideConvexConvex (dxGeom *o1, dxGeom *o2, int flags, 
+                          dContactGeom *contact, int skip);
+int dCollideRayConvex (dxGeom *o1, dxGeom *o2, int flags, 
+                       dContactGeom *contact, int skip);
+//<-- Convex Collision
+
+// dHeightfield collider (same five-argument signature as the ones above)
+int dCollideHeightfield( dxGeom *o1, dxGeom *o2, 
+                        int flags, dContactGeom *contact, int skip );
+
+//****************************************************************************
+// the basic geometry objects
+
+struct dxSphere : public dxGeom {	// sphere geom: a dxGeom plus a radius
+    dReal radius;		// sphere radius
+    dxSphere (dSpaceID space, dReal _radius);	// declared only; presumably stores _radius in radius - TODO confirm in the definition
+    void computeAABB();	// declared only; defined outside this header
+};
+
+
+struct dxBox : public dxGeom {	// box geom: a dxGeom plus its side lengths
+    dVector3 side;	// side lengths (x,y,z)
+    dxBox (dSpaceID space, dReal lx, dReal ly, dReal lz);	// declared only; presumably fills side from lx,ly,lz - TODO confirm
+    void computeAABB();	// declared only; defined outside this header
+};
+
+
+struct dxCapsule : public dxGeom {	// capsule geom: a dxGeom plus radius and length
+    dReal radius,lz;	// radius, length along z axis
+    dxCapsule (dSpaceID space, dReal _radius, dReal _length);	// declared only; presumably sets radius and lz - TODO confirm
+    void computeAABB();	// declared only; defined outside this header
+};
+
+
+struct dxCylinder : public dxGeom {	// cylinder geom: a dxGeom plus radius and length
+    dReal radius,lz;        // radius, length along z axis
+    dxCylinder (dSpaceID space, dReal _radius, dReal _length);	// declared only; presumably sets radius and lz - TODO confirm
+    void computeAABB();	// declared only; defined outside this header
+};
+
+
+struct dxPlane : public dxGeom {	// plane geom: a dxGeom plus four plane parameters
+    dReal p[4];	// plane parameters; presumably the (a,b,c,d) given to the constructor - TODO confirm
+    dxPlane (dSpaceID space, dReal a, dReal b, dReal c, dReal d);	// declared only; defined outside this header
+    void computeAABB();	// declared only; defined outside this header
+};
+
+
+struct dxRay : public dxGeom {	// ray geom: a dxGeom plus a length
+    dReal length;	// ray length; presumably initialised from _length - TODO confirm
+    dxRay (dSpaceID space, dReal _length);	// declared only; defined outside this header
+    void computeAABB();	// declared only; defined outside this header
+};
+
+struct dxConvex : public dxGeom 
+{  
+    const dReal *planes; /*!< An array of planes, each one in the form:
+                   normal X, normal Y, normal Z, distance
+                   */
+    const dReal *points; /*!< An array of points, three consecutive values (X,Y,Z) per point */  
+    const unsigned int *polygons; /*!< An array describing each polygon: the number of its vertices followed by that many indices into "points", in counter clockwise order */
+    unsigned int planecount; /*!< Number of planes in planes */
+    unsigned int pointcount;/*!< Number of points in points */
+    unsigned int edgecount;/*!< Number of edges in convex */
+    dReal saabb[6];/*!< Static AABB */
+    dxConvex(dSpaceID space,
+        const dReal *planes,
+        unsigned int planecount,
+        const dReal *points,
+        unsigned int pointcount,
+        const unsigned int *polygons);
+    ~dxConvex()
+    {
+        if((edgecount!=0)&&(edges!=NULL)) delete[] edges;	// edges is released as an array, and only when edgecount is non-zero
+    }
+    void computeAABB();
+    struct edge	// one edge, stored as a pair of unsigned indices
+    {
+        unsigned int first;	// presumably an index into points - TODO confirm in FillEdges()
+        unsigned int second;	// presumably an index into points - TODO confirm in FillEdges()
+    };
+    edge* edges;	// dynamic edge array, freed by the destructor above
+
+    /*! \brief A support mapping function for convex shapes: scans all points for the one with the largest dot product against the rotated direction
+    \param dir [IN] direction to find the Support Point for (it is multiplied with final_posr->R by dMultiply1_331 before use)
+    \return the index of the support vertex; the lowest index wins ties, and 0 is returned when pointcount is 0 or 1
+    */
+    inline unsigned int SupportIndex(dVector3 dir)
+    {
+        dVector3 rdir;
+        unsigned int index=0;
+        dMultiply1_331 (rdir,final_posr->R,dir);	// presumably brings dir into the frame the points are stored in - TODO confirm
+        dReal max = dCalcVectorDot3(points,rdir);	// seed with point 0 (it is read even when pointcount is 0)
+        dReal tmp;
+        for (unsigned int i = 1; i < pointcount; ++i) 
+        {
+            tmp = dCalcVectorDot3(points+(i*3),rdir);	// point i starts at points[3*i]
+            if (tmp > max) 
+            {
+                index=i;
+                max = tmp; 
+            }
+        }
+        return index;
+    }
+
+private:
+    // For Internal Use Only
+    /*! \brief Fills the edges dynamic array based on points and polygons.
+    */
+    void FillEdges();
+#if 0
+    /*
+    What this does is the same as the Support function by doing some preprocessing
+    for optimization. Not complete yet.
+    */
+    // Based on Eberly's Game Physics Book page 307
+    struct Arc
+    {
+        // indices of polyhedron normals that form the spherical arc
+        int normals[2];
+        // index of edge shared by polyhedron faces
+        int edge;
+    };
+    struct Polygon
+    {
+        // indices of polyhedron normals that form the spherical polygon
+        std::vector<int> normals;
+        // index of extreme vertex corresponding to this polygon
+        int vertex;
+    };
+    // This is for extreme feature query and not the usual level BSP structure (that comes later)
+    struct BSPNode
+    {
+        // Normal index (interior node), vertex index (leaf node)
+        int normal;
+        // if Dot (E,D)>=0, D gets propagated to this child
+        BSPNode* right;
+        // if Dot (E,D)<0, D gets propagated to this child
+        BSPNode* left;
+    };
+    void CreateTree();
+    BSPNode* CreateNode(std::vector<Arc> Arcs,std::vector<Polygon> Polygons);
+    void GetFacesSharedByVertex(int i, std::vector<int> f);
+    void GetFacesSharedByEdge(int i, int* f);
+    void GetFaceNormal(int i, dVector3 normal);
+    BSPNode* tree;
+#endif
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/collision_transform.cpp b/libs/ode-0.16.1/ode/src/collision_transform.cpp
new file mode 100644
index 0000000..ece3d53
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_transform.cpp
@@ -0,0 +1,234 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+geom transform
+
+*/
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_transform.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// dxGeomTransform class
+
+struct dxGeomTransform : public dxGeom {
+    dxGeom *obj;		// object that is being transformed (0 while nothing is wrapped)
+    int cleanup;		// non-zero to destroy obj when this transform is destroyed
+    int infomode;		// non-zero to put this Tx geom in dContactGeom g1 instead of obj
+
+    // cached final object transform (body tx + relative tx). it is written by
+    // computeFinalTx(), which computeAABB() calls, and it is valid while the AABB is valid.
+    dxPosR transform_posr;
+
+    dxGeomTransform (dSpaceID space);	// starts with no wrapped geom, cleanup and infomode off
+    ~dxGeomTransform();	// deletes obj only when cleanup is non-zero
+    void computeAABB();	// takes the AABB of obj placed at transform_posr
+    void computeFinalTx();	// refreshes transform_posr; requires obj to be non-null
+};
+/*
+void RunMe()
+{
+printf("sizeof body = %i\n", sizeof(dxBody));
+printf("sizeof geom = %i\n", sizeof(dxGeom));
+printf("sizeof geomtransform = %i\n", sizeof(dxGeomTransform));
+printf("sizeof posr = %i\n", sizeof(dxPosR));
+}
+*/
+
+dxGeomTransform::dxGeomTransform (dSpaceID space) :
+    dxGeom (space,1), obj (0), cleanup (0), infomode (0)
+{
+    // `type` is inherited from dxGeom, so it has to be assigned here rather
+    // than in the initializer list above
+    type = dGeomTransformClass;
+    dSetZero (transform_posr.pos,4);
+    dRSetIdentity (transform_posr.R);
+}
+
+
+dxGeomTransform::~dxGeomTransform()
+{
+    if (cleanup && obj) delete obj;	// only a transform in cleanup mode destroys its wrapped geom
+}
+
+// recompute aabb as the AABB of the wrapped geom placed at its final transform
+void dxGeomTransform::computeAABB()
+{
+    if (!obj) {
+        dSetZero (aabb,6);	// nothing is wrapped: all six bounds become zero
+        return;
+    }
+
+    // remember the wrapped geom's own posr pointer so it can be put back below
+    dxPosR* posr_bak = obj->final_posr;
+
+    // refresh transform_posr and make the wrapped geom temporarily use it
+    computeFinalTx();
+    obj->final_posr = &transform_posr;
+
+    // let the wrapped geom compute its AABB there, then copy the six bounds
+    obj->computeAABB();
+    memcpy (aabb,obj->aabb,6*sizeof(dReal));
+
+    // restore the wrapped geom's original posr pointer
+    obj->final_posr = posr_bak;
+}
+
+
+// utility function for dCollideTransform() : compute final pos and R
+// for the encapsulated geom object
+
+void dxGeomTransform::computeFinalTx()
+{
+    // combine this geom's own transform with the wrapped geom's relative one
+    dxPosR *own = final_posr, *inner = obj->final_posr;
+    dMultiply0_331 (transform_posr.pos,own->R,inner->pos);
+    for (int axis=0; axis<3; axis++) transform_posr.pos[axis] += own->pos[axis];
+    dMultiply0_333 (transform_posr.R,own->R,inner->R);
+}
+
+//****************************************************************************
+// collider function:
+// this collides a transformed geom with another geom. the other geom can
+// also be a transformed geom, but this case is not handled specially.
+
+int dCollideTransform (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dGeomTransformClass);
+
+    dxGeomTransform *tr = (dxGeomTransform*) o1;
+    if (!tr->obj) return 0;	// nothing is wrapped, so there are no contacts
+    dUASSERT (tr->obj->parent_space==0,
+        "GeomTransform encapsulated object must not be in a space");
+    dUASSERT (tr->obj->body==0,
+        "GeomTransform encapsulated object must not be attached "
+        "to a body");
+
+    // backup the posr pointer and the body pointer of the encapsulated geom
+    // object; both are overwritten below and put back before returning
+    dxPosR *posr_bak = tr->obj->final_posr;
+    dxBody *bodybak = tr->obj->body;
+
+    // compute temporary pos and R for the encapsulated geom object.
+    // note that transform_posr is already valid if GEOM_AABB_BAD is not set,
+    // because computeFinalTx() will have already been called in
+    // dxGeomTransform::computeAABB()
+
+    if (tr->gflags & GEOM_AABB_BAD) tr->computeFinalTx();
+    tr->obj->final_posr = &tr->transform_posr;
+    tr->obj->body = o1->body;
+
+    // do the collision with the encapsulated geom standing in for o1
+    int n = dCollide (tr->obj,o2,flags,contact,skip);
+
+    // if required, adjust the 'g1' values in the generated contacts so that
+    // they indicate the GeomTransform object instead of the encapsulated
+    // object.
+    if (tr->infomode) {
+        for (int i=0; i<n; i++) {
+            dContactGeom *c = CONTACT(contact,skip*i);
+            c->g1 = o1;
+        }
+    }
+
+    // restore the pos, R and body
+    tr->obj->final_posr = posr_bak;
+    tr->obj->body = bodybak;
+    return n;	// number of contacts reported by dCollide()
+}
+
+//****************************************************************************
+// public API
+
+dGeomID dCreateGeomTransform (dSpaceID space)
+{
+    return static_cast<dGeomID> (new dxGeomTransform (space));	// handed back through the generic geom handle
+}
+
+// set (or replace) the geom wrapped by the transform g
+void dGeomTransformSetGeom (dGeomID g, dGeomID obj)
+{
+    dUASSERT (g && g->type == dGeomTransformClass, "argument not a geom transform");
+    dxGeomTransform *tr = (dxGeomTransform*) g;
+    // cleanup mode destroys the previous geom - but never obj itself, since
+    // re-setting the same geom would free it and leave tr->obj dangling
+    if (tr->obj && tr->cleanup && tr->obj != obj) delete tr->obj;
+    tr->obj = obj;
+}
+
+// return the geom wrapped by the transform g (0 until one has been set)
+dGeomID dGeomTransformGetGeom (dGeomID g)
+{
+    dUASSERT (g && g->type == dGeomTransformClass,
+        "argument not a geom transform");
+    const dxGeomTransform *transform = static_cast<dxGeomTransform*> (g);
+    return transform->obj;
+}
+
+// set the cleanup mode of the transform g: non-zero makes it destroy its geom
+void dGeomTransformSetCleanup (dGeomID g, int mode)
+{
+    // refuse handles that are null or not geom transforms
+    dUASSERT (g && g->type == dGeomTransformClass, "argument not a geom transform");
+    dxGeomTransform *transform = static_cast<dxGeomTransform*> (g);
+    transform->cleanup = mode;
+}
+
+// report the cleanup mode of the transform g (0 for a fresh transform)
+int dGeomTransformGetCleanup (dGeomID g)
+{
+    dUASSERT (g && g->type == dGeomTransformClass,
+        "argument not a geom transform");
+    const dxGeomTransform *transform = static_cast<dxGeomTransform*> (g);
+    return transform->cleanup;
+}
+
+// set the info mode of the transform g: non-zero makes contacts name g as g1
+void dGeomTransformSetInfo (dGeomID g, int mode)
+{
+    // refuse handles that are null or not geom transforms
+    dUASSERT (g && g->type == dGeomTransformClass, "argument not a geom transform");
+    dxGeomTransform *transform = static_cast<dxGeomTransform*> (g);
+    transform->infomode = mode;
+}
+
+// report the info mode of the transform g (0 for a fresh transform)
+int dGeomTransformGetInfo (dGeomID g)
+{
+    dUASSERT (g && g->type == dGeomTransformClass,
+        "argument not a geom transform");
+    const dxGeomTransform *transform = static_cast<dxGeomTransform*> (g);
+    return transform->infomode;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/collision_transform.h b/libs/ode-0.16.1/ode/src/collision_transform.h
new file mode 100644
index 0000000..c3cd27c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_transform.h
@@ -0,0 +1,39 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+geom transform
+
+*/
+
+#ifndef _ODE_COLLISION_TRANSFORM_H_
+#define _ODE_COLLISION_TRANSFORM_H_
+
+#include <ode/common.h>
+#include "collision_kernel.h"
+
+// collider with the same signature as dCollide(); o1 must be a geom transform
+int dCollideTransform (dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_box.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_box.cpp
new file mode 100644
index 0000000..521ed43
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_box.cpp
@@ -0,0 +1,1380 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+/*************************************************************************
+ *                                                                       *
+ * Triangle-box collider by Alen Ladavac and Vedran Klanac.              *
+ * Ported to ODE by Oskari Nyman.                                        *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+
+#if dTRIMESH_ENABLED
+
+
+// largest representable value: FLT_MAX when dSINGLE is defined, else DBL_MAX
+#if defined(dSINGLE)
+#define MAXVALUE FLT_MAX
+#else
+#define MAXVALUE DBL_MAX
+#endif
+
+
+// dVector3
+// r=a-b (the result is the last macro argument)
+#define SUBTRACT(a,b,r) dSubtractVectors3(r, a, b)
+
+
+// dVector3
+// a=b
+#define SET(a,b) dCopyVector3(a, b)
+
+
+// dMatrix3
+// a=b (NOTE(review): forwards to dCopyMatrix4x4 - confirm it matches dMatrix3 storage)
+#define SETM(a,b) dCopyMatrix4x4(a, b)
+
+
+// dVector3
+// r=a+b (the result is the last macro argument)
+#define ADD(a,b,r) dAddVectors3(r, a, b)
+
+
+// dMatrix3, int, dVector3
+// v=column a from m
+#define GETCOL(m,a,v) dGetMatrixColumn3(v, m, a)
+
+
+// dVector4, dVector3
+// distance between plane p and point v (arguments are swapped for the callee)
+#define POINTDISTANCE(p,v) dPointPlaneDistance(v, p)
+
+
+// dVector4, dVector3, dReal
+// construct plane from normal and d (the output plane is the first macro argument)
+#define CONSTRUCTPLANE(plane,normal,d) dConstructPlane(normal, d, plane)
+
+
+// dVector3
+// length of vector a
+#define LENGTHOF(a) dCalcVectorLength3(a)
+
+// working state for colliding one box against the triangles of a trimesh
+struct sTrimeshBoxColliderData
+{
+    sTrimeshBoxColliderData(): m_iBestAxis(0), m_iExitAxis(0), m_ctContacts(0) {}	// only these three members are initialised here
+
+    void SetupInitialContext(dxTriMesh *TriMesh, dxGeom *BoxGeom,
+        int Flags, dContactGeom* Contacts, int Stride);
+    void TestCollisionForSingleTriangle(int Triint, dVector3 dv[3], bool &bOutFinishSearching);
+
+    bool _cldTestNormal(dReal fp0, dReal fR, dVector3 vNormal, int iAxis);	// triangle normal as separating axis
+    bool _cldTestFace(dReal fp0, dReal fp1, dReal fp2, dReal fR, dReal fD,
+        dVector3 vNormal, int iAxis);	// box axis as separating axis
+    bool _cldTestEdge(dReal fp0, dReal fp1, dReal fR, dReal fD,
+        dVector3 vNormal, int iAxis);	// box axis x triangle edge as separating axis
+    bool _cldTestSeparatingAxes(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2);	// runs the axis tests for one triangle
+    void _cldClipping(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, int TriIndex);
+    bool _cldTestOneTriangle(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, int TriIndex);
+
+    void GenerateContact(int TriIndex, const dVector3 in_ContactPos, const dVector3 in_Normal, dReal in_Depth);
+
+    // box data
+    dMatrix3 m_mHullBoxRot;	// box rotation; its columns are used as the box axes
+    dVector3 m_vHullBoxPos;	// box position; subtracted from triangle vertex v0 in _cldTestSeparatingAxes
+    dVector3 m_vBoxHalfSize;	// box half sizes along its three axes
+
+    // mesh data
+    dVector3   m_vHullDstPos;
+
+    // global collider data
+    dVector3 m_vBestNormal;	// normal recorded for the best axis found so far
+    dReal    m_fBestDepth;	// smallest depth found so far; reset to MAXVALUE by _cldTestSeparatingAxes
+    int    m_iBestAxis;	// index of the best axis found so far; reset to 0 by _cldTestSeparatingAxes
+    int    m_iExitAxis;	// index of the axis that proved separation; reset to -1 by _cldTestSeparatingAxes
+    dVector3 m_vE0, m_vE1, m_vE2, m_vN;	// triangle edges v1-v0, v2-v0, E1-E0 and the normal E0 x E1
+
+    // global info for contact creation
+    int m_iFlags;
+    dContactGeom *m_ContactGeoms;
+    int m_iStride;
+    dxGeom *m_Geom1;
+    dxGeom *m_Geom2;
+    int m_ctContacts;
+};
+// Test normal of mesh face as separating axis for intersection. Returns false when
+// fR+fp0 is negative (no overlap on this axis); otherwise true, after updating the best axis.
+bool sTrimeshBoxColliderData::_cldTestNormal(dReal fp0, dReal fR, dVector3 vNormal, int iAxis)
+{
+    // calculate overlapping interval of box and triangle
+    dReal fDepth = fR+fp0;
+
+    // if we do not overlap
+    if ( fDepth<0 ) {
+        // report the separation to the caller
+        return false;
+    }
+
+    // calculate normal's length
+    dReal fLength = LENGTHOF(vNormal);
+    // if long enough (a zero-length normal leaves the best axis untouched)
+    if ( fLength > 0.0f ) {
+
+        dReal fOneOverLength = 1.0f/fLength;
+        // normalize depth (vNormal is not unit length on entry)
+        fDepth = fDepth*fOneOverLength;
+
+        // keep the minimum depth; the stored normal is the negated unit normal
+        if (fDepth < m_fBestDepth) {
+            m_vBestNormal[0] = -vNormal[0]*fOneOverLength;
+            m_vBestNormal[1] = -vNormal[1]*fOneOverLength;
+            m_vBestNormal[2] = -vNormal[2]*fOneOverLength;
+            m_iBestAxis = iAxis;
+            //dAASSERT(fDepth>=0);
+            m_fBestDepth = fDepth;
+        }
+    }
+
+    return true;
+}
+
+
+// Test box axis as separating axis. fp0, fp1 and fp2 span the triangle's interval on the
+// axis and fR is the box extent on it. Returns false if the intervals do not overlap;
+// otherwise true, after updating the best axis. vNormal may be negated in place.
+bool sTrimeshBoxColliderData::_cldTestFace(dReal fp0, dReal fp1, dReal fp2, dReal fR, dReal fD,
+                                           dVector3 vNormal, int iAxis)
+{
+    dReal fMin, fMax;
+
+    // find min of triangle interval
+    if ( fp0 < fp1 ) {
+        if ( fp0 < fp2 ) {
+            fMin = fp0;
+        } else {
+            fMin = fp2;
+        }
+    } else {
+        if( fp1 < fp2 ) {
+            fMin = fp1;
+        } else {
+            fMin = fp2;
+        }
+    }
+
+    // find max of triangle interval
+    if ( fp0 > fp1 ) {
+        if ( fp0 > fp2 ) {
+            fMax = fp0;
+        } else {
+            fMax = fp2;
+        }
+    } else {
+        if( fp1 > fp2 ) {
+            fMax = fp1;
+        } else {
+            fMax = fp2;
+        }
+    }
+
+    // calculate minimum and maximum depth
+    dReal fDepthMin = fR - fMin;
+    dReal fDepthMax = fMax + fR;
+
+    // if we don't have an overlapping interval
+    if ( fDepthMin < 0 || fDepthMax < 0 ) {
+        // report the separation to the caller
+        return false;
+    }
+
+    dReal fDepth = 0;
+
+    // if greater depth is on negative side
+    if ( fDepthMin > fDepthMax ) {
+        // use smaller depth (one from positive side)
+        fDepth = fDepthMax;
+        // flip normal direction (this writes to the caller's vNormal)
+        vNormal[0] = -vNormal[0];
+        vNormal[1] = -vNormal[1];
+        vNormal[2] = -vNormal[2];
+        fD = -fD;	// fD is a by-value parameter that is not read again in this function
+        // if greater depth is on positive side
+    } else {
+        // use smaller depth (one from negative side)
+        fDepth = fDepthMin;
+    }
+
+    // if lower depth than best found so far
+    if (fDepth < m_fBestDepth) {
+        // remember current axis as best axis (vNormal is stored without normalising it here)
+        m_vBestNormal[0]  = vNormal[0];
+        m_vBestNormal[1]  = vNormal[1];
+        m_vBestNormal[2]  = vNormal[2];
+        m_iBestAxis    = iAxis;
+        //dAASSERT(fDepth>=0);
+        m_fBestDepth   = fDepth;
+    }
+
+    return true;
+}
+// Test cross products of box axis and triangle edges as separating axis. Returns false if
+// the intervals [fp0,fp1] and [-fR,fR] do not overlap; otherwise true. vNormal may be negated in place.
+bool sTrimeshBoxColliderData::_cldTestEdge(dReal fp0, dReal fp1, dReal fR, dReal fD,
+                                           dVector3 vNormal, int iAxis)
+{
+    dReal fMin, fMax;
+
+    // ===== Begin Patch by Francisco Leon, 2006/10/28 =====
+
+    // Fixed Null Normal. This prevents boxes passing
+    // through trimeshes at certain contact angles
+
+    fMin = vNormal[0] * vNormal[0] +
+        vNormal[1] * vNormal[1] +
+        vNormal[2] * vNormal[2];	// fMin is borrowed here to hold the squared length of vNormal
+
+    if ( fMin <= dEpsilon ) /// THIS NORMAL WOULD BE DANGEROUS
+        return true;	// the axis is skipped, which counts as "not separating"
+
+    // ===== Ending Patch by Francisco Leon =====
+
+
+    // calculate min and max interval values
+    if ( fp0 < fp1 ) {
+        fMin = fp0;
+        fMax = fp1;
+    } else {
+        fMin = fp1;
+        fMax = fp0;
+    }
+
+    // check if we overlap
+    dReal fDepthMin = fR - fMin;
+    dReal fDepthMax = fMax + fR;
+
+    // if we don't overlap
+    if ( fDepthMin < 0 || fDepthMax < 0 ) {
+        // report the separation to the caller
+        return false;
+    }
+
+    dReal fDepth;
+
+    // if greater depth is on negative side
+    if ( fDepthMin > fDepthMax ) {
+        // use smaller depth (one from positive side)
+        fDepth = fDepthMax;
+        // flip normal direction (this writes to the caller's vNormal)
+        vNormal[0] = -vNormal[0];
+        vNormal[1] = -vNormal[1];
+        vNormal[2] = -vNormal[2];
+        fD = -fD;
+        // if greater depth is on positive side
+    } else {
+        // use smaller depth (one from negative side)
+        fDepth = fDepthMin;
+    }
+
+    // calculate normal's length
+    dReal fLength = LENGTHOF(vNormal);
+
+    // if long enough
+    if ( fLength > 0.0f ) {
+
+        // normalize depth
+        dReal fOneOverLength = 1.0f/fLength;
+        fDepth = fDepth*fOneOverLength;
+        fD*=fOneOverLength;	// fD is a by-value parameter that is not read again in this function
+
+        // if lower depth than best found so far (the 1.5 factor favors faces over edges)
+        if (fDepth*1.5f < m_fBestDepth) {
+            // remember current axis as best axis, storing the unit-length normal
+            m_vBestNormal[0]  = vNormal[0]*fOneOverLength;
+            m_vBestNormal[1]  = vNormal[1]*fOneOverLength;
+            m_vBestNormal[2]  = vNormal[2]*fOneOverLength;
+            m_iBestAxis    = iAxis;
+            //dAASSERT(fDepth>=0);
+            m_fBestDepth   = fDepth;
+        }
+    }
+
+    return true;
+}
+
+// Writes into avArrayOut the part of polygon avArrayIn (ctIn points) with distance >= 0
+// to plPlane and stores the point count in ctOut. No bound on avArrayOut is checked here.
+static void _cldClipPolyToPlane( dVector3 avArrayIn[], int ctIn,
+                                dVector3 avArrayOut[], int &ctOut,
+                                const dVector4 &plPlane )
+{
+    // start with no output points
+    ctOut = 0;
+
+    int i0 = ctIn-1;
+
+    // for each edge in input polygon (i0 -> i1, starting with last -> first)
+    for (int i1=0; i1<ctIn; i0=i1, i1++) {
+
+
+        // calculate distance of edge points to plane
+        dReal fDistance0 = POINTDISTANCE( plPlane ,avArrayIn[i0] );
+        dReal fDistance1 = POINTDISTANCE( plPlane ,avArrayIn[i1] );
+
+
+        // if first point is in front of (or exactly on) the plane
+        if( fDistance0 >= 0 ) {
+            // emit point
+            avArrayOut[ctOut][0] = avArrayIn[i0][0];
+            avArrayOut[ctOut][1] = avArrayIn[i0][1];
+            avArrayOut[ctOut][2] = avArrayIn[i0][2];
+            ctOut++;
+        }
+
+        // if points are strictly on different sides (so the divisor below is non-zero)
+        if( (fDistance0 > 0 && fDistance1 < 0) || ( fDistance0 < 0 && fDistance1 > 0) ) {
+
+            // find intersection point of edge and plane
+            dVector3 vIntersectionPoint;
+            vIntersectionPoint[0]= avArrayIn[i0][0] - (avArrayIn[i0][0]-avArrayIn[i1][0])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[1]= avArrayIn[i0][1] - (avArrayIn[i0][1]-avArrayIn[i1][1])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[2]= avArrayIn[i0][2] - (avArrayIn[i0][2]-avArrayIn[i1][2])*fDistance0/(fDistance0-fDistance1);
+
+            // emit intersection point
+            avArrayOut[ctOut][0] = vIntersectionPoint[0];
+            avArrayOut[ctOut][1] = vIntersectionPoint[1];
+            avArrayOut[ctOut][2] = vIntersectionPoint[2];
+            ctOut++;
+        }
+    }
+
+}
+
+// Separating-axis test between the cached hull box and the triangle (v0, v1, v2).
+// Tries 13 candidate axes in order; on the first rejected axis it stores the axis
+// number in m_iExitAxis and returns false. A degenerate triangle also returns false.
+bool sTrimeshBoxColliderData::_cldTestSeparatingAxes(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2) {
+    // reset best axis (presumably refined by the _cldTest* helpers - confirm there)
+    m_iBestAxis = 0;
+    m_iExitAxis = -1;
+    m_fBestDepth = MAXVALUE;
+
+    // calculate edges: E0 = v1-v0, E1 = v2-v0, E2 = E1-E0 (i.e. v2-v1)
+    SUBTRACT(v1,v0,m_vE0);
+    SUBTRACT(v2,v0,m_vE1);
+    SUBTRACT(m_vE1,m_vE0,m_vE2);
+
+    // calculate poly normal N = E0 x E1 (not normalized)
+    dCalcVectorCross3(m_vN,m_vE0,m_vE1);
+
+    // calculate length of face normal
+    dReal fNLen = LENGTHOF(m_vN);
+
+    // Even though all triangles might be initially valid,
+    // a triangle may degenerate into a segment after applying
+    // space transformation. (m_iExitAxis stays -1 on this path.)
+    if (!fNLen) {
+        return false;
+    }
+
+    // extract box axes (columns of the box rotation) as vectors
+    dVector3 vA0,vA1,vA2;
+    GETCOL(m_mHullBoxRot,0,vA0);
+    GETCOL(m_mHullBoxRot,1,vA1);
+    GETCOL(m_mHullBoxRot,2,vA2);
+
+    // box halfsizes
+    dReal fa0 = m_vBoxHalfSize[0];
+    dReal fa1 = m_vBoxHalfSize[1];
+    dReal fa2 = m_vBoxHalfSize[2];
+
+    // calculate relative position between box and triangle (v0 minus box position)
+    dVector3 vD;
+    SUBTRACT(v0,m_vHullBoxPos,vD);
+
+    dVector3 vL;
+    dReal fp0, fp1, fp2, fR, fD;
+    // per axis: vL = axis, fp0..fp2 = v0..v2 projected on vL relative to the box, fR = box extent on vL
+    // Test separating axes for intersection
+    // ************************************************
+    // Axis 1 - Triangle Normal
+    SET(vL,m_vN);
+    fp0  = dCalcVectorDot3(vL,vD);
+    fp1  = fp0; // N is perpendicular to E0 and E1, so all three vertices project alike
+    fp2  = fp0;
+    fR=fa0*dFabs( dCalcVectorDot3(m_vN,vA0) ) + fa1 * dFabs( dCalcVectorDot3(m_vN,vA1) ) + fa2 * dFabs( dCalcVectorDot3(m_vN,vA2) );
+
+    if (!_cldTestNormal(fp0, fR, vL, 1)) {
+        m_iExitAxis=1;
+        return false;
+    }
+
+    // ************************************************
+
+    // Test Faces (box axes): v1 = v0 + E0 and v2 = v0 + E1, hence the added dot products
+    // ************************************************
+    // Axis 2 - Box X-Axis
+    SET(vL,vA0);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 + dCalcVectorDot3(vA0,m_vE0);
+    fp2 = fp0 + dCalcVectorDot3(vA0,m_vE1);
+    fR  = fa0;
+
+    if (!_cldTestFace(fp0, fp1, fp2, fR, fD, vL, 2)) {
+        m_iExitAxis=2;
+        return false;
+    }
+    // ************************************************
+
+    // ************************************************
+    // Axis 3 - Box Y-Axis
+    SET(vL,vA1);
+    fD = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 + dCalcVectorDot3(vA1,m_vE0);
+    fp2 = fp0 + dCalcVectorDot3(vA1,m_vE1);
+    fR  = fa1;
+
+    if (!_cldTestFace(fp0, fp1, fp2, fR, fD, vL, 3)) {
+        m_iExitAxis=3;
+        return false;
+    }
+
+    // ************************************************
+
+    // ************************************************
+    // Axis 4 - Box Z-Axis
+    SET(vL,vA2);
+    fD = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 + dCalcVectorDot3(vA2,m_vE0);
+    fp2 = fp0 + dCalcVectorDot3(vA2,m_vE1);
+    fR  = fa2;
+
+    if (!_cldTestFace(fp0, fp1, fp2, fR, fD, vL, 4)) {
+        m_iExitAxis=4;
+        return false;
+    }
+
+    // ************************************************
+
+    // Test Edges (axes A x Ej): since (A x Ej).Ek = A.(Ej x Ek), vertex offsets are 0 or +/- dot(A,N)
+    // ************************************************
+    // Axis 5 - Box X-Axis cross Edge0 (two projections coincide on every edge axis; here fp1 == fp0)
+    dCalcVectorCross3(vL,vA0,m_vE0);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0;
+    fp2 = fp0 + dCalcVectorDot3(vA0,m_vN);
+    fR  = fa1 * dFabs(dCalcVectorDot3(vA2,m_vE0)) + fa2 * dFabs(dCalcVectorDot3(vA1,m_vE0));
+
+    if (!_cldTestEdge(fp1, fp2, fR, fD, vL, 5)) {
+        m_iExitAxis=5;
+        return false;
+    }
+    // ************************************************
+
+    // ************************************************
+    // Axis 6 - Box X-Axis cross Edge1 (fp2 == fp0)
+    dCalcVectorCross3(vL,vA0,m_vE1);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA0,m_vN);
+    fp2 = fp0;
+    fR  = fa1 * dFabs(dCalcVectorDot3(vA2,m_vE1)) + fa2 * dFabs(dCalcVectorDot3(vA1,m_vE1));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 6)) {
+        m_iExitAxis=6;
+        return false;
+    }
+    // ************************************************
+
+    // ************************************************
+    // Axis 7 - Box X-Axis cross Edge2 (fp1 == fp2)
+    dCalcVectorCross3(vL,vA0,m_vE2);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA0,m_vN);
+    fp2 = fp0 - dCalcVectorDot3(vA0,m_vN);
+    fR  = fa1 * dFabs(dCalcVectorDot3(vA2,m_vE2)) + fa2 * dFabs(dCalcVectorDot3(vA1,m_vE2));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 7)) {
+        m_iExitAxis=7;
+        return false;
+    }
+
+    // ************************************************
+
+    // ************************************************
+    // Axis 8 - Box Y-Axis cross Edge0 (fp1 == fp0)
+    dCalcVectorCross3(vL,vA1,m_vE0);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0;
+    fp2 = fp0 + dCalcVectorDot3(vA1,m_vN);
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA2,m_vE0)) + fa2 * dFabs(dCalcVectorDot3(vA0,m_vE0));
+
+    if (!_cldTestEdge(fp0, fp2, fR, fD, vL, 8)) {
+        m_iExitAxis=8;
+        return false;
+    }
+
+    // ************************************************
+
+    // ************************************************
+    // Axis 9 - Box Y-Axis cross Edge1 (fp2 == fp0)
+    dCalcVectorCross3(vL,vA1,m_vE1);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA1,m_vN);
+    fp2 = fp0;
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA2,m_vE1)) + fa2 * dFabs(dCalcVectorDot3(vA0,m_vE1));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 9)) {
+        m_iExitAxis=9;
+        return false;
+    }
+
+    // ************************************************
+
+    // ************************************************
+    // Axis 10 - Box Y-Axis cross Edge2 (fp1 == fp2)
+    dCalcVectorCross3(vL,vA1,m_vE2);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA1,m_vN);
+    fp2 = fp0 - dCalcVectorDot3(vA1,m_vN);
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA2,m_vE2)) + fa2 * dFabs(dCalcVectorDot3(vA0,m_vE2));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 10)) {
+        m_iExitAxis=10;
+        return false;
+    }
+
+    // ************************************************
+
+    // ************************************************
+    // Axis 11 - Box Z-Axis cross Edge0 (fp1 == fp0)
+    dCalcVectorCross3(vL,vA2,m_vE0);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0;
+    fp2 = fp0 + dCalcVectorDot3(vA2,m_vN);
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA1,m_vE0)) + fa1 * dFabs(dCalcVectorDot3(vA0,m_vE0));
+
+    if (!_cldTestEdge(fp0, fp2, fR, fD, vL, 11)) {
+        m_iExitAxis=11;
+        return false;
+    }
+    // ************************************************
+
+    // ************************************************
+    // Axis 12 - Box Z-Axis cross Edge1 (fp2 == fp0)
+    dCalcVectorCross3(vL,vA2,m_vE1);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA2,m_vN);
+    fp2 = fp0;
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA1,m_vE1)) + fa1 * dFabs(dCalcVectorDot3(vA0,m_vE1));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 12)) {
+        m_iExitAxis=12;
+        return false;
+    }
+    // ************************************************
+
+    // ************************************************
+    // Axis 13 - Box Z-Axis cross Edge2 (fp1 == fp2)
+    dCalcVectorCross3(vL,vA2,m_vE2);
+    fD  = dCalcVectorDot3(vL,m_vN)/fNLen;
+    fp0 = dCalcVectorDot3(vL,vD);
+    fp1 = fp0 - dCalcVectorDot3(vA2,m_vN);
+    fp2 = fp0 - dCalcVectorDot3(vA2,m_vN);
+    fR  = fa0 * dFabs(dCalcVectorDot3(vA1,m_vE2)) + fa1 * dFabs(dCalcVectorDot3(vA0,m_vE2));
+
+    if (!_cldTestEdge(fp0, fp1, fR, fD, vL, 13)) {
+        m_iExitAxis=13;
+        return false;
+    }
+
+    // ************************************************ all 13 axes passed
+    return true;
+}
+
+
+
+
+
+// Closest approach of two lines P_i + t*D_i (the formula is valid for unit-length D_i): writes
+// the parameters to fvalue1/fvalue2; returns false (both zeroed) if the lines are parallel.
+static bool _cldClosestPointOnTwoLines(
+    dVector3 vPoint1, dVector3 vLenVec1, dVector3 vPoint2, dVector3 vLenVec2,
+    dReal &fvalue1, dReal &fvalue2)
+{
+    // offset from the first base point to the second
+    dVector3 vOffset;
+    SUBTRACT(vPoint2,vPoint1,vOffset);
+
+    const dReal fCosAngle = dCalcVectorDot3(vLenVec1,vLenVec2);
+    const dReal fProj1    = dCalcVectorDot3(vLenVec1,vOffset);
+    const dReal fProj2    = -dCalcVectorDot3(vLenVec2,vOffset);
+    const dReal fDenom    = 1.0f - fCosAngle * fCosAngle;
+
+    // a denominator that is not positive means the lines are parallel
+    if (!(fDenom > 0.0f)) {
+        fvalue1 = 0.0f;
+        fvalue2 = 0.0f;
+        return false;
+    }
+
+    // parameters of the points of closest approach
+    const dReal fInvDenom = 1.0f/fDenom;
+    fvalue1 = (fProj1 + fCosAngle*fProj2)*fInvDenom;
+    fvalue2 = (fCosAngle*fProj1 + fProj2)*fInvDenom;
+    return true;
+}
+
+
+
+// Generates contacts for a triangle that passed the separating-axis test, based on m_iBestAxis:
+//   > 4: one edge/edge contact;  == 1: box face clipped against the triangle;
+//   2..4: triangle clipped against a box face. Contacts go through GenerateContact().
+void sTrimeshBoxColliderData::_cldClipping(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, int TriIndex) {
+    dIASSERT( !(m_iFlags & CONTACTS_UNIMPORTANT) || m_ctContacts < (m_iFlags & NUMC_MASK) ); // Do not call the function if there is no room to store results
+
+    // if we have edge/edge intersection (best axis is a box-axis x triangle-edge axis)
+    if (m_iBestAxis > 4 ) {
+        dVector3 vub,vPb,vPa;
+
+        SET(vPa,m_vHullBoxPos);
+
+        // calculate point on box edge: from the box center, step half a size along each box axis, on the side m_vBestNormal points to
+        for( int i=0; i<3; i++) {
+            dVector3 vRotCol;
+            GETCOL(m_mHullBoxRot,i,vRotCol);
+            dReal fSign = dCalcVectorDot3(m_vBestNormal,vRotCol) > 0 ? 1.0f : -1.0f;
+
+            vPa[0] += fSign * m_vBoxHalfSize[i] * vRotCol[0];
+            vPa[1] += fSign * m_vBoxHalfSize[i] * vRotCol[1];
+            vPa[2] += fSign * m_vBoxHalfSize[i] * vRotCol[2];
+        }
+
+        int iEdge = (m_iBestAxis-5)%3; // 0, 1, 2 selects triangle edge E0, E1, E2
+
+        // decide which edge is on triangle (vPb = a vertex on that edge, vub = the edge vector)
+        if ( iEdge == 0 ) {
+            SET(vPb,v0);
+            SET(vub,m_vE0);
+        } else if ( iEdge == 1) {
+            SET(vPb,v2);
+            SET(vub,m_vE1);
+        } else {
+            SET(vPb,v1);
+            SET(vub,m_vE2);
+        }
+
+
+        // setup direction parameter for face edge
+        dNormalize3(vub);
+
+        dReal fParam1, fParam2;
+
+        // setup direction parameter for box edge
+        dVector3 vua;
+        int col=(m_iBestAxis-5)/3; // box axis index 0..2
+        GETCOL(m_mHullBoxRot,col,vua);
+
+        // find two closest points on both edges (result ignored: parallel edges yield parameters 0)
+        _cldClosestPointOnTwoLines( vPa, vua, vPb, vub, fParam1, fParam2 );
+        vPa[0] += vua[0]*fParam1;
+        vPa[1] += vua[1]*fParam1;
+        vPa[2] += vua[2]*fParam1;
+
+        vPb[0] += vub[0]*fParam2;
+        vPb[1] += vub[1]*fParam2;
+        vPb[2] += vub[2]*fParam2;
+
+        // calculate collision point (midpoint of the two closest points)
+        dVector3 vPntTmp;
+        ADD(vPa,vPb,vPntTmp);
+
+        vPntTmp[0]*=0.5f;
+        vPntTmp[1]*=0.5f;
+        vPntTmp[2]*=0.5f;
+
+        // generate contact point between two closest points
+        GenerateContact(TriIndex, vPntTmp, m_vBestNormal, m_fBestDepth);
+
+
+        // if triangle is the referent face then clip box to triangle face
+    } else if (m_iBestAxis == 1) {
+
+        dVector3 vNormal2;
+        vNormal2[0]=-m_vBestNormal[0];
+        vNormal2[1]=-m_vBestNormal[1];
+        vNormal2[2]=-m_vBestNormal[2];
+
+
+        // vNr is normal in box frame, pointing from triangle to box (vNormal2 times the transposed box rotation)
+        dMatrix3 mTransposed;
+        mTransposed[0*4+0]=m_mHullBoxRot[0*4+0];
+        mTransposed[0*4+1]=m_mHullBoxRot[1*4+0];
+        mTransposed[0*4+2]=m_mHullBoxRot[2*4+0];
+
+        mTransposed[1*4+0]=m_mHullBoxRot[0*4+1];
+        mTransposed[1*4+1]=m_mHullBoxRot[1*4+1];
+        mTransposed[1*4+2]=m_mHullBoxRot[2*4+1];
+
+        mTransposed[2*4+0]=m_mHullBoxRot[0*4+2];
+        mTransposed[2*4+1]=m_mHullBoxRot[1*4+2];
+        mTransposed[2*4+2]=m_mHullBoxRot[2*4+2];
+
+        dVector3 vNr;
+        vNr[0]=mTransposed[0*4+0]*vNormal2[0]+  mTransposed[0*4+1]*vNormal2[1]+  mTransposed[0*4+2]*vNormal2[2];
+        vNr[1]=mTransposed[1*4+0]*vNormal2[0]+  mTransposed[1*4+1]*vNormal2[1]+  mTransposed[1*4+2]*vNormal2[2];
+        vNr[2]=mTransposed[2*4+0]*vNormal2[0]+  mTransposed[2*4+1]*vNormal2[1]+  mTransposed[2*4+2]*vNormal2[2];
+
+
+        dVector3 vAbsNormal;
+        vAbsNormal[0] = dFabs( vNr[0] );
+        vAbsNormal[1] = dFabs( vNr[1] );
+        vAbsNormal[2] = dFabs( vNr[2] );
+
+        // get closest face from box: iB0 = axis with the largest |vNr| component, iB1/iB2 = the other two
+        int iB0, iB1, iB2;
+        if (vAbsNormal[1] > vAbsNormal[0]) {
+            if (vAbsNormal[1] > vAbsNormal[2]) {
+                iB1 = 0;  iB0 = 1;  iB2 = 2;
+            } else {
+                iB1 = 0;  iB2 = 1;  iB0 = 2;
+            }
+        } else {
+
+            if (vAbsNormal[0] > vAbsNormal[2]) {
+                iB0 = 0;  iB1 = 1;  iB2 = 2;
+            } else {
+                iB1 = 0;  iB2 = 1;  iB0 = 2;
+            }
+        }
+
+        // Here find center of box face we are going to project (expressed relative to v0)
+        dVector3 vCenter;
+        dVector3 vRotCol;
+        GETCOL(m_mHullBoxRot,iB0,vRotCol);
+
+        if (vNr[iB0] > 0) {
+            vCenter[0] = m_vHullBoxPos[0] - v0[0] - m_vBoxHalfSize[iB0] * vRotCol[0];
+            vCenter[1] = m_vHullBoxPos[1] - v0[1] - m_vBoxHalfSize[iB0] * vRotCol[1];
+            vCenter[2] = m_vHullBoxPos[2] - v0[2] - m_vBoxHalfSize[iB0] * vRotCol[2];
+        } else {
+            vCenter[0] = m_vHullBoxPos[0] - v0[0] + m_vBoxHalfSize[iB0] * vRotCol[0];
+            vCenter[1] = m_vHullBoxPos[1] - v0[1] + m_vBoxHalfSize[iB0] * vRotCol[1];
+            vCenter[2] = m_vHullBoxPos[2] - v0[2] + m_vBoxHalfSize[iB0] * vRotCol[2];
+        }
+
+        // Here find 4 corner points of that box face
+        dVector3 avPoints[4];
+
+        dVector3 vRotCol2;
+        GETCOL(m_mHullBoxRot,iB1,vRotCol);
+        GETCOL(m_mHullBoxRot,iB2,vRotCol2);
+
+        for(int x=0;x<3;x++) {
+            avPoints[0][x] = vCenter[x] + (m_vBoxHalfSize[iB1] * vRotCol[x]) - (m_vBoxHalfSize[iB2] * vRotCol2[x]);
+            avPoints[1][x] = vCenter[x] - (m_vBoxHalfSize[iB1] * vRotCol[x]) - (m_vBoxHalfSize[iB2] * vRotCol2[x]);
+            avPoints[2][x] = vCenter[x] - (m_vBoxHalfSize[iB1] * vRotCol[x]) + (m_vBoxHalfSize[iB2] * vRotCol2[x]);
+            avPoints[3][x] = vCenter[x] + (m_vBoxHalfSize[iB1] * vRotCol[x]) + (m_vBoxHalfSize[iB2] * vRotCol2[x]);
+        }
+
+        // clip Box face with 4 planes of triangle (1 face plane, 3 edge planes)
+        dVector3 avTempArray1[9];
+        dVector3 avTempArray2[9];
+        dVector4 plPlane;
+
+        int iTempCnt1=0;
+        int iTempCnt2=0;
+
+        // zero the scratch polygons - necessary?
+        for(int i=0; i<9; i++) {
+            avTempArray1[i][0]=0;
+            avTempArray1[i][1]=0;
+            avTempArray1[i][2]=0;
+
+            avTempArray2[i][0]=0;
+            avTempArray2[i][1]=0;
+            avTempArray2[i][2]=0;
+        }
+
+
+        // Normal plane (negated, normalized triangle normal)
+        dVector3 vTemp;
+        vTemp[0]=-m_vN[0];
+        vTemp[1]=-m_vN[1];
+        vTemp[2]=-m_vN[2];
+        dNormalize3(vTemp);
+        CONSTRUCTPLANE(plPlane,vTemp,0);
+
+        _cldClipPolyToPlane( avPoints, 4, avTempArray1, iTempCnt1, plPlane  );
+
+
+        // Plane p0 (built from edge v1-v0)
+        dVector3 vTemp2;
+        SUBTRACT(v1,v0,vTemp2);
+        dCalcVectorCross3(vTemp,m_vN,vTemp2);
+        dNormalize3(vTemp);
+        CONSTRUCTPLANE(plPlane,vTemp,0);
+
+        _cldClipPolyToPlane( avTempArray1, iTempCnt1, avTempArray2, iTempCnt2, plPlane  );
+
+        // Plane p1 (built from edge v2-v1; its last plane term is dot(v0-v2, plane normal))
+        SUBTRACT(v2,v1,vTemp2);
+        dCalcVectorCross3(vTemp,m_vN,vTemp2);
+        dNormalize3(vTemp);
+        SUBTRACT(v0,v2,vTemp2);
+        CONSTRUCTPLANE(plPlane,vTemp,dCalcVectorDot3(vTemp2,vTemp));
+
+        _cldClipPolyToPlane( avTempArray2, iTempCnt2, avTempArray1, iTempCnt1, plPlane  );
+
+        // Plane p2 (built from edge v0-v2)
+        SUBTRACT(v0,v2,vTemp2);
+        dCalcVectorCross3(vTemp,m_vN,vTemp2);
+        dNormalize3(vTemp);
+        CONSTRUCTPLANE(plPlane,vTemp,0);
+
+        _cldClipPolyToPlane( avTempArray1, iTempCnt1, avTempArray2, iTempCnt2, plPlane  );
+
+        // END of clipping polygons (the final polygon is in avTempArray2)
+
+        // for each generated contact point
+        for ( int i=0; i<iTempCnt2; i++ ) {
+            // calculate depth (signed offset of the v0-relative point along vNormal2)
+            dReal fTempDepth = dCalcVectorDot3(vNormal2,avTempArray2[i]);
+
+            // clamp depth to zero (the value is negated below, so the reported depth is never negative)
+            if (fTempDepth > 0) {
+                fTempDepth = 0;
+            }
+
+            dVector3 vPntTmp;
+            ADD(avTempArray2[i],v0,vPntTmp); // undo the v0-relative offset
+
+            GenerateContact(TriIndex, vPntTmp, m_vBestNormal, -fTempDepth);
+            // stop at the requested contact count (can only match when CONTACTS_UNIMPORTANT is set)
+            if ((m_ctContacts | CONTACTS_UNIMPORTANT) == (m_iFlags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                break;
+            }
+        }
+
+        //dAASSERT(m_ctContacts>0);
+
+        // if box face is the referent face, then clip triangle on box face
+    } else { // 2 <= m_iBestAxis <= 4
+
+        // get normal of box face
+        dVector3 vNormal2;
+        SET(vNormal2,m_vBestNormal);
+
+        // get indices of box axes in correct order (iA0 = face axis, iA1/iA2 = the other two)
+        int iA0,iA1,iA2;
+        iA0 = m_iBestAxis-2;
+        if ( iA0 == 0 ) {
+            iA1 = 1; iA2 = 2;
+        } else if ( iA0 == 1 ) {
+            iA1 = 0; iA2 = 2;
+        } else {
+            iA1 = 0; iA2 = 1;
+        }
+
+        dVector3 avPoints[3];
+        // calculate triangle vertices relative to the box position (no rotation is applied)
+        SUBTRACT(v0,m_vHullBoxPos,avPoints[0]);
+        SUBTRACT(v1,m_vHullBoxPos,avPoints[1]);
+        SUBTRACT(v2,m_vHullBoxPos,avPoints[2]);
+
+        // CLIP Polygons
+        // define temp data for clipping
+        dVector3 avTempArray1[9];
+        dVector3 avTempArray2[9];
+
+        int iTempCnt1, iTempCnt2; // NOTE(review): uninitialized, unlike the triangle-face branch; relies on _cldClipPolyToPlane assigning its count output - confirm
+
+        // zero the scratch polygons - necessary?
+        for(int i=0; i<9; i++) {
+            avTempArray1[i][0]=0;
+            avTempArray1[i][1]=0;
+            avTempArray1[i][2]=0;
+
+            avTempArray2[i][0]=0;
+            avTempArray2[i][1]=0;
+            avTempArray2[i][2]=0;
+        }
+
+        // clip triangle with 5 box planes (1 face plane, 4 edge planes)
+
+        dVector4 plPlane;
+
+        // Normal plane (negated best normal, half size of the face axis)
+        dVector3 vTemp;
+        vTemp[0]=-vNormal2[0];
+        vTemp[1]=-vNormal2[1];
+        vTemp[2]=-vNormal2[2];
+        CONSTRUCTPLANE(plPlane,vTemp,m_vBoxHalfSize[iA0]);
+
+        _cldClipPolyToPlane( avPoints, 3, avTempArray1, iTempCnt1, plPlane );
+
+
+        // Plane p0 (box axis iA1)
+        GETCOL(m_mHullBoxRot,iA1,vTemp);
+        CONSTRUCTPLANE(plPlane,vTemp,m_vBoxHalfSize[iA1]);
+
+        _cldClipPolyToPlane( avTempArray1, iTempCnt1, avTempArray2, iTempCnt2, plPlane );
+
+
+        // Plane p1 (negated box axis iA1)
+        GETCOL(m_mHullBoxRot,iA1,vTemp);
+        vTemp[0]=-vTemp[0];
+        vTemp[1]=-vTemp[1];
+        vTemp[2]=-vTemp[2];
+        CONSTRUCTPLANE(plPlane,vTemp,m_vBoxHalfSize[iA1]);
+
+        _cldClipPolyToPlane( avTempArray2, iTempCnt2, avTempArray1, iTempCnt1, plPlane );
+
+        // Plane p2 (box axis iA2)
+        GETCOL(m_mHullBoxRot,iA2,vTemp);
+        CONSTRUCTPLANE(plPlane,vTemp,m_vBoxHalfSize[iA2]);
+
+        _cldClipPolyToPlane( avTempArray1, iTempCnt1, avTempArray2, iTempCnt2, plPlane );
+
+        // Plane p3 (negated box axis iA2)
+        GETCOL(m_mHullBoxRot,iA2,vTemp);
+        vTemp[0]=-vTemp[0];
+        vTemp[1]=-vTemp[1];
+        vTemp[2]=-vTemp[2];
+        CONSTRUCTPLANE(plPlane,vTemp,m_vBoxHalfSize[iA2]);
+
+        _cldClipPolyToPlane( avTempArray2, iTempCnt2, avTempArray1, iTempCnt1, plPlane );
+
+
+        // for each generated contact point (the final polygon is in avTempArray1)
+        for ( int i=0; i<iTempCnt1; i++ ) {
+            // calculate depth (offset along vNormal2 minus the half size of the face axis)
+            dReal fTempDepth = dCalcVectorDot3(vNormal2,avTempArray1[i])-m_vBoxHalfSize[iA0];
+
+            // clamp depth to zero (the value is negated below, so the reported depth is never negative)
+            if (fTempDepth > 0) {
+                fTempDepth = 0;
+            }
+
+            // generate contact data (the point is moved back from box-relative coordinates)
+            dVector3 vPntTmp;
+            ADD(avTempArray1[i],m_vHullBoxPos,vPntTmp);
+
+            GenerateContact(TriIndex, vPntTmp, m_vBestNormal, -fTempDepth);
+            // stop at the requested contact count (can only match when CONTACTS_UNIMPORTANT is set)
+            if ((m_ctContacts | CONTACTS_UNIMPORTANT) == (m_iFlags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                break;
+            }
+        }
+
+        //dAASSERT(m_ctContacts>0);
+    }
+}
+
+// GenerateContact - Written by Jeff Smith (jeff@burri.to)
+//   Stores a contact for triangle TriIndex unless it repeats a stored one.
+//   While contacts matter (CONTACTS_UNIMPORTANT clear), a candidate matching a
+//   stored contact in position and normal only raises that contact's depth, and
+//   once the buffer is full a strictly deeper candidate overwrites the
+//   shallowest stored contact; any other candidate is dropped.
+//   Position and normal are compared with tolerance dEpsilon.
+//
+//   TriIndex      - triangle index, written to the contact's side1
+//   in_ContactPos - contact position (components 0..2 are used)
+//   in_Normal     - contact normal (components 0..2 are used)
+//   in_Depth      - penetration depth of the candidate
+//
+void sTrimeshBoxColliderData::GenerateContact(int TriIndex, const dVector3 in_ContactPos, const dVector3 in_Normal, dReal in_Depth)
+{
+    // the count is kept locally and committed to m_ctContacts at the very end
+    int ctContacts = m_ctContacts;
+    dContactGeom* pTarget = NULL;
+    bool bOverwrite = false;
+
+    if (m_iFlags & CONTACTS_UNIMPORTANT)
+    {
+        // any contact will do: no filtering, but there must be room left
+        dIASSERT(ctContacts < (m_iFlags & NUMC_MASK));
+    }
+    else
+    {
+        // look for a duplicate while tracking the shallowest stored contact
+        dReal fShallowestDepth = dInfinity;
+        dContactGeom* pShallowest = NULL;
+
+        for (int idx = 0; idx < ctContacts; idx++)
+        {
+            dContactGeom* pStored = SAFECONTACT(m_iFlags, m_ContactGeoms, idx, m_iStride);
+
+            // same position? (squared distance below dEpsilon)
+            dVector3 vDelta;
+            dSubtractVectors3(vDelta, in_ContactPos, pStored->pos);
+            const bool bSamePosition = dCalcVectorDot3(vDelta, vDelta) < dEpsilon;
+
+            // same normal? (only evaluated for coincident positions)
+            if (bSamePosition && REAL(1.0) - dCalcVectorDot3(in_Normal, pStored->normal) < dEpsilon)
+            {
+                // duplicate: keep the stored entry, at most make it deeper
+                if (in_Depth > pStored->depth)
+                {
+                    pStored->depth = in_Depth;
+                    pStored->side1 = TriIndex;
+                }
+
+                return;
+            }
+
+            // remember the shallowest contact seen so far
+            if (pStored->depth < fShallowestDepth)
+            {
+                fShallowestDepth = pStored->depth;
+                pShallowest = pStored;
+            }
+        }
+
+        if (ctContacts == (m_iFlags & NUMC_MASK))
+        {
+            // buffer is full: drop the candidate unless it is strictly deeper
+            // than the shallowest stored contact
+            if (!(fShallowestDepth < in_Depth))
+            {
+                return;
+            }
+
+            pTarget = pShallowest;
+            bOverwrite = true;
+        }
+    }
+
+    if (!bOverwrite)
+    {
+        // claim the next free slot and set the fields that stay fixed afterwards
+        pTarget = SAFECONTACT(m_iFlags, m_ContactGeoms, ctContacts, m_iStride);
+        ctContacts++;
+
+        pTarget->g1 = m_Geom1;
+        pTarget->g2 = m_Geom2;
+
+        pTarget->side2 = -1;
+
+        pTarget->pos[3] = 0.0;
+        pTarget->normal[3] = 0.0;
+    }
+
+    // candidate data, written for a new slot and an overwritten one alike
+    pTarget->pos[0] = in_ContactPos[0];
+    pTarget->pos[1] = in_ContactPos[1];
+    pTarget->pos[2] = in_ContactPos[2];
+
+    pTarget->normal[0] = in_Normal[0];
+    pTarget->normal[1] = in_Normal[1];
+    pTarget->normal[2] = in_Normal[2];
+
+    pTarget->depth = in_Depth;
+    pTarget->side1 = TriIndex;
+
+    m_ctContacts = ctContacts;
+}
+
+
+
+// Prepares the collider for one mesh/box query: caches the box pose, the box half
+// extents and the mesh position, stores the contact output setup, resets best-axis data.
+void sTrimeshBoxColliderData::SetupInitialContext(dxTriMesh *TriMesh, dxGeom *BoxGeom,
+                                                  int Flags, dContactGeom* Contacts, int Stride)
+{
+    // contact output: destination buffer, its stride and the flags
+    m_ContactGeoms = Contacts;
+    m_iStride = Stride;
+    m_iFlags = Flags;
+    m_ctContacts = 0;
+
+    // geoms reported in the generated contacts
+    m_Geom1 = TriMesh;
+    m_Geom2 = BoxGeom;
+
+    // box rotation and position as reported by the geom
+    const dMatrix3& mBoxRotation = *(const dMatrix3*)dGeomGetRotation(BoxGeom);
+    const dVector3& vBoxPosition = *(const dVector3*)dGeomGetPosition(BoxGeom);
+
+    SETM(m_mHullBoxRot,mBoxRotation);
+    SET(m_vHullBoxPos,vBoxPosition);
+
+    // the full side lengths are fetched first and then halved in place
+    dGeomBoxGetLengths(BoxGeom, m_vBoxHalfSize);
+    for (int axis = 0; axis < 3; axis++) {
+        m_vBoxHalfSize[axis] *= 0.5f;
+    }
+
+    // mesh position
+    const dVector3& vMeshPosition = *(const dVector3*)dGeomGetPosition(TriMesh);
+    SET(m_vHullDstPos,vMeshPosition);
+
+    // nothing selected yet: maximal depth and a zero normal
+    m_fBestDepth = MAXVALUE;
+    for (int axis = 0; axis < 3; axis++) {
+        m_vBestNormal[axis] = 0;
+    }
+}
+
+void sTrimeshBoxColliderData::TestCollisionForSingleTriangle(int Triint, dVector3 dv[3], bool &bOutFinishSearching)
+{
+    // Collides the box with one triangle (vertices dv[0..2], index Triint) and
+    // reports through bOutFinishSearching whether the caller may stop iterating.
+    const bool bProcessed = _cldTestOneTriangle(dv[0], dv[1], dv[2], Triint);
+
+    /*
+    NOTE by Oleh_Derevenko:
+    The function continues checking triangles after maximal number
+    of contacts is reached because it selects maximal penetration depths.
+    See also comments in GenerateContact()
+    */
+    // the equality can hold only if CONTACTS_UNIMPORTANT is set in m_iFlags
+    const bool bLimitReached =
+        ((m_ctContacts | CONTACTS_UNIMPORTANT) == (m_iFlags & (NUMC_MASK | CONTACTS_UNIMPORTANT)));
+
+    bOutFinishSearching = bProcessed && bLimitReached;
+}
+
+// Tests one mesh triangle against the box and generates contacts if they intersect.
+// Returns true only when _cldClipping() was invoked for the triangle.
+bool sTrimeshBoxColliderData::_cldTestOneTriangle(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, int TriIndex)//, void *pvUser)
+{
+    // do the intersection test, which also looks for the best separating axis
+    const bool bIntersecting = _cldTestSeparatingAxes(v0, v1, v2);
+
+    // Bail out when the triangle is separated from the box. A zero best axis
+    // after a successful test should not happen (the test should already have
+    // failed in that case), but it is treated the same way.
+    //dMessage (0, "best separation axis not found");
+    if (!bIntersecting || m_iBestAxis == 0) {
+        // do nothing
+        return false;
+    }
+
+    // clip and emit the contacts for this triangle
+    _cldClipping(v0, v1, v2, TriIndex);
+    return true;
+}
+
+
+// OPCODE version of box to mesh collider
+#if dTRIMESH_OPCODE
+static void dQueryBTLPotentialCollisionTriangles(OBBCollider &Collider,
+                                                 const sTrimeshBoxColliderData &cData, dxTriMesh *TriMesh, dxGeom *BoxGeom,
+                                                 OBBCache &BoxCache)
+{
+    // Runs the OBB query of the box against the mesh tree; the outcome stays in Collider.
+    // The box center is taken relative to the mesh position and the mesh matrix carries
+    // only the mesh rotation. BoxCache is used when temporal coherence is off for boxes.
+    const dMatrix3& mMeshRot = *(const dMatrix3*)dGeomGetRotation(TriMesh);
+    const dVector3& vMeshPos = *(const dVector3*)dGeomGetPosition(TriMesh);
+
+    // mesh matrix: mesh rotation combined with a zero translation
+    Matrix4x4 mMeshTransform;
+    const dVector3 vNoTranslation = { REAL(0.0), };
+    MakeMatrix(vNoTranslation, mMeshRot, mMeshTransform);
+
+    const dMatrix3& mBoxRot = *(const dMatrix3*)dGeomGetRotation(BoxGeom);
+    const dVector3& vBoxPos = *(const dVector3*)dGeomGetPosition(BoxGeom);
+
+    dVector3 vBoxRelPos;
+    dSubtractVectors3(vBoxRelPos, vBoxPos, vMeshPos);
+
+    // oriented box handed to the collider
+    OBB QueryBox;
+    QueryBox.mCenter.Set(vBoxRelPos[0], vBoxRelPos[1], vBoxRelPos[2]);
+    QueryBox.mExtents.Set(cData.m_vBoxHalfSize[0], cData.m_vBoxHalfSize[1], cData.m_vBoxHalfSize[2]);
+    QueryBox.mRot.Set(
+        mBoxRot[0], mBoxRot[4], mBoxRot[8],
+        mBoxRot[1], mBoxRot[5], mBoxRot[9],
+        mBoxRot[2], mBoxRot[6], mBoxRot[10]);
+
+    if (!TriMesh->getDoTC(dxTriMesh::TTC_BOX)) {
+        // no temporal coherence: query with the caller-provided cache
+        Collider.SetTemporalCoherence(false);
+        Collider.Collide(BoxCache, QueryBox, TriMesh->retrieveMeshBVTreeRef(), null, &mMeshTransform);
+        return;
+    }
+
+    // temporal coherence: find the cache entry of this box, creating it on first use
+    dxTriMesh::BoxTC* pBoxEntry = 0;
+    const int ctEntries = TriMesh->m_BoxTCCache.size();
+    for (int idx = 0; idx != ctEntries && !pBoxEntry; idx++) {
+        if (TriMesh->m_BoxTCCache[idx].Geom == BoxGeom) {
+            pBoxEntry = &TriMesh->m_BoxTCCache[idx];
+        }
+    }
+    if (!pBoxEntry) {
+        TriMesh->m_BoxTCCache.push(dxTriMesh::BoxTC());
+        pBoxEntry = &TriMesh->m_BoxTCCache[TriMesh->m_BoxTCCache.size() - 1];
+        pBoxEntry->Geom = BoxGeom;
+        pBoxEntry->FatCoeff = 1.1f; // Pierre recommends this, instead of 1.0
+    }
+
+    Collider.SetTemporalCoherence(true);
+    Collider.Collide(*pBoxEntry, QueryBox, TriMesh->retrieveMeshBVTreeRef(), null, &mMeshTransform);
+}
+
+int dCollideBTL(dxGeom* g1, dxGeom* BoxGeom, int Flags, dContactGeom* Contacts, int Stride){
+    // OPCODE trimesh/box collider; returns the number of contacts generated into Contacts.
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (BoxGeom->type == dBoxClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* pMesh = (dxTriMesh*)g1;
+
+    sTrimeshBoxColliderData cData;
+    cData.SetupInitialContext(pMesh, BoxGeom, Flags, Contacts, Stride);
+
+    // the collider cache is looked up by the TLS kind of the mesh's parent space
+    const unsigned uiTLSKind = pMesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == BoxGeom->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pCache = GetTrimeshCollidersCache(uiTLSKind);
+    OBBCollider& obbCollider = pCache->m_OBBCollider;
+
+    // collect the triangles touched by the box
+    dQueryBTLPotentialCollisionTriangles(obbCollider, cData, pMesh, BoxGeom,
+        pCache->m_DefaultBoxCache);
+
+    if (!obbCollider.GetContactStatus()) {
+        return 0; // no collision occurred
+    }
+
+    const int ctTouched = obbCollider.GetNbTouchedPrimitives();
+    const int* pTouched = (const int*)obbCollider.GetTouchedPrimitives();
+    if (ctTouched == 0) {
+        return cData.m_ctContacts;
+    }
+
+    if (pMesh->m_ArrayCallback != null) {
+        pMesh->m_ArrayCallback(pMesh, BoxGeom, pTouched, ctTouched);
+    }
+
+    // mesh pose, passed along when fetching triangle vertices
+    const dMatrix3& mMeshRot = *(const dMatrix3*)dGeomGetRotation(pMesh);
+    const dVector3& vMeshPos = *(const dVector3*)dGeomGetPosition(pMesh);
+
+    // test every touched triangle that the per-triangle callback lets through
+    for (int idx = 0; idx < ctTouched; idx++) {
+        const int iTriangle = pTouched[idx];
+        if (pMesh->invokeCallback(BoxGeom, iTriangle)) {
+            dVector3 avVertices[3];
+            pMesh->fetchMeshTriangle(avVertices, iTriangle, vMeshPos, mMeshRot);
+
+            bool bStop;
+            cData.TestCollisionForSingleTriangle(iTriangle, avVertices, bStop);
+            if (bStop) {
+                break;
+            }
+        }
+    }
+    return cData.m_ctContacts;
+}
+#endif
+
+// GIMPACT version of box to mesh collider
+#if dTRIMESH_GIMPACT
+int dCollideBTL(dxGeom* g1, dxGeom* BoxGeom, int Flags, dContactGeom* Contacts, int Stride)
+{
+    // GIMPACT trimesh/box collider: g1 is the mesh, BoxGeom the box. Triangles reported
+    // by the AABB query against the box geom's bounding box are tested one by one.
+    // Returns the number of contacts generated into Contacts.
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (BoxGeom->type == dBoxClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* pMesh = (dxTriMesh*)g1;
+
+    // both bounding boxes are recomputed before anything is queried
+    g1 -> recomputeAABB();
+    BoxGeom -> recomputeAABB();
+
+    sTrimeshBoxColliderData cData;
+    cData.SetupInitialContext(pMesh, BoxGeom, Flags, Contacts, Stride);
+
+    //*****at first , collide box aabb******//
+    GIM_TRIMESH * pGimMesh = &pMesh->m_collision_trimesh;
+    aabb3f boxBounds(BoxGeom->aabb[0], BoxGeom->aabb[1], BoxGeom->aabb[2], BoxGeom->aabb[3], BoxGeom->aabb[4], BoxGeom->aabb[5]);
+
+    GDYNAMIC_ARRAY candidates;
+    GIM_CREATE_BOXQUERY_LIST(candidates);
+
+    gim_aabbset_box_collision(&boxBounds, &pGimMesh->m_aabbset , &candidates);
+
+    // SetupInitialContext() zeroed m_ctContacts, so an empty query result returns 0
+    if (candidates.m_size != 0)
+    {
+        //collide triangles
+        GUINT32 * pCandidateIds = GIM_DYNARRAY_POINTER(GUINT32,candidates);
+
+        // the mesh work data stays locked while triangle vertices are read
+        gim_trimesh_locks_work_data(pGimMesh);
+
+        for (unsigned int idx = 0; idx < candidates.m_size; idx++)
+        {
+            const int iTriangle = pCandidateIds[idx];
+
+            dVector3 avVertices[3];
+            gim_trimesh_get_triangle_vertices(pGimMesh, iTriangle, avVertices[0], avVertices[1], avVertices[2]);
+
+            bool bStop;
+            cData.TestCollisionForSingleTriangle(iTriangle, avVertices, bStop);
+
+            if (bStop)
+            {
+                break;
+            }
+        }
+
+        gim_trimesh_unlocks_work_data(pGimMesh);
+    }
+
+    // the query list is released on every path
+    GIM_DYNARRAY_DESTROY(candidates);
+    return cData.m_ctContacts;
+}
+#endif
+
+
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_ccylinder.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_ccylinder.cpp
new file mode 100644
index 0000000..6681cc6
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_ccylinder.cpp
@@ -0,0 +1,1183 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *	Triangle-Capsule(Capsule) collider by Alen Ladavac
+ *  Ported to ODE by Nguyen Binh
+ */
+
+// NOTES from Nguyen Binh
+//	14 Apr : Seems to be robust
+//       There is a problem when you use the original Step and set contact friction
+//		surface.mu = dInfinity;
+//		More description : 
+//			When I dropped a Capsule over the bunny ears, it seemed to get stuck
+//			there for a while. I think the cause is that when you set surface.mu = dInfinity;
+//			the friction force is so high that it just hangs the capsule there.
+//			So a good cure for this is to set mu = around 1.5 (in my case)
+//		For StepFast1, this becomes as solid as rock : StepFast1 just approximates the
+//		friction force.
+
+// NOTES from Croteam's Alen
+//As a side note... there are some extra contacts that can be generated
+//on the edge between two triangles, and if the capsule penetrates deeply into
+//the triangle (usually happens with large mass or low FPS), some such
+//contacts can in some cases push the capsule away from the edge instead of
+//away from the two triangles. This shows up as capsule slowing down a bit
+//when hitting an edge while sliding along a flat tessellated grid of
+//triangles. This is only if capsule is standing upwards.
+
+//Same thing can appear whenever a smooth object (e.g sphere) hits such an
+//edge, and it needs to be solved as a special case probably. This is a
+//problem we are looking forward to address soon.
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+#include "util.h"
+
+
+#if dTRIMESH_ENABLED
+
+// OPCODE version
+#if dTRIMESH_OPCODE
+
+// Largest finite value of the configured precision and its negation: FLT_MAX when dSINGLE is defined, DBL_MAX otherwise
+#if defined(dSINGLE)
+#define MAX_REAL	FLT_MAX
+#define MIN_REAL	(-FLT_MAX)
+#else
+#define MAX_REAL	DBL_MAX
+#define MIN_REAL	(-DBL_MAX)
+#endif
+
+// Set to 1 to compile in the pass that filters near-duplicate local contacts before they are copied to the caller's array
+#define OPTIMIZE_CONTACTS 1
+
+// dVector3 operands; forwards to dSubtractVectors3 with the result argument moved to the last position
+// r=a-b
+#define SUBTRACT(a,b,r) dSubtractVectors3(r, a, b)
+
+
+// dVector3 operands; forwards to dCopyVector3
+// a=b
+#define SET(a,b) dCopyVector3(a, b)
+
+
+// dMatrix3 operands per the original note; forwards to dCopyMatrix4x4 (NOTE(review): no use of SETM in the part of the file reviewed here)
+// a=b
+#define SETM(a,b) dCopyMatrix4x4(a, b)
+
+
+// dVector3 operands; forwards to dAddVectors3 with the result argument moved to the last position
+// r=a+b
+#define ADD(a,b,r) dAddVectors3(r, a, b)
+
+
+// dMatrix3, int, dVector3; forwards to dGetMatrixColumn3 with the output vector moved to the first position
+// v=column a from m
+#define GETCOL(m,a,v) dGetMatrixColumn3(v, m, a)
+
+
+// dVector4 plane p, dVector3 point v; forwards to dPointPlaneDistance (note the swapped argument order)
+// distance between plane p and point v
+#define POINTDISTANCE(p,v) dPointPlaneDistance(v, p)
+
+
+// dVector4 plane (output), dVector3 normal, dReal d; forwards to dConstructPlane with the output moved last
+// construct plane from normal and d
+#define CONSTRUCTPLANE(plane,normal,d) dConstructPlane(normal, d, plane)
+
+
+// dVector3 operand; forwards to dCalcVectorLength3
+// length of vector a
+#define LENGTHOF(a) dCalcVectorLength3(a)
+
+
+// Squared length of v, as computed by dCalcVectorLengthSquare3.
+static inline dReal _length2OfVector3(dVector3 v)
+{
+    const dReal fLengthSquared = dCalcVectorLengthSquare3(v);
+
+    return fLengthSquared;
+}
+
+
+// One candidate contact gathered while testing triangles, before filtering and copy-out into the caller's dContactGeom records
+typedef struct _sLocalContactData
+{
+    dVector3	vPos; // contact position
+    dVector3	vNormal; // contact normal
+    dReal		fDepth; // penetration depth (clamped to be non-negative where it is computed)
+    int			triIndex; // index of the mesh triangle that produced the contact (reported as side1)
+    int			nFlags; // 0 = filtered out, 1 = OK
+}sLocalContactData;
+
+struct sTrimeshCapsuleColliderData // working state for one trimesh-vs-capsule collision query
+{
+    sTrimeshCapsuleColliderData(): m_gLocalContacts(NULL), m_ctContacts(0) { memset(m_vN, 0, sizeof(dVector3)); } // only these three members are initialised here
+    // Per-query entry points: capture geom state, then test candidate triangles one at a time
+    void SetupInitialContext(dxTriMesh *TriMesh, dxGeom *Capsule, int flags, int skip);
+    int TestCollisionForSingleTriangle(int ctContacts0, int Triint, dVector3 dv[3], 
+        uint8 flags, bool &bOutFinishSearching);
+    // Contact post-processing: optional near-duplicate filtering, then copy-out to the caller's array
+#if OPTIMIZE_CONTACTS
+    void _OptimizeLocalContacts();
+#endif
+    int	_ProcessLocalContacts(dContactGeom *contact, dxTriMesh *TriMesh, dxGeom *Capsule);
+    // Narrow-phase helpers used while testing a single triangle
+    static BOOL _cldClipEdgeToPlane(dVector3 &vEpnt0, dVector3 &vEpnt1, const dVector4& plPlane);
+    BOOL _cldTestAxis(const dVector3 &v0, const dVector3 &v1, const dVector3 &v2, 
+        dVector3 vAxis, int iAxis, BOOL bNoFlip = FALSE);
+    BOOL _cldTestSeparatingAxesOfCapsule(const dVector3 &v0, const dVector3 &v1, 
+        const dVector3 &v2, uint8 flags);
+    void _cldTestOneTriangleVSCapsule(const dVector3 &v0, const dVector3 &v1, 
+        const dVector3 &v2, uint8 flags);
+    // Candidate contact buffer. NOTE(review): not allocated in the part of the file reviewed here - presumably provided by the caller; confirm
+    sLocalContactData   *m_gLocalContacts;
+    unsigned int		m_ctContacts; // number of m_gLocalContacts entries in use
+
+    // capsule data
+    // real time data (copied from the capsule geom by SetupInitialContext)
+    dMatrix3  m_mCapsuleRotation;
+    dVector3   m_vCapsulePosition;
+    dVector3   m_vCapsuleAxis; // column nCAPSULE_AXIS of m_mCapsuleRotation
+    // static data (from dGeomCapsuleGetParams)
+    dReal      m_vCapsuleRadius; // radius (a scalar, despite the m_v prefix)
+    dReal      m_fCapsuleSize; // length reported by dGeomCapsuleGetParams plus twice the radius
+
+    // mesh data
+    // dMatrix4  mHullDstPl;
+    dMatrix3   m_mTriMeshRot;
+    dVector3   m_vTriMeshPos;
+    dVector3   m_vE0, m_vE1, m_vE2; // edges of the current triangle: v1-v0, v2-v1, v0-v2
+
+    // global collider data
+    dVector3 m_vNormal; // direction of the best axis recorded by _cldTestAxis
+    dReal    m_fBestDepth; // overlap measure on that axis (<= 0 once an axis is accepted; larger means shallower)
+    dReal    m_fBestCenter; // centre of the triangle's projected interval on the best axis
+    dReal    m_fBestrt; // half-width of that projected interval
+    int		m_iBestAxis; // id of the best axis, 0 = none recorded
+    dVector3 m_vN; // normal of the current triangle
+
+    dVector3 m_vV0; // triangle vertices relative to the capsule position
+    dVector3 m_vV1;
+    dVector3 m_vV2;
+
+    // ODE contact's specific
+    unsigned int m_iFlags; // collision flags; (m_iFlags & NUMC_MASK) is the contact limit
+    int m_iStride; // stride of the caller's contact array, as passed to SAFECONTACT
+};
+
+// The capsule's long axis is its local Z axis: index 2 selects that column of the capsule rotation matrix
+static const int nCAPSULE_AXIS = 2;
+
+
+#if OPTIMIZE_CONTACTS
+
+// Per-component position tolerance below which two contacts count as being at the same place
+static const dReal fSameContactPositionEpsilon = REAL(0.0001); // 1e-4
+// Per-component normal tolerance below which two contacts count as pointing in the same direction
+static const dReal fSameContactNormalEpsilon = REAL(0.0001); // 1e-4
+
+// Decide whether two local contacts are near-duplicates of each other.
+// Returns 1 only when every component of the position difference is below
+// fSameContactPositionEpsilon and every component of the normal difference
+// is below fSameContactNormalEpsilon; returns 0 otherwise.
+inline int _IsNearContacts(sLocalContactData& c1,sLocalContactData& c2)
+{
+    dVector3 vDelta;
+
+    // Positions first: one component too far apart settles the question.
+    SUBTRACT(c1.vPos, c2.vPos, vDelta);
+    for (int iAxis = 0; iAxis < 3; ++iAxis)
+    {
+        if (!(dFabs(vDelta[iAxis]) < fSameContactPositionEpsilon))
+        {
+            return 0;
+        }
+    }
+
+    // Then the normals, checked the same way.
+    SUBTRACT(c1.vNormal, c2.vNormal, vDelta);
+    for (int iAxis = 0; iAxis < 3; ++iAxis)
+    {
+        if (!(dFabs(vDelta[iAxis]) < fSameContactNormalEpsilon))
+        {
+            return 0;
+        }
+    }
+
+    // Close in both position and direction.
+    return 1;
+}
+
+// Selection rule used when two contacts are near-duplicates: c1 wins (1 is
+// returned) when it penetrates strictly deeper than c2. Swap in a different
+// criterion here if needed; the losing contact is discarded by the caller.
+inline int _IsBetter(sLocalContactData& c1,sLocalContactData& c2)
+{
+    const bool bFirstIsDeeper = (c2.fDepth < c1.fDepth);
+
+    return bFirstIsDeeper ? 1 : 0;
+}
+
+// Walk every pair of local contacts and, for each pair judged "near" by
+// _IsNearContacts, clear nFlags on the one that _IsBetter ranks lower so
+// that it is skipped when contacts are copied out.
+void sTrimeshCapsuleColliderData::_OptimizeLocalContacts()
+{
+    const int nTotal = m_ctContacts;
+
+    for (int iFirst = 0; iFirst < nTotal-1; ++iFirst)
+    {
+        sLocalContactData &first = m_gLocalContacts[iFirst];
+
+        for (int iSecond = iFirst+1; iSecond < nTotal; ++iSecond)
+        {
+            sLocalContactData &second = m_gLocalContacts[iSecond];
+
+            if (!_IsNearContacts(first, second))
+            {
+                continue;
+            }
+
+            // Keep the deeper contact of the pair; the other one is filtered.
+            // (Summing both depths instead was tried by the original author
+            // and reported not to work as well.)
+            sLocalContactData &discarded = _IsBetter(second, first) ? first : second;
+            discarded.nFlags = 0;
+        }
+    }
+}
+#endif // OPTIMIZE_CONTACTS
+
+// Copy the surviving local contacts into the caller's contact array.
+//
+// contact  - caller's output array, addressed through SAFECONTACT with
+//            m_iFlags and m_iStride.
+// TriMesh  - stored as g1 of every emitted contact.
+// Capsule  - stored as g2 of every emitted contact.
+//
+// When OPTIMIZE_CONTACTS is enabled, more than one contact exists and
+// CONTACTS_UNIMPORTANT is not set, near-duplicates are filtered first.
+// At most (m_iFlags & NUMC_MASK) contacts are written; the number written
+// is returned.
+int	sTrimeshCapsuleColliderData::_ProcessLocalContacts(dContactGeom *contact,
+                                                       dxTriMesh *TriMesh, dxGeom *Capsule)
+{
+#if OPTIMIZE_CONTACTS
+    const bool bFilterWorthwhile = (m_ctContacts > 1 && !(m_iFlags & CONTACTS_UNIMPORTANT));
+    if (bFilterWorthwhile)
+    {
+        _OptimizeLocalContacts();
+    }
+#endif
+
+    unsigned int nWritten = 0;
+
+    // Stop as soon as either the local list or the caller's capacity runs out.
+    for (unsigned int iLocal = 0;
+         iLocal < m_ctContacts && nWritten < (m_iFlags & NUMC_MASK);
+         ++iLocal)
+    {
+        sLocalContactData &source = m_gLocalContacts[iLocal];
+
+        // Entries whose flag is not 1 were filtered out.
+        if (1 != source.nFlags)
+        {
+            continue;
+        }
+
+        dContactGeom *pTarget = SAFECONTACT(m_iFlags, contact, nWritten, m_iStride);
+        pTarget->depth = source.fDepth;
+        SET(pTarget->normal, source.vNormal);
+        SET(pTarget->pos, source.vPos);
+        pTarget->g1 = TriMesh;
+        pTarget->g2 = Capsule;
+        pTarget->side1 = source.triIndex;
+        pTarget->side2 = -1;
+
+        ++nWritten;
+    }
+
+    return nWritten;
+}
+
+// Clip the segment (vEpnt0, vEpnt1) against the positive side of plPlane.
+//
+// Returns FALSE when both end points have a negative plane distance (the
+// segment is rejected and left untouched). Otherwise returns TRUE; when the
+// end points lie strictly on opposite sides, the one with the negative
+// distance is moved onto the plane.
+BOOL sTrimeshCapsuleColliderData::_cldClipEdgeToPlane( 
+    dVector3 &vEpnt0, dVector3 &vEpnt1, const dVector4& plPlane)
+{
+    const dReal fSide0 = POINTDISTANCE( plPlane, vEpnt0 );
+    const dReal fSide1 = POINTDISTANCE( plPlane, vEpnt1 );
+
+    // Entirely behind the plane: nothing survives.
+    if ( fSide0 < 0 && fSide1 < 0 )
+    {
+        return FALSE;
+    }
+
+    const bool bCrossesPlane =
+        (fSide0 > 0 && fSide1 < 0) || (fSide0 < 0 && fSide1 > 0);
+
+    if (bCrossesPlane)
+    {
+        // Point where the segment meets the plane.
+        const dReal fSpan = fSide0 - fSide1;
+        dVector3 vCrossing;
+        for (int iAxis = 0; iAxis < 3; ++iAxis)
+        {
+            vCrossing[iAxis] = vEpnt0[iAxis] - (vEpnt0[iAxis] - vEpnt1[iAxis]) * fSide0 / fSpan;
+        }
+
+        // Replace whichever end point was behind the plane.
+        dVector3 &vBehind = (fSide0 < 0) ? vEpnt0 : vEpnt1;
+        SET(vBehind, vCrossing);
+    }
+
+    // Both in front, straddling (now clipped), or touching the plane.
+    return TRUE;
+}
+
+// Test one candidate separating axis and keep track of the best one so far.
+//
+// vAxis   - candidate axis; it is normalised in place, so the caller's
+//           array is modified.
+// iAxis   - id stored in m_iBestAxis if this axis becomes the best one.
+// bNoFlip - when set, the stored normal is never reversed.
+// The three vertex parameters are unused; the projection works on the
+// capsule-relative copies m_vV0..m_vV2.
+//
+// Returns FALSE when the projections of capsule and triangle do not overlap
+// on this axis, TRUE otherwise (also when the axis is too short to be used).
+BOOL sTrimeshCapsuleColliderData::_cldTestAxis(
+    const dVector3 &/*v0*/,
+    const dVector3 &/*v1*/,
+    const dVector3 &/*v2*/, 
+    dVector3 vAxis, 
+    int iAxis,
+    BOOL bNoFlip/* = FALSE*/) 
+{
+    // A near-zero axis cannot separate anything; report "no separation".
+    // TODO : dReal epsilon please
+    if ( LENGTHOF(vAxis) < REAL(1e-5) )
+    {
+        return TRUE;
+    }
+
+    dNormalize3(vAxis);
+
+    // Half-extent of the capsule's projection on the axis.
+    const dReal fHalfSegment = m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius;
+    const dReal fCapsuleReach = dFabs(dCalcVectorDot3(m_vCapsuleAxis,vAxis))*fHalfSegment + m_vCapsuleRadius;
+
+    // Interval covered by the triangle's projection on the axis.
+    const dReal afProjection[3] =
+    {
+        dCalcVectorDot3(m_vV0, vAxis),
+        dCalcVectorDot3(m_vV1, vAxis),
+        dCalcVectorDot3(m_vV2, vAxis)
+    };
+
+    dReal fLow = MAX_REAL;
+    dReal fHigh = MIN_REAL;
+    for (int iVertex = 0; iVertex < 3; ++iVertex)
+    {
+        const dReal fValue = afProjection[iVertex];
+        if (fValue < fLow)
+        {
+            fLow = fValue;
+        }
+        if (fValue > fHigh)
+        {
+            fHigh = fValue;
+        }
+    }
+
+    const dReal fMid = (fLow+fHigh)*REAL(0.5);
+    const dReal fTriangleHalfWidth = (fHigh-fLow)*REAL(0.5);
+
+    const dReal fMidDistance = dFabs(fMid);
+    const dReal fCombinedReach = fCapsuleReach + fTriangleHalfWidth;
+
+    // The intervals are disjoint: this axis separates the two shapes.
+    if (fMidDistance > fCombinedReach)
+    {
+        return FALSE;
+    }
+
+    // Overlap measure (never positive here); a larger value is shallower.
+    const dReal fOverlap = fMidDistance - fCombinedReach;
+    if ( !(fOverlap > m_fBestDepth) )
+    {
+        return TRUE;
+    }
+
+    // New best axis. The normal is reversed when the triangle interval is
+    // centred on the negative side, unless the caller forbids flipping.
+    const bool bReverse = (fMid < 0 && !bNoFlip);
+
+    m_fBestDepth  = fOverlap;
+    m_fBestrt     = fTriangleHalfWidth;
+    m_iBestAxis   = iAxis;
+    m_fBestCenter = bReverse ? -fMid : fMid;
+
+    for (int iComponent = 0; iComponent < 3; ++iComponent)
+    {
+        m_vNormal[iComponent] = bReverse ? -vAxis[iComponent] : vAxis[iComponent];
+    }
+
+    return TRUE;
+}
+
+// Shorthand for the double cross product used by the axis tests:
+// r = ((v1 - v2) x v3) x v4.
+inline void _CalculateAxis(const dVector3& v1,
+                           const dVector3& v2,
+                           const dVector3& v3,
+                           const dVector3& v4,
+                           dVector3& r)
+{
+    dVector3 vDifference;
+    SUBTRACT(v1,v2,vDifference);
+
+    dVector3 vFirstCross;
+    dCalcVectorCross3(vFirstCross,vDifference,v3);
+
+    dCalcVectorCross3(r,vFirstCross,v4);
+}
+
+BOOL sTrimeshCapsuleColliderData::_cldTestSeparatingAxesOfCapsule(
+    const dVector3 &v0,
+    const dVector3 &v1,
+    const dVector3 &v2,
+    uint8 flags) 
+{
+    // Runs the candidate separating-axis tests for triangle (v0,v1,v2) against the capsule: returns FALSE as soon as one axis separates them, TRUE otherwise (best axis kept by _cldTestAxis)
+    dVector3 vCp0; // centre of the first cap sphere, absolute space
+    vCp0[0] = m_vCapsulePosition[0] + m_vCapsuleAxis[0]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+    vCp0[1] = m_vCapsulePosition[1] + m_vCapsuleAxis[1]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+    vCp0[2] = m_vCapsulePosition[2] + m_vCapsuleAxis[2]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+
+    dVector3 vCp1; // centre of the opposite cap sphere, absolute space
+    vCp1[0] = m_vCapsulePosition[0] - m_vCapsuleAxis[0]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+    vCp1[1] = m_vCapsulePosition[1] - m_vCapsuleAxis[1]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+    vCp1[2] = m_vCapsulePosition[2] - m_vCapsuleAxis[2]*(m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius);
+
+    // reset best axis
+    m_iBestAxis = 0;
+    // reset best depth
+    m_fBestDepth  = -MAX_REAL;
+    // reset separating axis vector
+    dVector3 vAxis = {REAL(0.0),REAL(0.0),REAL(0.0),REAL(0.0)};
+
+    // Threshold compared against the SQUARED axis length below; NOTE(review): written as a float literal, unlike the REAL() constants used elsewhere
+    const dReal fEpsilon = 1e-6f;
+
+    // Translate the triangle so that the capsule position is the origin; _cldTestAxis projects these copies (m_vV0..m_vV2)
+    SUBTRACT(v0, m_vCapsulePosition, m_vV0);
+    SUBTRACT(v1, m_vCapsulePosition, m_vV1);
+    SUBTRACT(v2, m_vCapsulePosition, m_vV2);
+
+    // Up to 19 candidate axes are tested below; `flags` (CUF_USE_* bits) selects which edge and vertex axes take part.
+    // I wonder does it help if we employ the method like ISA-GJK???
+    // Or at least we should do experiment and find what axis will
+    // be most likely to be separating axis to check it first.
+
+    // Axis 1: the negated triangle normal, tested with bNoFlip = TRUE so the recorded normal is never reversed
+    // axis m_vN
+    //vAxis = -m_vN;
+    vAxis[0] = - m_vN[0];
+    vAxis[1] = - m_vN[1];
+    vAxis[2] = - m_vN[2];
+    if (!_cldTestAxis(v0, v1, v2, vAxis, 1, TRUE)) 
+    { 
+        return FALSE; 
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_EDGE)
+    {
+        // axis CxE0 - Edge 0
+        dCalcVectorCross3(vAxis,m_vCapsuleAxis,m_vE0);
+        //vAxis = dCalcVectorCross3( m_vCapsuleAxis cross vE0 );
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 2)) { 
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_EDGE)
+    {
+        // axis CxE1 - Edge 1
+        dCalcVectorCross3(vAxis,m_vCapsuleAxis,m_vE1);
+        //vAxis = ( m_vCapsuleAxis cross m_vE1 );
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 3)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_EDGE)
+    {
+        // axis CxE2 - Edge 2
+        //vAxis = ( m_vCapsuleAxis cross m_vE2 );
+        dCalcVectorCross3(vAxis,m_vCapsuleAxis,m_vE2);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 4)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_EDGE)
+    {
+        // first capsule point
+        // axis ((Cp0-V0) x E0) x E0
+        _CalculateAxis(vCp0,v0,m_vE0,m_vE0,vAxis);
+        //	vAxis = ( ( vCp0-v0) cross vE0 ) cross vE0;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 5)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_EDGE)
+    {
+        // axis ((Cp0-V1) x E1) x E1
+        _CalculateAxis(vCp0,v1,m_vE1,m_vE1,vAxis);
+        //vAxis = ( ( vCp0-v1) cross vE1 ) cross vE1;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 6)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_EDGE)
+    {
+        // axis ((Cp0-V2) x E2) x E2
+        _CalculateAxis(vCp0,v2,m_vE2,m_vE2,vAxis);
+        //vAxis = ( ( vCp0-v2) cross vE2 ) cross vE2;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 7)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_EDGE)
+    {
+        // second capsule point
+        // axis ((Cp1-V0) x E0) x E0
+        _CalculateAxis(vCp1,v0,m_vE0,m_vE0,vAxis);
+        //vAxis = ( ( vCp1-v0 ) cross vE0 ) cross vE0;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 8)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_EDGE)
+    {
+        // axis ((Cp1-V1) x E1) x E1
+        _CalculateAxis(vCp1,v1,m_vE1,m_vE1,vAxis);
+        //vAxis = ( ( vCp1-v1 ) cross vE1 ) cross vE1;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 9)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_EDGE)
+    {
+        // axis ((Cp1-V2) x E2) x E2
+        _CalculateAxis(vCp1,v2,m_vE2,m_vE2,vAxis);
+        //vAxis = ( ( vCp1-v2 ) cross vE2 ) cross vE2;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 10)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_VERTEX)
+    {
+        // first vertex on triangle
+        // axis ((V0-Cp0) x C) x C
+        _CalculateAxis(v0,vCp0,m_vCapsuleAxis,m_vCapsuleAxis,vAxis);
+        //vAxis = ( ( v0-vCp0 ) cross m_vCapsuleAxis ) cross m_vCapsuleAxis;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 11)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_VERTEX)
+    {
+        // second vertex on triangle
+        // axis ((V1-Cp0) x C) x C
+        _CalculateAxis(v1,vCp0,m_vCapsuleAxis,m_vCapsuleAxis,vAxis);	
+        //vAxis = ( ( v1-vCp0 ) cross vCapsuleAxis ) cross vCapsuleAxis;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 12)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_VERTEX)
+    {
+        // third vertex on triangle
+        // axis ((V2-Cp0) x C) x C
+        _CalculateAxis(v2,vCp0,m_vCapsuleAxis,m_vCapsuleAxis,vAxis);
+        //vAxis = ( ( v2-vCp0 ) cross vCapsuleAxis ) cross vCapsuleAxis;
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 13)) {
+                return FALSE;
+            }
+        }
+    }
+
+    // Test as separating axes direction vectors between each triangle
+    // edge and each capsule's cap center
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_VERTEX)
+    {
+        // first triangle vertex and first capsule point
+        //vAxis = v0 - vCp0;
+        SUBTRACT(v0,vCp0,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 14)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_VERTEX)
+    {
+        // second triangle vertex and first capsule point
+        //vAxis = v1 - vCp0;
+        SUBTRACT(v1,vCp0,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 15)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_VERTEX)
+    {
+        // third triangle vertex and first capsule point
+        //vAxis = v2 - vCp0;
+        SUBTRACT(v2,vCp0,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 16)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_FIRST_VERTEX)
+    {
+        // first triangle vertex and second capsule point
+        //vAxis = v0 - vCp1;
+        SUBTRACT(v0,vCp1,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 17)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_SECOND_VERTEX)
+    {
+        // second triangle vertex and second capsule point
+        //vAxis = v1 - vCp1;
+        SUBTRACT(v1,vCp1,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 18)) {
+                return FALSE;
+            }
+        }
+    }
+
+    if (flags & dxTriMeshData::CUF_USE_THIRD_VERTEX)
+    {
+        // third triangle vertex and second capsule point
+        //vAxis = v2 - vCp1;
+        SUBTRACT(v2,vCp1,vAxis);
+        if (_length2OfVector3( vAxis ) > fEpsilon) {
+            if (!_cldTestAxis(v0, v1, v2, vAxis, 19)) {
+                return FALSE;
+            }
+        }
+    }
+
+    return TRUE;
+}
+
+// Test one mesh triangle against the capsule and append up to two contacts.
+//
+// v0, v1, v2 - triangle vertices, in the same space as m_vCapsulePosition.
+// flags      - dxTriMeshData::CUF_USE_* bits, forwarded to the separating-axis
+//              test to select which edge/vertex axes are tried.
+//
+// Results are appended to m_gLocalContacts / m_ctContacts; the triIndex field
+// of the new entries is not written here. Nothing is appended when the
+// triangle normal cannot be normalised, when the capsule centre is behind the
+// triangle plane, when a separating axis is found, or when the capsule
+// segment is clipped away by the triangle's planes.
+//
+// The function also overwrites m_vE0..m_vE2 and m_vN, which the
+// separating-axis test reads.
+void sTrimeshCapsuleColliderData::_cldTestOneTriangleVSCapsule(
+    const dVector3 &v0, const dVector3 &v1, const dVector3 &v2,
+    uint8 flags)
+{
+    // calculate edges
+    SUBTRACT(v1,v0,m_vE0);
+    SUBTRACT(v2,v1,m_vE1);
+    SUBTRACT(v0,v2,m_vE2);
+
+    dVector3	_minus_vE0;
+    SUBTRACT(v0,v1,_minus_vE0);
+
+    // calculate poly normal
+    dCalcVectorCross3(m_vN,m_vE1,_minus_vE0);
+
+    // Even though all triangles might be initially valid, 
+    // a triangle may degenerate into a segment after applying 
+    // space transformation.
+    if (!dSafeNormalize3(m_vN))
+    {
+        return;
+    }
+
+    // create plane from triangle
+    dReal plDistance = -dCalcVectorDot3(v0,m_vN);
+    dVector4 plTrianglePlane;
+    CONSTRUCTPLANE(plTrianglePlane,m_vN,plDistance);
+
+    // calculate capsule distance to plane
+    dReal fDistanceCapsuleCenterToPlane = POINTDISTANCE(plTrianglePlane,m_vCapsulePosition);
+
+    // Capsule must be over positive side of triangle. Triangles are treated
+    // as single-sided, so the vertices are always used in their given order.
+    // (If double-sided support is ever added, v1 and v2 would have to be
+    // swapped for back-facing triangles before the axis test below.)
+    if (fDistanceCapsuleCenterToPlane < 0 /* && !bDoubleSided*/) 
+    {
+        // if not don't generate contacts
+        return;
+    }
+
+    // do intersection test and find best separating axis
+    if (!_cldTestSeparatingAxesOfCapsule(v0, v1, v2, flags))
+    {
+        // if not found do nothing
+        return;
+    }
+
+    // if best separation axis is not found
+    if (m_iBestAxis == 0 ) 
+    {
+        // this should not happen (we should already exit in that case)
+        dIASSERT(FALSE);
+        // do nothing
+        return;
+    }
+
+    // Half-length of the capsule's inner segment (between the cap centres).
+    const dReal fHalfSegment = m_fCapsuleSize*REAL(0.5)-m_vCapsuleRadius;
+
+    // Capsule centre shifted by one radius along the best-axis normal; the
+    // two end points below span the capsule segment offset the same way.
+    dVector3 vCposTrans;
+    vCposTrans[0] = m_vCapsulePosition[0] + m_vNormal[0]*m_vCapsuleRadius;
+    vCposTrans[1] = m_vCapsulePosition[1] + m_vNormal[1]*m_vCapsuleRadius;
+    vCposTrans[2] = m_vCapsulePosition[2] + m_vNormal[2]*m_vCapsuleRadius;
+
+    dVector3 vCEdgePoint0;
+    vCEdgePoint0[0]  = vCposTrans[0] + m_vCapsuleAxis[0]*fHalfSegment;
+    vCEdgePoint0[1]  = vCposTrans[1] + m_vCapsuleAxis[1]*fHalfSegment;
+    vCEdgePoint0[2]  = vCposTrans[2] + m_vCapsuleAxis[2]*fHalfSegment;
+
+    dVector3 vCEdgePoint1;
+    vCEdgePoint1[0] = vCposTrans[0] - m_vCapsuleAxis[0]*fHalfSegment;
+    vCEdgePoint1[1] = vCposTrans[1] - m_vCapsuleAxis[1]*fHalfSegment;
+    vCEdgePoint1[2] = vCposTrans[2] - m_vCapsuleAxis[2]*fHalfSegment;
+
+    // transform capsule edge points into triangle space (origin at v0)
+    vCEdgePoint0[0] -= v0[0];
+    vCEdgePoint0[1] -= v0[1];
+    vCEdgePoint0[2] -= v0[2];
+
+    vCEdgePoint1[0] -= v0[0];
+    vCEdgePoint1[1] -= v0[1];
+    vCEdgePoint1[2] -= v0[2];
+
+    dVector4 plPlane;
+    dVector3 _minus_vN;
+    _minus_vN[0] = -m_vN[0];
+    _minus_vN[1] = -m_vN[1];
+    _minus_vN[2] = -m_vN[2];
+    // triangle plane
+    CONSTRUCTPLANE(plPlane,_minus_vN,0);
+    //plPlane = Plane4f( -m_vN, 0);
+
+    if (!_cldClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    { 
+        return; 
+    }
+
+    // plane with edge 0
+    dVector3 vTemp;
+    dCalcVectorCross3(vTemp,m_vN,m_vE0);
+    CONSTRUCTPLANE(plPlane, vTemp, REAL(1e-5));
+    if (!_cldClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane ))
+    { 
+        return; 
+    }
+
+    // plane with edge 1 (offset by E0 because the origin is at v0)
+    dCalcVectorCross3(vTemp,m_vN,m_vE1);
+    CONSTRUCTPLANE(plPlane, vTemp, -(dCalcVectorDot3(m_vE0,vTemp)-REAL(1e-5)));
+    if (!_cldClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) 
+    { 
+        return; 
+    }
+
+    // plane with edge 2
+    dCalcVectorCross3(vTemp,m_vN,m_vE2);
+    CONSTRUCTPLANE(plPlane, vTemp, REAL(1e-5));
+    if (!_cldClipEdgeToPlane( vCEdgePoint0, vCEdgePoint1, plPlane )) { 
+        return; 
+    }
+
+    // return capsule edge points into absolute space
+    vCEdgePoint0[0] += v0[0];
+    vCEdgePoint0[1] += v0[1];
+    vCEdgePoint0[2] += v0[2];
+
+    vCEdgePoint1[0] += v0[0];
+    vCEdgePoint1[1] += v0[1];
+    vCEdgePoint1[2] += v0[2];
+
+    // calculate depths for both contact points
+    SUBTRACT(vCEdgePoint0,m_vCapsulePosition,vTemp);
+    dReal fDepth0 = dCalcVectorDot3(vTemp,m_vNormal) - (m_fBestCenter-m_fBestrt);
+    SUBTRACT(vCEdgePoint1,m_vCapsulePosition,vTemp);
+    dReal fDepth1 = dCalcVectorDot3(vTemp,m_vNormal) - (m_fBestCenter-m_fBestrt);
+
+    // clamp depths to zero
+    if (fDepth0 < 0) 
+    {
+        fDepth0 = REAL(0.0);
+    }
+
+    if (fDepth1 < 0 ) 
+    {
+        fDepth1 = REAL(0.0);
+    }
+
+    // Cached contacts's data
+    // contact 0
+    dIASSERT(m_ctContacts < (m_iFlags & NUMC_MASK)); // Do not call function if there is no room to store result
+    m_gLocalContacts[m_ctContacts].fDepth = fDepth0;
+    SET(m_gLocalContacts[m_ctContacts].vNormal,m_vNormal);
+    SET(m_gLocalContacts[m_ctContacts].vPos,vCEdgePoint0);
+    m_gLocalContacts[m_ctContacts].nFlags = 1;
+    m_ctContacts++;
+
+    // contact 1 is stored only while the contact limit leaves room for it
+    if (m_ctContacts < (m_iFlags & NUMC_MASK)) {
+        m_gLocalContacts[m_ctContacts].fDepth = fDepth1;
+        SET(m_gLocalContacts[m_ctContacts].vNormal,m_vNormal);
+        SET(m_gLocalContacts[m_ctContacts].vPos,vCEdgePoint1);
+        m_gLocalContacts[m_ctContacts].nFlags = 1;
+        m_ctContacts++;
+    }
+}
+
+// Capture everything the per-triangle tests need before a query starts.
+//
+// TriMesh / Capsule - the two geoms; their rotation and position are copied.
+// flags             - collision flags, stored in m_iFlags.
+// skip              - contact array stride, stored in m_iStride.
+//
+// Also resets the contact counter and the best-axis bookkeeping.
+void sTrimeshCapsuleColliderData::SetupInitialContext(dxTriMesh *TriMesh, dxGeom *Capsule, 
+                                                      int flags, int skip)
+{
+    // Capsule pose.
+    memcpy(m_mCapsuleRotation, dGeomGetRotation(Capsule), sizeof(dMatrix3));
+    memcpy(m_vCapsulePosition, dGeomGetPosition(Capsule), sizeof(dVector3));
+
+    // The capsule axis is column nCAPSULE_AXIS of the rotation matrix.
+    for (int iRow = 0; iRow < 3; ++iRow)
+    {
+        m_vCapsuleAxis[iRow] = m_mCapsuleRotation[iRow*4 + nCAPSULE_AXIS];
+    }
+
+    // m_fCapsuleSize is the reported length plus the two end caps.
+    dReal fReportedLength;
+    dGeomCapsuleGetParams(Capsule, &m_vCapsuleRadius, &fReportedLength);
+    m_fCapsuleSize = fReportedLength + 2*m_vCapsuleRadius;
+
+    // Mesh pose.
+    memcpy(m_mTriMeshRot, dGeomGetRotation(TriMesh), sizeof(dMatrix3));
+    memcpy(m_vTriMeshPos, dGeomGetPosition(TriMesh), sizeof(dVector3));
+
+    // Parameters used later when contacts are created.
+    m_iFlags  = flags;
+    m_iStride = skip;
+
+    // Start with no contacts and no best axis.
+    m_ctContacts  = 0;
+    m_fBestDepth  = - MAX_REAL;
+    m_fBestCenter = 0;
+    m_fBestrt     = 0;
+
+    for (int iComponent = 0; iComponent < 3; ++iComponent)
+    {
+        m_vNormal[iComponent] = REAL(0.0);
+    }
+}
+
+// Test a single triangle and tag the contacts it produced.
+//
+// ctContacts0         - index of the first local contact not yet tagged.
+// Triint              - triangle index written into the new contacts.
+// dv                  - the triangle's three vertices.
+// flags               - CUF_USE_* bits forwarded to the triangle test.
+// bOutFinishSearching - set to true once the contact limit
+//                       (m_iFlags & NUMC_MASK) has been reached.
+//
+// Returns the index of the next untagged contact, to be passed back in as
+// ctContacts0 on the following call.
+int sTrimeshCapsuleColliderData::TestCollisionForSingleTriangle(int ctContacts0, 
+                                                                int Triint, dVector3 dv[3], uint8 flags, bool &bOutFinishSearching)
+{
+    _cldTestOneTriangleVSCapsule(dv[0],dv[1],dv[2], flags);
+
+    // Every contact appended by the call above belongs to this triangle.
+    int iNextUntagged = ctContacts0;
+    while (iNextUntagged < (int)m_ctContacts)
+    {
+        m_gLocalContacts[iNextUntagged].triIndex = Triint;
+        ++iNextUntagged;
+    }
+
+    // Reporting this at the end lets the caller break out of its loop
+    // without an extra check before the first triangle.
+    bOutFinishSearching = !(m_ctContacts < (m_iFlags & NUMC_MASK));
+
+    return iNextUntagged;
+}
+
+
+static void dQueryCCTLPotentialCollisionTriangles(OBBCollider &Collider, 
+                                                  const sTrimeshCapsuleColliderData &cData, dxTriMesh *TriMesh, dxGeom *Capsule,
+                                                  OBBCache &BoxCache)
+{ // Runs Collider with a box built around the capsule against the mesh's BV tree; results are left in Collider for the caller to read
+    Matrix4x4 MeshMatrix;
+    const dVector3 vZeroVector3 = { REAL(0.0), };
+    MakeMatrix(vZeroVector3, cData.m_mTriMeshRot, MeshMatrix);
+    // MeshMatrix is built from a zero translation and the mesh rotation; the mesh position is subtracted from the capsule position instead
+    const dVector3 &vCapsulePos = cData.m_vCapsulePosition;
+    const dMatrix3 &mCapsuleRot = cData.m_mCapsuleRotation;
+
+    dVector3 vCapsuleOffsetPos;
+    dSubtractVectors3(vCapsuleOffsetPos, vCapsulePos, cData.m_vTriMeshPos);
+
+    const dReal fCapsuleRadius = cData.m_vCapsuleRadius, fCapsuleHalfAxis = cData.m_fCapsuleSize * REAL(0.5);
+    // Box around the capsule: half of m_fCapsuleSize along axis nCAPSULE_AXIS, the radius on the other two axes
+    OBB obbCapsule;
+    obbCapsule.mCenter.Set(vCapsuleOffsetPos[0], vCapsuleOffsetPos[1], vCapsuleOffsetPos[2]);
+    obbCapsule.mExtents.Set(
+        0 == nCAPSULE_AXIS ? fCapsuleHalfAxis : fCapsuleRadius,
+        1 == nCAPSULE_AXIS ? fCapsuleHalfAxis : fCapsuleRadius,
+        2 == nCAPSULE_AXIS ? fCapsuleHalfAxis : fCapsuleRadius);
+    obbCapsule.mRot.Set(
+        mCapsuleRot[0], mCapsuleRot[4], mCapsuleRot[8],
+        mCapsuleRot[1], mCapsuleRot[5], mCapsuleRot[9],
+        mCapsuleRot[2], mCapsuleRot[6], mCapsuleRot[10]);
+
+    // Temporal-coherence path: find the mesh's cached BoxTC entry for this capsule, creating one on first use
+    if (TriMesh->getDoTC(dxTriMesh::TTC_BOX)) {
+        dxTriMesh::BoxTC* BoxTC = 0;
+        const int iBoxCacheSize = TriMesh->m_BoxTCCache.size();
+        for (int i = 0; i != iBoxCacheSize; i++){
+            if (TriMesh->m_BoxTCCache[i].Geom == Capsule){
+                BoxTC = &TriMesh->m_BoxTCCache[i];
+                break;
+            }
+        }
+        if (!BoxTC){
+            TriMesh->m_BoxTCCache.push(dxTriMesh::BoxTC());
+            // The pointer is taken only after the push, from the newly appended last element
+            BoxTC = &TriMesh->m_BoxTCCache[TriMesh->m_BoxTCCache.size() - 1];
+            BoxTC->Geom = Capsule;
+            BoxTC->FatCoeff = 1.0f;
+        }
+
+        // Intersect
+        Collider.SetTemporalCoherence(true);
+        Collider.Collide(*BoxTC, obbCapsule, TriMesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+    }
+    else {
+        Collider.SetTemporalCoherence(false); // no per-geom cache entry: query with the caller-supplied BoxCache
+        Collider.Collide(BoxCache, obbCapsule, TriMesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+    }
+}
+
+// capsule - trimesh by CroTeam
+// Ported by Nguyem Binh
+int dCollideCCTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip) // Trimesh (o1) vs capsule (o2) collider; returns the number of contacts produced (0 when nothing touches)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dTriMeshClass);
+    dIASSERT (o2->type == dCapsuleClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1); // at least one contact must be requested
+
+    int nContactCount = 0;
+
+    dxTriMesh *TriMesh = (dxTriMesh*)o1;
+    dxGeom *Capsule = o2;
+
+    sTrimeshCapsuleColliderData cData; // per-call working state: transforms, flags, contact counter
+    cData.SetupInitialContext(TriMesh, Capsule, flags, skip);
+
+    const unsigned uiTLSKind = TriMesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == Capsule->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    OBBCollider& Collider = pccColliderCache->m_OBBCollider;
+
+    // Would it be better to use LSS here? -> confirm with Pierre.
+    dQueryCCTLPotentialCollisionTriangles(Collider, cData, 
+        TriMesh, Capsule, pccColliderCache->m_DefaultBoxCache);
+
+    if (Collider.GetContactStatus()) 
+    {
+        // Retrieve data
+        int TriCount = Collider.GetNbTouchedPrimitives();
+
+        if (TriCount != 0)
+        {
+            const int* Triangles = (const int*)Collider.GetTouchedPrimitives();
+
+            if (TriMesh->m_ArrayCallback != null) // hand the whole candidate list to the user's array callback, if one is set
+            {
+                TriMesh->m_ArrayCallback(TriMesh, Capsule, Triangles, TriCount);
+            }
+
+            // allocate buffer for local contacts on stack, one slot per requested contact
+            cData.m_gLocalContacts = (sLocalContactData*)dALLOCA16(sizeof(sLocalContactData)*(cData.m_iFlags & NUMC_MASK));
+
+            unsigned int ctContacts0 = cData.m_ctContacts; // index of the first contact not yet tagged with its triangle
+
+            const uint8 *useFlags = TriMesh->retrieveMeshSmartUseFlags(); // may be NULL; then every triangle uses CUF__USE_ALL_COMPONENTS
+
+            // loop through all intersecting triangles
+            for (int i = 0; i < TriCount; i++)
+            {
+                const int Triint = Triangles[i];
+                if (!TriMesh->invokeCallback(Capsule, Triint)) continue; // skip triangles for which invokeCallback returns false
+
+                dVector3 dv[3]; // triangle vertices fetched using the mesh position and rotation
+                TriMesh->fetchMeshTriangle(dv, Triint, cData.m_vTriMeshPos, cData.m_mTriMeshRot);
+
+                uint8 flags = useFlags != NULL ? useFlags[Triint] : (uint8)dxTriMeshData::CUF__USE_ALL_COMPONENTS; // note: shadows the 'flags' parameter inside this loop
+
+                bool bFinishSearching;
+                ctContacts0 = cData.TestCollisionForSingleTriangle(ctContacts0, Triint, dv, flags, bFinishSearching);
+
+                if (bFinishSearching) // the requested number of contacts has been reached
+                {
+                    break;
+                }
+            }
+
+            if (cData.m_ctContacts != 0) // convert the local contacts into the caller's dContactGeom array
+            {
+                nContactCount = cData._ProcessLocalContacts(contact, TriMesh, Capsule);
+            }
+        }
+    }
+
+    return nContactCount;
+}
+
+
+#endif
+
+
+// GIMPACT version
+#if dTRIMESH_GIMPACT
+
+#include "gimpact_contact_export_helper.h"
+#include "gimpact_gim_contact_accessor.h"
+
+#define nCAPSULE_AXIS 2
+
+// capsule - trimesh  By francisco leon
+int dCollideCCTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip) // GIMPACT build: trimesh (o1) vs capsule (o2); returns the number of contacts exported to 'contact'
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dTriMeshClass);
+    dIASSERT (o2->type == dCapsuleClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1); // at least one contact must be requested
+
+    dxTriMesh* TriMesh = (dxTriMesh*)o1;
+    dxGeom*	   gCylinder = o2; // despite the name this is the capsule geom (asserted above)
+
+    //Get capsule params
+    dMatrix3  mCapsuleRotation;
+    dVector3   vCapsulePosition;
+    dVector3   vCapsuleAxis;
+    dReal      vCapsuleRadius;
+    dReal      fCapsuleSize;
+    dMatrix3* pRot = (dMatrix3*) dGeomGetRotation(gCylinder);
+    memcpy(mCapsuleRotation,pRot,sizeof(dMatrix3));
+    dVector3* pDst = (dVector3*)dGeomGetPosition(gCylinder);
+    memcpy(vCapsulePosition,pDst,sizeof(dVector3));
+    //Axis: column nCAPSULE_AXIS of the rotation matrix (row stride 4)
+    vCapsuleAxis[0] = mCapsuleRotation[0*4 + nCAPSULE_AXIS];
+    vCapsuleAxis[1] = mCapsuleRotation[1*4 + nCAPSULE_AXIS];
+    vCapsuleAxis[2] = mCapsuleRotation[2*4 + nCAPSULE_AXIS];
+    // Get size of CCylinder
+    dGeomCCylinderGetParams(gCylinder,&vCapsuleRadius,&fCapsuleSize);
+    fCapsuleSize*=0.5f; // from here on fCapsuleSize holds half of the length reported above
+    //Set Capsule params
+    GIM_CAPSULE_DATA capsule;
+
+    capsule.m_radius = vCapsuleRadius;
+    VEC_SCALE(capsule.m_point1,fCapsuleSize,vCapsuleAxis); // m_point1 = position + half-length * axis
+    VEC_SUM(capsule.m_point1,vCapsulePosition,capsule.m_point1);
+    VEC_SCALE(capsule.m_point2,-fCapsuleSize,vCapsuleAxis); // m_point2 = position - half-length * axis
+    VEC_SUM(capsule.m_point2,vCapsulePosition,capsule.m_point2);
+
+
+    //Create contact list
+    GDYNAMIC_ARRAY trimeshcontacts;
+    GIM_CREATE_CONTACT_LIST(trimeshcontacts);
+
+    //Collide trimesh vs capsule
+    gim_trimesh_capsule_collision(&TriMesh->m_collision_trimesh,&capsule,&trimeshcontacts);
+
+
+    if(trimeshcontacts.m_size == 0) // no contacts: release the list and report nothing
+    {
+        GIM_DYNARRAY_DESTROY(trimeshcontacts);
+        return 0;
+    }
+
+    GIM_CONTACT * ptrimeshcontacts = GIM_DYNARRAY_POINTER(GIM_CONTACT,trimeshcontacts);
+    unsigned contactcount = trimeshcontacts.m_size;
+
+    dxGIMCContactAccessor contactaccessor(ptrimeshcontacts, TriMesh, gCylinder, -1); // NOTE(review): meaning of the trailing -1 is defined by dxGIMCContactAccessor -- confirm there
+    contactcount = dxGImpactContactsExportHelper::ExportMaxDepthGImpactContacts(contactaccessor, contactcount, flags, contact, skip); // contactcount now holds the number actually exported
+
+    GIM_DYNARRAY_DESTROY(trimeshcontacts);
+
+    return (int)contactcount;
+}
+
+
+#endif // dTRIMESH_GIMPACT
+
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_colliders.h b/libs/ode-0.16.1/ode/src/collision_trimesh_colliders.h
new file mode 100644
index 0000000..9452f90
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_colliders.h
@@ -0,0 +1,47 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_COLLISION_TRIMESH_COLLIDERS_H_
+#define _ODE_COLLISION_TRIMESH_COLLIDERS_H_
+
+
+int dCollideCylinderTrimesh(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideTrimeshPlane(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+int dCollideSTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideBTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideRTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideTTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideCCTL(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+int dCollideConvexTrimesh(dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip);
+
+ODE_PURE_INLINE int dCollideRayTrimesh( dxGeom *ray, dxGeom *trimesh, int flags, // Argument-order adapter: forwards to dCollideRTL with the two geoms swapped
+                                       dContactGeom *contact, int skip )
+{
+    // Swapped case, for code that needs it (heightfield initially)
+    // The other ray-geom colliders take geoms in a swapped order to the
+    // dCollideRTL function which is annoying when using function pointers.
+    return dCollideRTL( trimesh, ray, flags, contact, skip ); // result is passed through unchanged
+}
+
+
+#endif // _ODE_COLLISION_TRIMESH_COLLIDERS_H_
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_disabled.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_disabled.cpp
new file mode 100644
index 0000000..69203a0
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_disabled.cpp
@@ -0,0 +1,302 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/collision.h>
+#include "config.h"
+#include "matrix.h"
+
+
+#if !dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+
+
+static const dMatrix4 identity = // 4x4 identity transform; returned by dGeomTriMeshGetLastTransform() in builds with trimesh support disabled
+{
+    REAL( 1.0 ), REAL( 0.0 ), REAL( 0.0 ), REAL( 0.0 ),
+    REAL( 0.0 ), REAL( 1.0 ), REAL( 0.0 ), REAL( 0.0 ),
+    REAL( 0.0 ), REAL( 0.0 ), REAL( 1.0 ), REAL( 0.0 ),
+    REAL( 0.0 ), REAL( 0.0 ), REAL( 0.0 ), REAL( 1.0 ) // ones on the diagonal: an all-zero matrix is not an identity and not a usable transform
+};
+
+
+typedef dxMeshBase dxDisabledTriMesh_Parent; // alias for the base class, used in the constructor's initializer list
+struct dxDisabledTriMesh: // Placeholder trimesh geom, compiled only when dTRIMESH_ENABLED is off
+    public dxDisabledTriMesh_Parent
+{
+public:
+    // Functions
+    dxDisabledTriMesh(dxSpace *Space, // forwards the space and callbacks to the base class; NULL is passed in place of mesh data
+        dTriCallback *Callback, dTriArrayCallback *ArrayCallback, dTriRayCallback *RayCallback):
+        dxDisabledTriMesh_Parent(Space, NULL, Callback, ArrayCallback, RayCallback, false)
+    {
+    }
+
+    virtual void computeAABB(); // This is an abstract method in the base class
+};
+
+/*virtual */ // Empty override that exists only so the placeholder class can be instantiated
+void dxDisabledTriMesh::computeAABB()
+{
+    // Do nothing: the body is intentionally empty
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Stub functions for trimesh calls
+
+/*extern */ // Trimesh-disabled stub: no mesh data object is ever created
+dTriMeshDataID dGeomTriMeshDataCreate(void)
+{
+    return NULL; // always NULL in this build configuration
+}
+
+/*extern */ // Trimesh-disabled stub: the argument is ignored
+void dGeomTriMeshDataDestroy(dTriMeshDataID g)
+{
+    // Do nothing: the matching create stub in this file only ever returns NULL
+}
+
+
+/*extern */ // Trimesh-disabled stub: all arguments are ignored
+void dGeomTriMeshDataSet(dTriMeshDataID g, int data_id, void* in_data)
+{
+    // Do nothing: nothing is stored
+}
+
+/*extern */ // Trimesh-disabled stub: there is no data to return
+void *dGeomTriMeshDataGet(dTriMeshDataID g, int data_id)
+{
+    return NULL; // regardless of data_id
+}
+
+/*extern */ // Trimesh-disabled stub: returns no data and reports a size of 0 when a size pointer is supplied
+void *dGeomTriMeshDataGet2(dTriMeshDataID g, int data_id, sizeint *pout_size/*=NULL*/)
+{
+    if (pout_size != NULL) // the size output is optional
+    {
+        *pout_size = 0;
+    }
+
+    return NULL; // regardless of data_id
+}
+
+
+/*extern */ // Trimesh-disabled stub: the supplied transform is ignored
+void dGeomTriMeshSetLastTransform( dGeomID g, const dMatrix4 last_trans )
+{
+    // Do nothing: nothing is stored
+}
+
+/*extern */ // Trimesh-disabled stub: returns the same file-level constant for every geom
+const dReal *dGeomTriMeshGetLastTransform( dGeomID g )
+{
+    return identity; // pointer to the static dMatrix4 named 'identity' defined at the top of this file
+}
+
+
+/*extern */ // Trimesh-disabled stub: still returns a geom object (a dxDisabledTriMesh); the Data argument is not used
+dGeomID dCreateTriMesh(dSpaceID space, 
+    dTriMeshDataID Data,
+    dTriCallback* Callback,
+    dTriArrayCallback* ArrayCallback,
+    dTriRayCallback* RayCallback)
+{
+    return new dxDisabledTriMesh(space, Callback, ArrayCallback, RayCallback); // Oleh_Derevenko: I'm not sure if a NULL can be returned here -- keep on returning an object for backward compatibility
+}
+
+
+/*extern */ // Trimesh-disabled stub: both arguments are ignored
+void dGeomTriMeshSetData(dGeomID g, dTriMeshDataID Data)
+{
+    // Do nothing: no data is attached to the geom
+}
+
+/*extern */ // Trimesh-disabled stub: no mesh data is ever associated with a geom
+dTriMeshDataID dGeomTriMeshGetData(dGeomID g)
+{
+    return NULL; // always NULL in this build configuration
+}
+
+
+/*extern */ // Trimesh-disabled stub: the vertex and index buffers are ignored
+void dGeomTriMeshDataBuildSingle(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride)
+{
+    // Do nothing: no mesh is built
+}
+
+/*extern */ // Trimesh-disabled stub: the vertex, index and normal buffers are ignored
+void dGeomTriMeshDataBuildSingle1(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    // Do nothing: no mesh is built
+}
+
+/*extern */ // Trimesh-disabled stub: the vertex and index buffers are ignored
+void dGeomTriMeshDataBuildDouble(dTriMeshDataID g, 
+    const void* Vertices,  int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride)
+{
+    // Do nothing: no mesh is built
+}
+
+/*extern */ // Trimesh-disabled stub: the vertex, index and normal buffers are ignored
+void dGeomTriMeshDataBuildDouble1(dTriMeshDataID g, 
+    const void* Vertices,  int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    // Do nothing: no mesh is built
+}
+
+/*extern */ // Trimesh-disabled stub: the vertex and index arrays are ignored
+void dGeomTriMeshDataBuildSimple(dTriMeshDataID g,
+    const dReal* Vertices, int VertexCount,
+    const dTriIndex* Indices, int IndexCount)
+{
+    // Do nothing: no mesh is built
+}
+
+/*extern */ // Trimesh-disabled stub: the vertex, index and normal arrays are ignored
+void dGeomTriMeshDataBuildSimple1(dTriMeshDataID g,
+    const dReal* Vertices, int VertexCount,
+    const dTriIndex* Indices, int IndexCount,
+    const int* Normals)
+{
+    // Do nothing: no mesh is built
+}
+
+
+/*extern ODE_API */ // Trimesh-disabled stub: performs no preprocessing and always returns 1
+int dGeomTriMeshDataPreprocess(dTriMeshDataID g)
+{
+    // Do nothing
+    return 1; // constant result; the argument is never examined
+}
+
+/*extern ODE_API */ // Trimesh-disabled stub: the request flags and extra data are ignored; always returns 1
+int dGeomTriMeshDataPreprocess2(dTriMeshDataID g, unsigned int buildRequestFlags, const intptr *requestExtraData/*=NULL | const intptr (*)[dTRIDATAPREPROCESS_BUILD__MAX]*/)
+{
+    // Do nothing
+    return 1; // constant result; the arguments are never examined
+}
+
+/*extern */ // Trimesh-disabled stub: the callback is not stored
+void dGeomTriMeshSetCallback(dGeomID g, dTriCallback* Callback)
+{
+    // Do nothing: both arguments are ignored
+}
+
+/*extern */ // Trimesh-disabled stub: reports that no callback is set
+dTriCallback* dGeomTriMeshGetCallback(dGeomID g)
+{
+    return NULL; // regardless of the geom passed in
+}
+
+
+/*extern */ // Trimesh-disabled stub: the array callback is not stored
+void dGeomTriMeshSetArrayCallback(dGeomID g, dTriArrayCallback* ArrayCallback)
+{
+    // Do nothing: both arguments are ignored
+}
+
+/*extern */ // Trimesh-disabled stub: reports that no array callback is set
+dTriArrayCallback* dGeomTriMeshGetArrayCallback(dGeomID g)
+{
+    return NULL; // regardless of the geom passed in
+}
+
+
+/*extern */ // Trimesh-disabled stub: the ray callback is not stored
+void dGeomTriMeshSetRayCallback(dGeomID g, dTriRayCallback* Callback)
+{
+    // Do nothing: both arguments are ignored
+}
+
+/*extern */ // Trimesh-disabled stub: reports that no ray callback is set
+dTriRayCallback* dGeomTriMeshGetRayCallback(dGeomID g)
+{
+    return NULL; // regardless of the geom passed in
+}
+
+
+/*extern */ // Trimesh-disabled stub: the merge callback is not stored
+void dGeomTriMeshSetTriMergeCallback(dGeomID g, dTriTriMergeCallback* Callback)
+{
+    // Do nothing: both arguments are ignored
+}
+
+/*extern */ // Trimesh-disabled stub: reports that no merge callback is set
+dTriTriMergeCallback* dGeomTriMeshGetTriMergeCallback(dGeomID g)
+{
+    return NULL; // regardless of the geom passed in
+}
+
+
+/*extern */ // Trimesh-disabled stub: the enable request is ignored
+void dGeomTriMeshEnableTC(dGeomID g, int geomClass, int enable)
+{
+    // Do nothing: all arguments are ignored
+}
+
+/*extern */ // Trimesh-disabled stub: always reports 0 for every geom class
+int dGeomTriMeshIsTCEnabled(dGeomID g, int geomClass)
+{
+    return 0; // constant result; the arguments are never examined
+}
+
+
+/*extern */ // Trimesh-disabled stub: there is no cache to clear
+void dGeomTriMeshClearTCCache(dGeomID g)
+{
+    // Do nothing: the argument is ignored
+}
+
+
+/*extern */ // Trimesh-disabled stub: no mesh data is ever associated with a geom
+dTriMeshDataID dGeomTriMeshGetTriMeshDataID(dGeomID g)
+{
+    return NULL; // always NULL in this build configuration
+}
+
+
+/*extern */ // Trimesh-disabled stub: always reports zero triangles
+int dGeomTriMeshGetTriangleCount (dGeomID g)
+{
+    return 0; // constant result; the argument is never examined
+}
+
+/*extern */ // Trimesh-disabled stub: there is nothing to update
+void dGeomTriMeshDataUpdate(dTriMeshDataID g)
+{
+    // Do nothing: the argument is ignored
+}
+
+
+#endif // !dTRIMESH_ENABLED
+
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.cpp
new file mode 100644
index 0000000..d9b5ecd
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.cpp
@@ -0,0 +1,424 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2017
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "util.h"
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
+#include "collision_util.h"
+#include "collision_trimesh_gimpact.h"
+#include "collision_trimesh_internal_impl.h"
+
+
+//////////////////////////////////////////////////////////////////////////
+// dxTriMeshData
+
+bool dxTriMeshData::preprocessData(bool /*buildUseFlags*//*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/) // GIMPACT build: the first parameter is unnamed and unused; returns false only if meaningfulPreprocessData() fails
+{
+    FaceAngleStorageMethod faceAndgesRequirementToUse = faceAndgesRequirement;
+
+    if (faceAndgesRequirement != ASM__INVALID && haveFaceAnglesBeenBuilt()) // face angles requested a second time
+    {
+        dUASSERT(false, "Another request to build face angles after they had already been built");
+
+        faceAndgesRequirementToUse = ASM__INVALID; // when the assertion does not abort, fall back to treating the request as "nothing to build"
+    }
+
+    // Succeed without work when no face angles are requested or the mesh has no triangles; otherwise do the real preprocessing
+    bool result = faceAndgesRequirementToUse == ASM__INVALID || retrieveTriangleCount() == 0 
+        || meaningfulPreprocessData(faceAndgesRequirementToUse);
+    return result;
+}
+
+struct TrimeshDataVertexIndexAccessor_GIMPACT // Read-only helper that yields the vertex indices of a triangle from a dxTriMeshData index buffer
+{
+    enum
+    {
+        TRIANGLEINDEX_STRIDE = dxTriMesh::TRIANGLEINDEX_STRIDE, // byte distance between consecutive triangles (applied to a uint8 pointer below)
+    };
+
+    explicit TrimeshDataVertexIndexAccessor_GIMPACT(dxTriMeshData *meshData): // caches the index pointer; the mesh data must use the fixed stride
+        m_TriangleVertexIndices(meshData->retrieveTriangleVertexIndices())
+    {
+        dIASSERT(meshData->retrieveTriangleStride() == TRIANGLEINDEX_STRIDE);
+    }
+
+    void getTriangleVertexIndices(unsigned out_VertexIndices[dMTV__MAX], unsigned triangleIdx) const // copies dMTV__MAX indices of triangle 'triangleIdx' into out_VertexIndices
+    {
+        const GUINT32 *triIndicesBegin = m_TriangleVertexIndices;
+        const unsigned triStride = TRIANGLEINDEX_STRIDE;
+
+        const GUINT32 *triIndicesOfInterest = (const GUINT32 *)((const uint8 *)triIndicesBegin + (sizeint)triangleIdx * triStride); // byte offset computed in sizeint
+        std::copy(triIndicesOfInterest, triIndicesOfInterest + dMTV__MAX, out_VertexIndices);
+    }
+
+    const GUINT32           *m_TriangleVertexIndices; // not owned; points into the mesh data passed to the constructor
+};
+
+struct TrimeshDataTrianglePointAccessor_GIMPACT // Read-only helper that yields the vertex points of a triangle from dxTriMeshData vertex and index buffers
+{
+    enum
+    {
+        VERTEXINSTANCE_STRIDE = dxTriMesh::VERTEXINSTANCE_STRIDE, // fixed vertex stride the mesh data must use (asserted in the constructor)
+        TRIANGLEINDEX_STRIDE = dxTriMesh::TRIANGLEINDEX_STRIDE, // fixed triangle index stride the mesh data must use (asserted in the constructor)
+    };
+
+    explicit TrimeshDataTrianglePointAccessor_GIMPACT(dxTriMeshData *meshData): // explicit, like the sibling index accessor, to rule out implicit conversion from a mesh data pointer
+        m_VertexInstances(meshData->retrieveVertexInstances()),
+        m_TriangleVertexIndices(meshData->retrieveTriangleVertexIndices())
+    {
+        dIASSERT((unsigned)meshData->retrieveVertexStride() == (unsigned)VERTEXINSTANCE_STRIDE);
+        dIASSERT((unsigned)meshData->retrieveTriangleStride() == (unsigned)TRIANGLEINDEX_STRIDE);
+    }
+
+    void getTriangleVertexPoints(dVector3 out_Points[dMTV__MAX], unsigned triangleIndex) const // fills out_Points with the vertices of triangle 'triangleIndex'
+    {
+        dxTriMeshData::retrieveTriangleVertexPoints(out_Points, triangleIndex, 
+            &m_VertexInstances[0][0], VERTEXINSTANCE_STRIDE, m_TriangleVertexIndices, TRIANGLEINDEX_STRIDE);
+    }
+
+    const vec3f             *m_VertexInstances; // not owned; points into the mesh data passed to the constructor
+    const GUINT32           *m_TriangleVertexIndices; // not owned; points into the mesh data passed to the constructor
+};
+
+bool dxTriMeshData::meaningfulPreprocessData(FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/) // Builds the face angle storage for the mesh; returns false if either allocation fails
+{
+    const bool buildFaceAngles = true; dIASSERT(faceAndgesRequirement != ASM__INVALID);
+    // dIASSERT(buildFaceAngles);
+    dIASSERT(/*!buildFaceAngles || */!haveFaceAnglesBeenBuilt());
+
+    bool result = false;
+
+    bool anglesAllocated = false; // tracks whether the face angle storage must be released on failure
+
+    do // single-pass loop: 'break' is used as an early exit to the cleanup code below
+    {
+        if (buildFaceAngles)
+        {
+            if (!allocateFaceAngles(faceAndgesRequirement))
+            {
+                break;
+            }
+        }
+
+        anglesAllocated = true;
+
+        const unsigned int numTris = retrieveTriangleCount();
+        const unsigned int numVertices = retrieveVertexCount();
+        sizeint numEdges = (sizeint)numTris * dMTV__MAX; // one edge record per triangle vertex
+        dIASSERT(numVertices <= numEdges); // Edge records are going to be used for vertex data as well
+
+        const sizeint recordsMemoryRequired = dEFFICIENT_SIZE(numEdges * sizeof(EdgeRecord));
+        const sizeint verticesMemoryRequired = /*dEFFICIENT_SIZE*/(numVertices * sizeof(VertexRecord)); // Skip alignment for the last chunk
+        const sizeint totalTempMemoryRequired = recordsMemoryRequired + verticesMemoryRequired;
+        void *tempBuffer = dAlloc(totalTempMemoryRequired); // one temporary buffer holds the edge records followed by the vertex records
+
+        if (tempBuffer == NULL)
+        {
+            break;
+        }
+
+        EdgeRecord *edges = (EdgeRecord *)tempBuffer;
+        VertexRecord *vertices = (VertexRecord *)((uint8 *)tempBuffer + recordsMemoryRequired);
+
+        TrimeshDataVertexIndexAccessor_GIMPACT indexAccessor(this);
+        meaningfulPreprocess_SetupEdgeRecords(edges, numEdges, indexAccessor);
+
+        // Sort the edges, so the ones sharing the same verts are beside each other
+        std::sort(edges, edges + numEdges);
+
+        TrimeshDataTrianglePointAccessor_GIMPACT pointAccessor(this);
+        const dReal *const externalNormals = retrieveNormals();
+        IFaceAngleStorageControl *faceAngles = retrieveFaceAngles();
+        meaningfulPreprocess_buildEdgeFlags(NULL, faceAngles, edges, numEdges, vertices, externalNormals, pointAccessor); // NULL is passed as the first argument -- presumably the use-flags output, which this build does not produce
+
+        dFree(tempBuffer, totalTempMemoryRequired);
+
+        result = true;
+    }
+    while (false);
+
+    if (!result) // failure path: release the face angle storage if it had been allocated
+    {
+        if (anglesAllocated)
+        {
+            if (buildFaceAngles)
+            {
+                freeFaceAngles();
+            }
+        }
+    }
+
+    return result;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Trimesh
+
+dxTriMesh::~dxTriMesh() // Releases the GIMPACT trimesh first, then the buffer managers
+{
+    //Terminate Trimesh
+    gim_trimesh_destroy(&m_collision_trimesh);
+    gim_terminate_buffer_managers(m_buffer_managers);
+}
+
+
+/*virtual */ // Pushes the geom's current transform into the GIMPACT trimesh, refreshes it, and takes the geom AABB from its global bound
+void dxTriMesh::computeAABB()
+{
+    //update trimesh transform
+    mat4f transform;
+    IDENTIFY_MATRIX_4X4(transform);
+    MakeMatrix(this, transform);
+    gim_trimesh_set_tranform(&m_collision_trimesh, transform);
+
+    //Update trimesh boxes
+    gim_trimesh_update(&m_collision_trimesh);
+
+    GIM_AABB_COPY( &m_collision_trimesh.m_aabbset.m_global_bound, aabb ); // presumably copies the global bound into this geom's aabb member (macro not shown here)
+}
+
+
+void dxTriMesh::assignMeshData(dxTriMeshData *Data) // Attaches mesh data to the geom and creates the GIMPACT trimesh from it; Data is dereferenced unconditionally
+{
+    // GIMPACT only supports stride 12, so we need to catch the error early.
+    dUASSERT(
+        (unsigned int)Data->retrieveVertexStride() == (unsigned)VERTEXINSTANCE_STRIDE 
+        && (unsigned int)Data->retrieveTriangleStride() == (unsigned)TRIANGLEINDEX_STRIDE,
+        "Gimpact trimesh only supports a stride of 3 float/int\n"
+        "This means that you cannot use dGeomTriMeshDataBuildSimple() with Gimpact.\n"
+        "Change the stride, or use Opcode trimeshes instead.\n"
+    );
+
+    dxTriMesh_Parent::assignMeshData(Data);
+
+    //Create trimesh (only when the data actually has vertices)
+    const vec3f *vertexInstances = Data->retrieveVertexInstances();
+    if ( vertexInstances != NULL )
+    {
+        const GUINT32 *triangleVertexIndices = Data->retrieveTriangleVertexIndices();
+
+        sizeint vertexInstanceCount = Data->retrieveVertexCount();
+        sizeint triangleVertexCount = (sizeint)Data->retrieveTriangleCount() * dMTV__MAX; // total number of indices, dMTV__MAX per triangle
+
+        gim_trimesh_create_from_data( // both copy flags are 0, so the vertex and index buffers held by Data are shared rather than copied
+            m_buffer_managers,
+            &m_collision_trimesh,                           // gimpact mesh
+            const_cast<vec3f *>(vertexInstances),           // vertices
+            dCAST_TO_SMALLER(GUINT32, vertexInstanceCount), // nr of verts
+            0,                                              // copy verts?
+            const_cast<GUINT32 *>(triangleVertexIndices),   // indices
+            dCAST_TO_SMALLER(GUINT32, triangleVertexCount), // nr of indices
+            0,                                              // copy indices?
+            1                                               // transformed reply
+        );
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern */ // GIMPACT build: allocates an empty mesh data object; release it with dGeomTriMeshDataDestroy()
+dTriMeshDataID dGeomTriMeshDataCreate()
+{
+    return new dxTriMeshData();
+}
+
+/*extern */ // GIMPACT build: destroys a mesh data object created by dGeomTriMeshDataCreate()
+void dGeomTriMeshDataDestroy(dTriMeshDataID g)
+{
+    dxTriMeshData *data = g;
+    delete data; // deleting a NULL pointer is a no-op in C++, so g may be NULL
+}
+
+/*extern */ // GIMPACT build: stores auxiliary data selected by dataId into the mesh data object g (g must not be NULL)
+void dGeomTriMeshDataSet(dTriMeshDataID g, int dataId, void *pDataLocation) 
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    dxTriMeshData *data = g;
+
+    switch (dataId)
+    {
+        case dTRIMESHDATA_FACE_NORMALS: // pDataLocation is taken as an array of dReal normals
+        {
+            data->assignNormals((const dReal *)pDataLocation);
+            break;
+        }
+
+        case dTRIMESHDATA_USE_FLAGS: // Not used for GIMPACT
+        {
+            break;
+        }
+
+        // case dTRIMESHDATA__MAX: -- To be located by Find in Files
+        default:
+        {
+            dUASSERT(false, "invalid data type"); // always fail here: asserting on dataId itself let every non-zero invalid id pass
+            break;
+        }
+    }
+}
+
+static void *geomTriMeshDataGet(dTriMeshDataID g, int dataId, sizeint *pOutDataSize) ;
+
+/*extern */ // GIMPACT build: same as dGeomTriMeshDataGet2() without the size output
+void *dGeomTriMeshDataGet(dTriMeshDataID g, int dataId) 
+{
+    return geomTriMeshDataGet(g, dataId, NULL); // NULL: the caller does not want the data size
+}
+
+/*extern */ // GIMPACT build: returns the data selected by dataId and, when pOutDataSize is not NULL, its size
+void *dGeomTriMeshDataGet2(dTriMeshDataID g, int dataId, sizeint *pOutDataSize) 
+{
+    return geomTriMeshDataGet(g, dataId, pOutDataSize); // shared implementation defined below in this file
+}
+
+static // Shared implementation of dGeomTriMeshDataGet()/dGeomTriMeshDataGet2(); pOutDataSize is optional
+void *geomTriMeshDataGet(dTriMeshDataID g, int dataId, sizeint *pOutDataSize) 
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    const dxTriMeshData *data = g;
+
+    void *result = NULL; // stays NULL for use flags and for unknown ids
+
+    switch (dataId)
+    {
+        case dTRIMESHDATA_FACE_NORMALS: // returns the normals pointer and, optionally, the normals memory requirement
+        {
+            if (pOutDataSize != NULL)
+            {
+                *pOutDataSize = data->calculateNormalsMemoryRequirement();
+            }
+
+            result = (void *)data->retrieveNormals();
+            break;
+        }
+
+        case dTRIMESHDATA_USE_FLAGS: // Not used for GIMPACT
+        {
+            if (pOutDataSize != NULL)
+            {
+                *pOutDataSize = 0;
+            }
+
+            break;
+        }
+
+        // case dTRIMESHDATA__MAX: -- To be located by Find in Files
+        default:
+        {
+            if (pOutDataSize != NULL)
+            {
+                *pOutDataSize = 0;
+            }
+
+            dUASSERT(false, "invalid data type"); // always fail here: asserting on dataId itself let every non-zero invalid id pass
+            break;
+        }
+    }
+
+    return result;
+}
+
+/*extern */ // GIMPACT build: forwards the caller's buffers to dxTriMeshData::buildData(); g, Vertices and Indices must not be NULL
+void dGeomTriMeshDataBuildSingle1(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount,
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+    dAASSERT(Vertices);
+    dAASSERT(Indices);
+
+    dxTriMeshData *data = g;
+
+    data->buildData(Vertices, VertexStride, VertexCount,
+        Indices, IndexCount, TriStride,
+        Normals,
+        true); // true here, false in dGeomTriMeshDataBuildDouble1 -- presumably marks single-precision vertex data (confirm against buildData)
+}
+
+/*extern */ // GIMPACT build: forwards the caller's buffers to dxTriMeshData::buildData(); g, Vertices and Indices must not be NULL
+void dGeomTriMeshDataBuildDouble1(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount,
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+    dAASSERT(Vertices);
+    dAASSERT(Indices);
+
+    dxTriMeshData *data = g;
+
+    data->buildData(Vertices, VertexStride, VertexCount,
+        Indices, IndexCount, TriStride,
+        Normals,
+        false); // false here, true in dGeomTriMeshDataBuildSingle1 -- presumably marks double-precision vertex data (confirm against buildData)
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern */ // GIMPACT build: allocates a dxTriMesh geom from the given space, mesh data and callbacks, and returns it
+dGeomID dCreateTriMesh(dSpaceID space,
+    dTriMeshDataID Data,
+    dTriCallback* Callback,
+    dTriArrayCallback* ArrayCallback,
+    dTriRayCallback* RayCallback)
+{
+    dxTriMesh *mesh = new dxTriMesh(space, Data, Callback, ArrayCallback, RayCallback);
+    return mesh; // returned through the dGeomID handle type
+}
+
+
+/*extern */ // GIMPACT build: validates the geom and otherwise does nothing; last_trans is ignored
+void dGeomTriMeshSetLastTransform(dGeomID g, const dMatrix4 last_trans ) 
+{
+    dAASSERT(g);
+    dUASSERT(g->type == dTriMeshClass, "The geom is not a trimesh");
+
+    //stub: no transform is stored in this build
+}
+
+/*extern */ // GIMPACT build: validates the geom and always returns NULL, so callers must not dereference the result
+const dReal *dGeomTriMeshGetLastTransform(dGeomID g)
+{
+    dAASSERT(g);
+    dUASSERT(g->type == dTriMeshClass, "The geom is not a trimesh");
+
+    return NULL; // stub
+}
+
+
+#endif // #if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.h b/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.h
new file mode 100644
index 0000000..b928e97
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_gimpact.h
@@ -0,0 +1,278 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+// Modified for FreeSOLID Compatibility by Rodrigo Hernandez
+// Trimesh caches separation by Oleh Derevenko
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2019
+
+
+#ifndef _ODE_COLLISION_TRIMESH_GIMPACT_H_
+#define _ODE_COLLISION_TRIMESH_GIMPACT_H_
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
+
+//****************************************************************************
+// dxTriMesh class
+
+
+#include "collision_kernel.h"
+#include "collision_trimesh_colliders.h"
+#include "collision_util.h"
+#include <ode/collision_trimesh.h>
+
+#include "collision_trimesh_internal.h"
+#include <GIMPACT/gimpact.h>
+
+
+struct TrimeshCollidersCache // Deliberately empty placeholder; required for compatibility with OPCODE
+{
+};
+
+
+typedef dxTriDataBase dxTriMeshData_Parent;
+struct dxTriMeshData:
+    public dxTriMeshData_Parent
+{
+public:
+    dxTriMeshData():
+        dxTriMeshData_Parent()
+    {
+    }
+    // This struct declares no data members of its own, so there is nothing extra to release
+    ~dxTriMeshData() { /* Do nothing */ }
+    // Makes the base class buildData() visible as a member of this type
+    using dxTriMeshData_Parent::buildData;
+    
+    /* Set up the UseFlags array and/or build face angles */
+    bool preprocessData(bool buildUseFlags/*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/);
+    
+private:
+    bool meaningfulPreprocessData(FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/); // NOTE(review): presumably the worker behind preprocessData(); its definition is not in this header - confirm
+
+public:
+    /* For when app changes the vertices; this implementation has nothing to refresh */
+    void updateData() { /* Do nothing */ }
+    // Typed accessors: the base class results are cast to the vec3f / GUINT32 element types
+public:
+    const vec3f *retrieveVertexInstances() const { return (const vec3f *)dxTriMeshData_Parent::retrieveVertexInstances(); }
+    const GUINT32 *retrieveTriangleVertexIndices() const { return (const GUINT32 *)dxTriMeshData_Parent::retrieveTriangleVertexIndices(); }
+    // Normals: stored and returned as dReal; the size estimate is dSA__MAX dReal values per triangle
+public:
+    void assignNormals(const dReal *normals) { dxTriMeshData_Parent::assignNormals(normals); }
+    const dReal *retrieveNormals() const { return (const dReal *)dxTriMeshData_Parent::retrieveNormals(); }
+    sizeint calculateNormalsMemoryRequirement() const { return retrieveTriangleCount() * (sizeof(dReal) * dSA__MAX); }
+};
+
+
+
+#ifdef dDOUBLE
+// To use GIMPACT with doubles, we need to patch a couple of the GIMPACT functions to 
+// convert arguments to floats before sending them in
+
+
+/// Convert a GIMPACT vec3f (a) to an ODE dVector3 (b): b[i] = a[i] for i = 0..2, and the fourth element b[3] is set to 0
+#define dVECTOR3_VEC3F_COPY(b,a) { \
+    (b)[0] = (a)[0];              \
+    (b)[1] = (a)[1];              \
+    (b)[2] = (a)[2];              \
+    (b)[3] = 0;                   \
+}
+
+static inline 
+void gim_trimesh_get_triangle_verticesODE(GIM_TRIMESH * trimesh, GUINT32 triangle_index, dVector3 v1, dVector3 v2, dVector3 v3)
+{
+    // GIMPACT fills GREAL vectors, so the vertices are fetched into temporaries and copied out afterwards;
+    // a NULL output pointer means the caller does not want that vertex, and NULL is passed on for it.
+    vec3f fetched[3];
+    dReal *const outputs[3] = { v1, v2, v3 };
+    GREAL *requested[3];
+
+    for (int i = 0; i != 3; ++i)
+    {
+        requested[i] = outputs[i] != NULL ? fetched[i] : NULL;
+    }
+
+    gim_trimesh_get_triangle_vertices(trimesh, triangle_index, requested[0], requested[1], requested[2]);
+
+    for (int i = 0; i != 3; ++i)
+    {
+        if (outputs[i] != NULL)
+        {
+            dVECTOR3_VEC3F_COPY(outputs[i], fetched[i]);
+        }
+    }
+}
+
+// Anything calling gim_trimesh_get_triangle_vertices from within ODE 
+// should be patched through to the dDOUBLE version above
+// (this define must stay below that wrapper, so that the wrapper's own call still names the real GIMPACT function)
+#define gim_trimesh_get_triangle_vertices gim_trimesh_get_triangle_verticesODE
+
+static inline 
+int gim_trimesh_ray_closest_collisionODE( GIM_TRIMESH *mesh, const dVector3 origin, const dVector3 dir, dReal tmax, GIM_TRIANGLE_RAY_CONTACT_DATA *contact )
+{
+    vec3f dir_vec3f    = { (GREAL)dir[ 0 ],    (GREAL)dir[ 1 ],    (GREAL)dir[ 2 ]    };
+    vec3f origin_vec3f = { (GREAL)origin[ 0 ], (GREAL)origin[ 1 ], (GREAL)origin[ 2 ] };
+    // origin and dir are only read (copied to GREAL above), hence const, matching gim_trimesh_ray_collisionODE
+    return gim_trimesh_ray_closest_collision( mesh, origin_vec3f, dir_vec3f, (GREAL)tmax, contact );
+}
+
+static inline 
+int gim_trimesh_ray_collisionODE( GIM_TRIMESH *mesh, const dVector3 origin, const dVector3 dir, dReal tmax, GIM_TRIANGLE_RAY_CONTACT_DATA *contact )
+{
+    // Copy the ray description into GREAL temporaries, then delegate to GIMPACT.
+    vec3f rayOrigin = { (GREAL)origin[0], (GREAL)origin[1], (GREAL)origin[2] };
+    vec3f rayDirection = { (GREAL)dir[0], (GREAL)dir[1], (GREAL)dir[2] };
+    return gim_trimesh_ray_collision( mesh, rayOrigin, rayDirection, (GREAL)tmax, contact );
+}
+
+static inline 
+void gim_trimesh_sphere_collisionODE( GIM_TRIMESH *mesh, const dVector3 Position, dReal Radius, GDYNAMIC_ARRAY *contact )
+{
+    vec3f sphereCenter = { (GREAL)Position[0], (GREAL)Position[1], (GREAL)Position[2] }; // GREAL copy of the centre for GIMPACT
+    gim_trimesh_sphere_collision( mesh, sphereCenter, (GREAL)Radius, contact );
+}
+
+static inline 
+void gim_trimesh_plane_collisionODE( GIM_TRIMESH *mesh, const dVector4 plane, GDYNAMIC_ARRAY *contact )
+{
+    vec4f plane_vec4f = { (GREAL)plane[ 0 ], (GREAL)plane[ 1 ], (GREAL)plane[ 2 ], (GREAL)plane[ 3 ] }; // GREAL copy of the four plane coefficients
+    gim_trimesh_plane_collision( mesh, plane_vec4f, contact );
+}
+
+#define GIM_AABB_COPY( src, dst ) {		/* field-by-field copy: min/max pairs for X, then Y, then Z, into dst[0..5] */ \
+    (dst)[ 0 ]= (src) -> minX;			\
+    (dst)[ 1 ]= (src) -> maxX;			\
+    (dst)[ 2 ]= (src) -> minY;			\
+    (dst)[ 3 ]= (src) -> maxY;			\
+    (dst)[ 4 ]= (src) -> minZ;			\
+    (dst)[ 5 ]= (src) -> maxZ;			\
+}
+
+
+#else // #ifdef !dDOUBLE
+
+// With single precision, we can pass native ODE vectors directly to GIMPACT
+// (so the ...ODE names below are plain aliases of the GIMPACT functions, with no conversion wrappers)
+#define gim_trimesh_ray_closest_collisionODE 	gim_trimesh_ray_closest_collision
+#define gim_trimesh_ray_collisionODE 			gim_trimesh_ray_collision
+#define gim_trimesh_sphere_collisionODE 		gim_trimesh_sphere_collision
+#define gim_trimesh_plane_collisionODE 			gim_trimesh_plane_collision
+// Here the AABB is copied as one run of six GREAL values
+#define GIM_AABB_COPY( src, dst ) 	memcpy( dst, src, 6 * sizeof( GREAL ) )
+
+
+#endif // #ifdef !dDOUBLE
+
+
+typedef dxMeshBase dxTriMesh_Parent;
+struct dxTriMesh: 
+    public dxTriMesh_Parent
+{
+public:
+    // Functions
+    dxTriMesh(dxSpace *Space, dxTriMeshData *Data,
+        dTriCallback *Callback, dTriArrayCallback *ArrayCallback, dTriRayCallback *RayCallback):
+        dxTriMesh_Parent(Space, NULL, Callback, ArrayCallback, RayCallback, true) // TC has speed/space 'issues' that don't make it a clear win by default on spheres/boxes.
+    {
+        gim_init_buffer_managers(m_buffer_managers); // the buffer managers are initialized before any mesh data is attached
+        assignMeshData(Data); // the base class was constructed with NULL data; the real data is attached here
+    }
+
+    ~dxTriMesh();
+    // No-op in this implementation
+    void clearTCCache() { /* do nothing */ }
+
+    virtual void computeAABB();
+
+public:
+    dxTriMeshData *retrieveMeshData() const { return getMeshData(); }
+    // The count is read from the GIMPACT mesh; const_cast is needed because the GIMPACT call takes a non-const pointer
+    unsigned getMeshTriangleCount() const { return gim_trimesh_get_triangle_count(const_cast<GIM_TRIMESH *>(&m_collision_trimesh)); }
+    // Fetches triangle `index` into three separately addressed vectors; the fetch is bracketed by lock/unlock of the work data
+    void fetchMeshTransformedTriangle(dVector3 *const pout_triangle[3], unsigned index)
+    {
+        gim_trimesh_locks_work_data(&m_collision_trimesh);
+        gim_trimesh_get_triangle_vertices(&m_collision_trimesh, (GUINT32)index, *pout_triangle[0], *pout_triangle[1], *pout_triangle[2]);
+        gim_trimesh_unlocks_work_data(&m_collision_trimesh);
+    }
+    // Same as above, but writes into a contiguous array of three vectors
+    void fetchMeshTransformedTriangle(dVector3 out_triangle[3], unsigned index)
+    {
+        gim_trimesh_locks_work_data(&m_collision_trimesh);
+        gim_trimesh_get_triangle_vertices(&m_collision_trimesh, (GUINT32)index, out_triangle[0], out_triangle[1], out_triangle[2]);
+        gim_trimesh_unlocks_work_data(&m_collision_trimesh);
+    }
+
+private:
+    dxTriMeshData *getMeshData() const { return static_cast<dxTriMeshData *>(dxTriMesh_Parent::getMeshData()); }
+
+public:
+    enum
+    {
+        VERTEXINSTANCE_STRIDE = sizeof(vec3f), // byte size of one vec3f vertex
+        TRIANGLEINDEX_STRIDE = sizeof(GUINT32) * dMTV__MAX, // byte size of one triangle's dMTV__MAX GUINT32 indices
+    };
+
+    void assignMeshData(dxTriMeshData *Data);
+
+public:
+    GIM_TRIMESH  m_collision_trimesh;
+    GBUFFER_MANAGER_DATA m_buffer_managers[G_BUFFER_MANAGER__MAX];
+};
+
+
+static inline 
+void MakeMatrix(const dVector3 position, const dMatrix3 rotation, mat4f m)
+{
+    // Written one row at a time: rotation into columns 0..2, translation into column 3. Row 3 of m is not written.
+    m[0][0] = (GREAL)rotation[dM3E_XX];
+    m[0][1] = (GREAL)rotation[dM3E_XY];
+    m[0][2] = (GREAL)rotation[dM3E_XZ];
+    m[0][3] = (GREAL)position[dV3E_X];
+
+    m[1][0] = (GREAL)rotation[dM3E_YX];
+    m[1][1] = (GREAL)rotation[dM3E_YY];
+    m[1][2] = (GREAL)rotation[dM3E_YZ];
+    m[1][3] = (GREAL)position[dV3E_Y];
+
+    m[2][0] = (GREAL)rotation[dM3E_ZX];
+    m[2][1] = (GREAL)rotation[dM3E_ZY];
+    m[2][2] = (GREAL)rotation[dM3E_ZZ];
+    m[2][3] = (GREAL)position[dV3E_Z];
+}
+
+static inline 
+void MakeMatrix(dxGeom *g, mat4f m)
+{
+    const dVector3 &geomPosition = g->buildUpdatedPosition(); // position is requested first, then rotation
+    const dMatrix3 &geomRotation = g->buildUpdatedRotation();
+    MakeMatrix(geomPosition, geomRotation, m); // the array-based overload does the actual filling
+}
+
+
+#endif // #if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
+#endif	//_ODE_COLLISION_TRIMESH_GIMPACT_H_
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_internal.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_internal.cpp
new file mode 100644
index 0000000..b96e25f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_internal.cpp
@@ -0,0 +1,804 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2019
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+
+#if dTRIMESH_ENABLED
+
+#include "collision_trimesh_internal.h"
+#include "odeou.h"
+
+#include <algorithm>
+
+
+
+//////////////////////////////////////////////////////////////////////////
+
+enum EdgeStorageSignInclusion
+{
+    SSI__MIN,
+    // Selects which FaceAngleStorageCodec specialization is used
+    SSI_SIGNED_STORED = SSI__MIN, // codec stores values in a signed type and reports that negative angles are coded
+    SSI_POSITIVE_STORED, // codec stores values in an unsigned type and reports that negative angles are not coded
+
+    SSI__MAX,
+};
+
+template<typename TStorageType, EdgeStorageSignInclusion t_SignInclusion>
+class FaceAngleStorageCodec;
+
+template<typename TStorageType>
+class FaceAngleStorageCodec<TStorageType, SSI_SIGNED_STORED>
+{
+public:
+    typedef typename _make_signed<TStorageType>::type storage_type;
+    enum
+    {
+        STORAGE_TYPE_MAX = (typename _make_unsigned<TStorageType>::type)(~(typename _make_unsigned<TStorageType>::type)0) >> 1, // all-ones of the unsigned type shifted right by one bit
+    };
+
+    static bool areNegativeAnglesCoded()
+    {
+        return true;
+    }
+    // Scales |angleValue| by STORAGE_TYPE_MAX / M_PI, floors it, clamps it to STORAGE_TYPE_MAX and re-applies the sign
+    static storage_type encodeForStorage(dReal angleValue)
+    {
+        unsigned angleAsInt = (unsigned)dFloor(dFabs(angleValue) * (dReal)(STORAGE_TYPE_MAX / M_PI));
+        unsigned limitedAngleAsInt = dMACRO_MIN(angleAsInt, STORAGE_TYPE_MAX);
+        storage_type result = angleValue < REAL(0.0) ? -(storage_type)limitedAngleAsInt : (storage_type)limitedAngleAsInt; 
+        return  result;
+    }
+    // The sign of the stored value selects the domain: negative -> concave, zero -> flat, positive -> convex
+    static FaceAngleDomain classifyStorageValue(storage_type storedValue)
+    {
+        dSASSERT(EAD__MAX == 3);
+
+        return storedValue < 0 ? FAD_CONCAVE : (storedValue == 0 ? FAD_FLAT : FAD_CONVEX);
+    }
+    // Domains inside the FAD__SIGNSTORED_IMPLICITVALUE range are reported as not stored
+    static bool isAngleDomainStored(FaceAngleDomain domainValue)
+    {
+        return !dTMPL_IN_RANGE(domainValue, FAD__SIGNSTORED_IMPLICITVALUE_MIN, FAD__SIGNSTORED_IMPLICITVALUE_MAX);
+    }
+    // Inverse scaling of encodeForStorage(); the flooring done there is not undone
+    static dReal decodeStorageValue(storage_type storedValue)
+    {
+        return storedValue * (dReal)(M_PI / STORAGE_TYPE_MAX);
+    }
+};
+
+template<typename TStorageType>
+class FaceAngleStorageCodec<TStorageType, SSI_POSITIVE_STORED>
+{
+public:
+    typedef typename _make_unsigned<TStorageType>::type storage_type;
+    enum
+    {
+        STORAGE_TYPE_MIN = 0, // value stored for every negative angle (see encodeForStorage)
+        STORAGE_TYPE_MAX = (storage_type)(~(storage_type)0), // all-ones of the unsigned storage type
+    };
+
+    static bool areNegativeAnglesCoded()
+    {
+        return false;
+    }
+    // Negative angles all become STORAGE_TYPE_MIN; non-negative ones are scaled into STORAGE_TYPE_MIN + 1 .. STORAGE_TYPE_MAX
+    static storage_type encodeForStorage(dReal angleValue)
+    {
+        storage_type result = STORAGE_TYPE_MIN;
+
+        if (angleValue >= REAL(0.0))
+        {
+            unsigned angleAsInt = (unsigned)dFloor(angleValue * (dReal)(((STORAGE_TYPE_MAX - STORAGE_TYPE_MIN - 1) / M_PI)));
+            result = (STORAGE_TYPE_MIN + 1) + dMACRO_MIN(angleAsInt, STORAGE_TYPE_MAX - STORAGE_TYPE_MIN - 1); 
+        }
+
+        return  result;
+    }
+    // Below STORAGE_TYPE_MIN + 1 -> concave, exactly STORAGE_TYPE_MIN + 1 -> flat, above it -> convex
+    static FaceAngleDomain classifyStorageValue(storage_type storedValue)
+    {
+        dSASSERT(EAD__MAX == 3);
+
+        return storedValue < STORAGE_TYPE_MIN + 1 ? FAD_CONCAVE : (storedValue == STORAGE_TYPE_MIN + 1 ? FAD_FLAT : FAD_CONVEX);
+    }
+    // Only domains inside the FAD__BYTEPOS_STORED range are reported as stored
+    static bool isAngleDomainStored(FaceAngleDomain domainValue)
+    {
+        return dTMPL_IN_RANGE(domainValue, FAD__BYTEPOS_STORED_MIN, FAD__BYTEPOS_STORED_MAX);
+    }
+    // Inverse scaling of encodeForStorage(); must not be called with the STORAGE_TYPE_MIN marker value (asserted)
+    static dReal decodeStorageValue(storage_type storedValue)
+    {
+        dIASSERT(storedValue >= (STORAGE_TYPE_MIN + 1));
+
+        return (storedValue - (STORAGE_TYPE_MIN + 1)) * (dReal)(M_PI / (STORAGE_TYPE_MAX - STORAGE_TYPE_MIN - 1));
+    }
+};
+
+template<class TStorageCodec>
+class FaceAnglesWrapper:
+    public IFaceAngleStorageControl,
+    public IFaceAngleStorageView
+{
+protected:
+    FaceAnglesWrapper(unsigned triangleCount) { setAllocatedTriangleCount(triangleCount); } // only records the count; angle values are not initialized here
+
+public:
+    virtual ~FaceAnglesWrapper();
+    // Creation goes through allocateInstance(), which sizes the memory block for the requested triangle count
+    static IFaceAngleStorageControl *allocateInstance(unsigned triangleCount, IFaceAngleStorageView *&out_storageView);
+
+    static bool calculateInstanceSizeRequired(sizeint &out_sizeRequired, unsigned triangleCount);
+
+private:
+    void freeInstance();
+
+private:
+    typedef typename TStorageCodec::storage_type storage_type;
+    typedef storage_type TriangleFaceAngles[dMTV__MAX]; // one encoded value per triangle vertex index
+
+    struct StorageRecord
+    {
+        StorageRecord(): m_triangleCount(0) {}
+
+        unsigned        m_triangleCount;
+        TriangleFaceAngles  m_triangleFaceAngles[1]; // declared with one element; further elements live in the extra space allocated behind the object
+    };
+    // Object size plus one array element for every triangle beyond those already included in StorageRecord
+    static sizeint calculateStorageSizeForTriangleCount(unsigned triangleCount)
+    {
+        const unsigned baseIncludedTriangleCount = dSTATIC_ARRAY_SIZE(FaceAnglesWrapper<TStorageCodec>::StorageRecord, m_triangleFaceAngles);
+        const sizeint singleTriangleSize = membersize(FaceAnglesWrapper<TStorageCodec>::StorageRecord, m_triangleFaceAngles[0]);
+        return sizeof(FaceAnglesWrapper<TStorageCodec>) + (triangleCount > baseIncludedTriangleCount ? (triangleCount - baseIncludedTriangleCount) * singleTriangleSize : 0U);
+    }
+    // Inverse of the above: how many triangles fit into a block of storageSize bytes
+    static sizeint calculateTriangleCountForStorageSize(sizeint storageSize)
+    {
+        dIASSERT(storageSize >= sizeof(FaceAnglesWrapper<TStorageCodec>));
+
+        const unsigned baseIncludedTriangleCount = dSTATIC_ARRAY_SIZE(FaceAnglesWrapper<TStorageCodec>::StorageRecord, m_triangleFaceAngles);
+        const sizeint singleTriangleSize = membersize(FaceAnglesWrapper<TStorageCodec>::StorageRecord, m_triangleFaceAngles[0]);
+        return (storageSize - sizeof(FaceAnglesWrapper<TStorageCodec>)) / singleTriangleSize + baseIncludedTriangleCount;
+    }
+
+private: // IFaceAngleStorageControl
+    virtual void disposeStorage();
+
+    virtual bool areNegativeAnglesStored() const;
+
+    virtual void assignFacesAngleIntoStorage(unsigned triangleIndex, dMeshTriangleVertex vertexIndex, dReal dAngleValue);
+
+private: // IFaceAngleStorageView
+    virtual FaceAngleDomain retrieveFacesAngleFromStorage(dReal &out_angleValue, unsigned triangleIndex, dMeshTriangleVertex vertexIndex);
+
+public:
+    void setFaceAngle(unsigned triangleIndex, dMeshTriangleVertex vertexIndex, dReal dAngleValue)
+    {
+        dIASSERT(dTMPL_IN_RANGE(triangleIndex, 0, getAllocatedTriangleCount()));
+        dIASSERT(dTMPL_IN_RANGE(vertexIndex, dMTV__MIN, dMTV__MAX));
+        // The angle is kept in the codec's encoded form
+        m_record.m_triangleFaceAngles[triangleIndex][vertexIndex] = TStorageCodec::encodeForStorage(dAngleValue);
+    }
+    // Returns the domain of the stored value; out_angleValue receives the decoded angle, or 0 when the codec says that domain is not stored
+    FaceAngleDomain getFaceAngle(dReal &out_angleValue, unsigned triangleIndex, dMeshTriangleVertex vertexIndex) const
+    {
+        dIASSERT(dTMPL_IN_RANGE(triangleIndex, 0, getAllocatedTriangleCount()));
+        dIASSERT(dTMPL_IN_RANGE(vertexIndex, dMTV__MIN, dMTV__MAX));
+
+        storage_type storedValue = m_record.m_triangleFaceAngles[triangleIndex][vertexIndex];
+        FaceAngleDomain resultDomain = TStorageCodec::classifyStorageValue(storedValue);
+
+        out_angleValue = TStorageCodec::isAngleDomainStored(resultDomain) ? TStorageCodec::decodeStorageValue(storedValue) : REAL(0.0);
+        return resultDomain;
+    }
+
+private:
+    unsigned getAllocatedTriangleCount() const { return m_record.m_triangleCount; }
+    void setAllocatedTriangleCount(unsigned triangleCount) { m_record.m_triangleCount = triangleCount; }
+
+private:
+    StorageRecord       m_record;
+};
+
+
+template<class TStorageCodec>
+FaceAnglesWrapper<TStorageCodec>::~FaceAnglesWrapper() // empty: the memory block is released by freeInstance() after this destructor has run
+{
+}
+
+
+template<class TStorageCodec>
+/*static */
+IFaceAngleStorageControl *FaceAnglesWrapper<TStorageCodec>::allocateInstance(unsigned triangleCount, IFaceAngleStorageView *&out_storageView)
+{
+    // Creates a wrapper able to hold triangleCount triangles inside a single
+    // dAlloc'ed block. Returns NULL, leaving out_storageView untouched, when
+    // the required size cannot be computed or the allocation fails.
+
+    sizeint sizeRequired;
+    if (!FaceAnglesWrapper<TStorageCodec>::calculateInstanceSizeRequired(sizeRequired, triangleCount))
+    {
+        return NULL;
+    }
+
+    void *const rawBlock = dAlloc(sizeRequired);
+    if (rawBlock == NULL)
+    {
+        return NULL;
+    }
+
+    // Constructed in place: the block may be larger than sizeof(*this),
+    // with the extra space holding the trailing per-triangle angle records.
+    FaceAnglesWrapper<TStorageCodec> *const instance = new(rawBlock) FaceAnglesWrapper<TStorageCodec>(triangleCount);
+
+    // The same object is handed out through both of its interfaces.
+    out_storageView = instance;
+
+    return instance;
+}
+
+template<class TStorageCodec>
+/*static */
+bool FaceAnglesWrapper<TStorageCodec>::calculateInstanceSizeRequired(sizeint &out_sizeRequired, unsigned triangleCount)
+{
+    // Computes the byte size of an instance holding triangleCount triangles.
+    // Returns false, without writing out_sizeRequired, when the count exceeds
+    // what a block of SIZE_MAX bytes could hold.
+
+    const sizeint largestFittingCount = calculateTriangleCountForStorageSize(SIZE_MAX);
+    dIASSERT(triangleCount <= largestFittingCount);
+
+    // Check for overflow; this runtime test is made in addition to the
+    // assertion above.
+    if (triangleCount > largestFittingCount)
+    {
+        return false;
+    }
+
+    // Trailing alignment is going to be added by memory manager automatically
+    out_sizeRequired = calculateStorageSizeForTriangleCount(triangleCount);
+
+    return true;
+}
+
+template<class TStorageCodec>
+void FaceAnglesWrapper<TStorageCodec>::freeInstance()
+{
+    unsigned triangleCount = getAllocatedTriangleCount();
+    // The count is read before the destructor runs, because it is needed afterwards to recompute the block size
+    this->FaceAnglesWrapper<TStorageCodec>::~FaceAnglesWrapper();
+    // dFree() is given the same size that the size calculation yields for this triangle count
+    sizeint memoryBlockSize = calculateStorageSizeForTriangleCount(triangleCount);
+    dFree(this, memoryBlockSize);
+}
+
+
+template<class TStorageCodec>
+/*virtual */
+void FaceAnglesWrapper<TStorageCodec>::disposeStorage()
+{
+    freeInstance(); // destroys this object and frees its memory block, so the object must not be used after this call
+}
+
+template<class TStorageCodec>
+/*virtual */
+bool FaceAnglesWrapper<TStorageCodec>::areNegativeAnglesStored() const
+{
+    return TStorageCodec::areNegativeAnglesCoded(); // the answer is a fixed property of the codec type
+}
+
+template<class TStorageCodec>
+/*virtual */
+void FaceAnglesWrapper<TStorageCodec>::assignFacesAngleIntoStorage(unsigned triangleIndex, dMeshTriangleVertex vertexIndex, dReal dAngleValue)
+{
+    setFaceAngle(triangleIndex, vertexIndex, dAngleValue); // interface entry point; setFaceAngle() does the encoding and storing
+}
+
+template<class TStorageCodec>
+/*virtual */
+FaceAngleDomain FaceAnglesWrapper<TStorageCodec>::retrieveFacesAngleFromStorage(dReal &out_angleValue, unsigned triangleIndex, dMeshTriangleVertex vertexIndex)
+{
+    return getFaceAngle(out_angleValue, triangleIndex, vertexIndex); // interface entry point; getFaceAngle() does the classification and decoding
+}
+
+
+typedef IFaceAngleStorageControl *(FAngleStorageAllocProc)(unsigned triangleCount, IFaceAngleStorageView *&out_storageView);
+// One allocator per FaceAngleStorageMethod value (named in the entry comments); dxTriDataBase::allocateFaceAngles() looks them up with Encode()
+BEGIN_NAMESPACE_OU();
+template<>
+FAngleStorageAllocProc *const CEnumUnsortedElementArray<FaceAngleStorageMethod, ASM__MAX, FAngleStorageAllocProc *, 0x161211AD>::m_aetElementArray[] =
+{
+    &FaceAnglesWrapper<FaceAngleStorageCodec<uint8, SSI_SIGNED_STORED> >::allocateInstance, // ASM_BYTE_SIGNED,
+    &FaceAnglesWrapper<FaceAngleStorageCodec<uint8, SSI_POSITIVE_STORED> >::allocateInstance, // ASM_BYTE_POSITIVE,
+    &FaceAnglesWrapper<FaceAngleStorageCodec<uint16, SSI_SIGNED_STORED> >::allocateInstance, // ASM_WORD_SIGNED,
+};
+END_NAMESPACE_OU();
+static const CEnumUnsortedElementArray<FaceAngleStorageMethod, ASM__MAX, FAngleStorageAllocProc *, 0x161211AD> g_AngleStorageAllocProcs;
+
+
+//////////////////////////////////////////////////////////////////////////
+
+dxTriDataBase::~dxTriDataBase()
+{
+    freeFaceAngles(); // disposes of the face angle storage if one was allocated
+}
+
+
+void dxTriDataBase::buildData(const void *vertices, int vertexStride, unsigned vertexCount,
+    const void *indices, unsigned indexCount, int triStride,
+    const void *normals,
+    bool single)
+{
+    // Only pointers and layout parameters are recorded; no vertex, index or normal data is copied in this function.
+    dIASSERT(vertices);
+    dIASSERT(indices);
+    dIASSERT(vertexStride);
+    dIASSERT(triStride);
+    dIASSERT(indexCount);
+    dIASSERT(indexCount % dMTV__MAX == 0);
+
+    m_vertices = vertices;
+    m_vertexStride = vertexStride;
+    m_vertexCount = vertexCount;
+    m_indices = indices;
+    m_triStride = triStride;
+    m_triangleCount = indexCount / dMTV__MAX; // dMTV__MAX indices make up one triangle
+    m_normals = normals;
+    m_single = single;
+}
+
+
+bool dxTriDataBase::allocateFaceAngles(FaceAngleStorageMethod storageMethod)
+{
+    // Creates face angle storage of the requested kind for the current triangle count.
+    // Returns false, leaving the members unchanged, when the allocator returns NULL.
+    dIASSERT(m_faceAngles == NULL);
+
+    // Pick the allocator registered for this storage method
+    FAngleStorageAllocProc *const allocateStorage = g_AngleStorageAllocProcs.Encode(storageMethod);
+
+    IFaceAngleStorageView *angleView;
+    IFaceAngleStorageControl *const angleControl = allocateStorage(m_triangleCount, angleView);
+    if (angleControl == NULL)
+    {
+        return false;
+    }
+
+    // Keep both interfaces of the new storage
+    m_faceAngles = angleControl;
+    m_faceAngleView = angleView;
+
+    return true;
+}
+
+void dxTriDataBase::freeFaceAngles()
+{
+    if (m_faceAngles == NULL) { return; }
+
+    // Let the storage dispose of itself, then forget both interface pointers
+    m_faceAngles->disposeStorage();
+    m_faceAngleView = NULL;
+    m_faceAngles = NULL;
+}
+
+
+void dxTriDataBase::EdgeRecord::setupEdge(dMeshTriangleVertex edgeIdx, int triIdx, const unsigned vertexIndices[dMTV__MAX])
+{
+    if (edgeIdx < dMTV_SECOND)
+    {
+        dIASSERT(edgeIdx == dMTV_FIRST);
+        // First edge: runs from the first to the second vertex
+        m_edgeFlags  = dxTriMeshData::CUF_USE_FIRST_EDGE;
+        m_vert1Flags = dxTriMeshData::CUF_USE_FIRST_VERTEX;
+        m_vert2Flags = dxTriMeshData::CUF_USE_SECOND_VERTEX;
+        m_vertIdx1 = vertexIndices[dMTV_FIRST];
+        m_vertIdx2 = vertexIndices[dMTV_SECOND];
+    }
+    else if (edgeIdx == dMTV_SECOND)
+    {
+        m_edgeFlags  = dxTriMeshData::CUF_USE_SECOND_EDGE; // second edge: runs from the second to the third vertex
+        m_vert1Flags = dxTriMeshData::CUF_USE_SECOND_VERTEX;
+        m_vert2Flags = dxTriMeshData::CUF_USE_THIRD_VERTEX;
+        m_vertIdx1 = vertexIndices[dMTV_SECOND];
+        m_vertIdx2 = vertexIndices[dMTV_THIRD];
+    }
+    else
+    {
+        dIASSERT(edgeIdx == dMTV_THIRD);
+        // Third edge: runs from the third vertex back to the first
+        m_edgeFlags  = dxTriMeshData::CUF_USE_THIRD_EDGE;
+        m_vert1Flags = dxTriMeshData::CUF_USE_THIRD_VERTEX;
+        m_vert2Flags = dxTriMeshData::CUF_USE_FIRST_VERTEX;
+        m_vertIdx1 = vertexIndices[dMTV_THIRD];
+        m_vertIdx2 = vertexIndices[dMTV_FIRST];
+    }
+
+    // Make sure vertex index 1 is less than index 2 (for easier sorting)
+    if (m_vertIdx1 > m_vertIdx2)
+    {
+        dxSwap(m_vertIdx1, m_vertIdx2);
+        dxSwap(m_vert1Flags, m_vert2Flags); // the per-vertex flags are swapped along with the indices so they stay paired
+    }
+
+    m_triIdx = triIdx;
+    m_absVertexFlags = 0;
+}
+
+
+BEGIN_NAMESPACE_OU();
+template<>
+const dMeshTriangleVertex CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161116DC>::m_aetElementArray[] = 
+{
+    dMTV_FIRST, // kVert0 / kVert_Base
+    dMTV_SECOND, // kVert1 / kVert_Base
+    dMTV__MAX, // NOTE(review): looks like a filler for an index that no single vertex flag maps to - confirm against the CUF_* values
+    dMTV_THIRD, // kVert2 / kVert_Base
+};
+END_NAMESPACE_OU();
+/*extern */const CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161116DC> g_VertFlagOppositeIndices;
+
+BEGIN_NAMESPACE_OU();
+template<>
+const dMeshTriangleVertex CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161225E9>::m_aetElementArray[] = 
+{
+    dMTV_SECOND, // kVert0 / kVert_Base
+    dMTV_THIRD, // kVert1 / kVert_Base
+    dMTV__MAX, // NOTE(review): looks like a filler for an index that no single vertex flag maps to - confirm against the CUF_* values
+    dMTV_FIRST, // kVert2 / kVert_Base
+};
+END_NAMESPACE_OU();
+/*extern */const CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161225E9> g_VertFlagEdgeStartIndices;
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern ODE_API */
+void dGeomTriMeshDataBuildSimple1(dTriMeshDataID g,
+    const dReal* Vertices, int VertexCount, 
+    const dTriIndex* Indices, int IndexCount,
+    const int *Normals)
+{
+    // Fixed layout: a vertex occupies 4 dReal values and a triangle 3 dTriIndex values; the build flavour follows the precision macro.
+    const int vertexStride = 4 * sizeof(dReal);
+    const int triangleStride = 3 * sizeof(dTriIndex);
+#ifdef dSINGLE
+    dGeomTriMeshDataBuildSingle1(g, Vertices, vertexStride, VertexCount,
+        Indices, IndexCount, triangleStride, Normals);
+#else
+    dGeomTriMeshDataBuildDouble1(g, Vertices, vertexStride, VertexCount,
+        Indices, IndexCount, triangleStride, Normals);
+#endif
+}
+
+
+/*extern ODE_API */ // Same as dGeomTriMeshDataBuildSingle1() called with a NULL Normals argument
+void dGeomTriMeshDataBuildSingle(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride)
+{
+    dGeomTriMeshDataBuildSingle1(g, Vertices, VertexStride, VertexCount,
+        Indices, IndexCount, TriStride, (const void *)NULL);
+}
+
+/*extern ODE_API */ // Same as dGeomTriMeshDataBuildDouble1() called with a NULL Normals argument
+void dGeomTriMeshDataBuildDouble(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride)
+{
+    dGeomTriMeshDataBuildDouble1(g, Vertices, VertexStride, VertexCount,
+        Indices, IndexCount, TriStride, NULL);
+}
+
+/*extern ODE_API */ // Same as dGeomTriMeshDataBuildSimple1() called with a NULL Normals argument
+void dGeomTriMeshDataBuildSimple(dTriMeshDataID g,
+    const dReal* Vertices, int VertexCount, 
+    const dTriIndex* Indices, int IndexCount)
+{
+    dGeomTriMeshDataBuildSimple1(g,
+        Vertices, VertexCount, Indices, IndexCount,
+        (int *)NULL);
+}
+
+
+/*extern ODE_API */ // Shorthand for dGeomTriMeshDataPreprocess2() requesting only the concave edges build, with no extra data
+int dGeomTriMeshDataPreprocess(dTriMeshDataID g)
+{
+    unsigned buildRequestFlags = (1U << dTRIDATAPREPROCESS_BUILD_CONCAVE_EDGES);
+    return dGeomTriMeshDataPreprocess2(g, buildRequestFlags, NULL);
+}
+
+
+BEGIN_NAMESPACE_OU(); // Table mapping the face angles "extra" request values (named in the entry comments) to storage methods
+template<>
+const FaceAngleStorageMethod CEnumUnsortedElementArray<unsigned, dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MAX, FaceAngleStorageMethod, 0x17010902>::m_aetElementArray[] = 
+{
+    ASM_BYTE_POSITIVE, // dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA_BYTE_POSITIVE,
+    ASM_BYTE_SIGNED, // dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA_BYTE_ALL,
+    ASM_WORD_SIGNED, // dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA_WORD_ALL,
+};
+END_NAMESPACE_OU();
+static const CEnumUnsortedElementArray<unsigned, dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MAX, FaceAngleStorageMethod, 0x17010902> g_TriMeshDataPreprocess_FaceAndlesExtraDataAngleStorageMethods;
+
+/*extern ODE_API */
+int dGeomTriMeshDataPreprocess2(dTriMeshDataID g, unsigned int buildRequestFlags, const intptr *requestExtraData/*=NULL | const intptr (*)[dTRIDATAPREPROCESS_BUILD__MAX]*/)
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+    dAASSERT((buildRequestFlags & (1U << dTRIDATAPREPROCESS_BUILD_FACE_ANGLES)) == 0 || requestExtraData == NULL || dIN_RANGE(requestExtraData[dTRIDATAPREPROCESS_BUILD_FACE_ANGLES], dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MIN, dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MAX));
+    const bool buildUseFlags = (buildRequestFlags & (1U << dTRIDATAPREPROCESS_BUILD_CONCAVE_EDGES)) != 0;
+    FaceAngleStorageMethod faceAnglesRequirement = ASM__INVALID; // stays invalid unless face angles are requested
+    if ((buildRequestFlags & (1U << dTRIDATAPREPROCESS_BUILD_FACE_ANGLES)) != 0)
+    {
+        const bool extraDataUsable = requestExtraData != NULL && dIN_RANGE(requestExtraData[dTRIDATAPREPROCESS_BUILD_FACE_ANGLES], dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MIN, dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__MAX); // missing or out-of-range extra data selects the default
+        faceAnglesRequirement = g_TriMeshDataPreprocess_FaceAndlesExtraDataAngleStorageMethods.Encode(extraDataUsable ? (unsigned)requestExtraData[dTRIDATAPREPROCESS_BUILD_FACE_ANGLES] : dTRIDATAPREPROCESS_FACE_ANGLES_EXTRA__DEFAULT);
+    }
+    return g->preprocessData(buildUseFlags, faceAnglesRequirement);
+}
+
+/*extern ODE_API */
+void dGeomTriMeshDataUpdate(dTriMeshDataID g) 
+{
+    // Validate the handle, then forward to the data object's updateData()
+    dUASSERT(g, "The argument is not a trimesh data");
+    dxTriMeshData *const meshData = g;
+    meshData->updateData();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern ODE_API */
+void dGeomTriMeshSetCallback(dGeomID g, dTriCallback* Callback)
+{
+    // Check that g is a trimesh geom, then store Callback on it
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    static_cast<dxTriMesh *>(g)->assignCallback(Callback);
+}
+
+/*extern ODE_API */
+dTriCallback* dGeomTriMeshGetCallback(dGeomID g)
+{
+    // Check that g is a trimesh geom, then read the callback through a const view of it
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    return static_cast<const dxTriMesh *>(g)->retrieveCallback();
+}
+
+/*extern ODE_API */
+void dGeomTriMeshSetArrayCallback(dGeomID g, dTriArrayCallback* ArrayCallback)
+{
+    // Check that g is a trimesh geom, then store ArrayCallback on it
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    static_cast<dxTriMesh *>(g)->assignArrayCallback(ArrayCallback);
+}
+
+/*extern ODE_API */
+dTriArrayCallback *dGeomTriMeshGetArrayCallback(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Report the dTriArrayCallback currently kept by the trimesh object
+    const dxTriMesh *const triMesh = static_cast<const dxTriMesh *>(g);
+    return triMesh->retrieveArrayCallback();
+}
+
+/*extern ODE_API */
+void dGeomTriMeshSetRayCallback(dGeomID g, dTriRayCallback* Callback)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Hand the new dTriRayCallback over to the trimesh object
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(g);
+    triMesh->assignRayCallback(Callback);
+}
+
+/*extern ODE_API */
+dTriRayCallback* dGeomTriMeshGetRayCallback(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Report the dTriRayCallback currently kept by the trimesh object
+    const dxTriMesh *const triMesh = static_cast<const dxTriMesh *>(g);
+    return triMesh->retrieveRayCallback();
+}
+
+/*extern ODE_API */
+void dGeomTriMeshSetTriMergeCallback(dGeomID g, dTriTriMergeCallback* Callback)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Hand the new dTriTriMergeCallback over to the trimesh object
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(g);
+    triMesh->assignTriMergeCallback(Callback);
+}
+
+/*extern ODE_API */
+dTriTriMergeCallback *dGeomTriMeshGetTriMergeCallback(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Report the dTriTriMergeCallback currently kept by the trimesh object
+    const dxTriMesh *const triMesh = static_cast<const dxTriMesh *>(g);
+    return triMesh->retrieveTriMergeCallback();
+}
+
+/*extern ODE_API */
+void dGeomTriMeshSetData(dGeomID g, dTriMeshDataID Data)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Attach the given mesh data to the trimesh object via assignMeshData()
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(g);
+    triMesh->assignMeshData(Data);
+}
+
+/*extern ODE_API */
+dTriMeshDataID dGeomTriMeshGetData(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Report the mesh data currently attached to the trimesh object
+    const dxTriMesh *const triMesh = static_cast<const dxTriMesh *>(g);
+    return triMesh->retrieveMeshData();
+}
+
+
+BEGIN_NAMESPACE_OU();
+template<> // Table pairing each dxTriMesh::TRIMESHTC value (by position) with a geom class id
+const int CEnumSortedElementArray<dxTriMesh::TRIMESHTC, dxTriMesh::TTC__MAX, int, 0x161003D5>::m_aetElementArray[] =
+{
+    dSphereClass, // TTC_SPHERE,
+    dBoxClass, // TTC_BOX,
+    dCapsuleClass, // TTC_CAPSULE,
+};
+END_NAMESPACE_OU();
+static const CEnumSortedElementArray<dxTriMesh::TRIMESHTC, dxTriMesh::TTC__MAX, int, 0x161003D5> g_asiMeshTCGeomClasses; // Used in this file to Decode() a geom class id into a TRIMESHTC value
+
+/*extern ODE_API */
+void dGeomTriMeshEnableTC(dGeomID g, int geomClass, int enable)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    // Translate the geom class id into the matching TRIMESHTC value
+    const dxTriMesh::TRIMESHTC cacheKind = g_asiMeshTCGeomClasses.Decode(geomClass);
+    if (!g_asiMeshTCGeomClasses.IsValidDecode(cacheKind))
+    {
+        // A geom class id that does not decode is ignored without any report
+        return;
+    }
+    static_cast<dxTriMesh *>(g)->assignDoTC(cacheKind, enable != 0);
+}
+
+/*extern ODE_API */
+int dGeomTriMeshIsTCEnabled(dGeomID g, int geomClass)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    // A geom class id that does not decode to a TRIMESHTC value is reported as disabled
+    const dxTriMesh::TRIMESHTC cacheKind = g_asiMeshTCGeomClasses.Decode(geomClass);
+    if (!g_asiMeshTCGeomClasses.IsValidDecode(cacheKind)) { return 0; }
+
+    // Otherwise the answer is the flag kept by the trimesh object for that value
+    const bool isEnabled = static_cast<const dxTriMesh *>(g)->retrieveDoTC(cacheKind);
+    return isEnabled;
+}
+
+
+/*extern ODE_API */
+dTriMeshDataID dGeomTriMeshGetTriMeshDataID(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Same lookup as dGeomTriMeshGetData(): the attached mesh data is returned
+    const dxTriMesh *const triMesh = static_cast<const dxTriMesh *>(g);
+    return triMesh->retrieveMeshData();
+}
+
+
+/*extern ODE_API */
+void dGeomTriMeshClearTCCache(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    // Delegate to the trimesh object's clearTCCache()
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(g);
+    triMesh->clearTCCache();
+}
+
+
+/*extern ODE_API */
+int dGeomTriMeshGetTriangleCount(dGeomID g)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    // The count is obtained as unsigned and converted to int by the return statement
+    const unsigned triangleCount = static_cast<const dxTriMesh *>(g)->getMeshTriangleCount();
+    return triangleCount;
+}
+
+
+/*extern ODE_API */
+void dGeomTriMeshGetTriangle(dGeomID g, int index, dVector3 *v0/*=NULL*/, dVector3 *v1/*=NULL*/, dVector3 *v2/*=NULL*/)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+    dUASSERT(v0 != NULL || v1 != NULL || v2 != NULL, "A meaningless call");
+
+    // Collect the three output pointers in v0, v1, v2 order and let the trimesh fill them
+    dVector3 *requestedVertices[3] = { v0, v1, v2 };
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(g);
+    triMesh->fetchMeshTransformedTriangle(requestedVertices, index);
+}
+
+/*extern ODE_API */
+void dGeomTriMeshGetPoint(dGeomID g, int index, dReal u, dReal v, dVector3 Out)
+{
+    dUASSERT(g && g->type == dTriMeshClass, "The argument is not a trimesh");
+
+    // Obtain the three vertices of the requested triangle from the trimesh
+    dVector3 triangleVertices[3];
+    static_cast<dxTriMesh *>(g)->fetchMeshTransformedTriangle(triangleVertices, index);
+
+    // Let GetPointFromBarycentric() turn the vertices and the (u, v) pair into Out
+    GetPointFromBarycentric(triangleVertices, u, v, Out);
+}
+
+
+/*extern */
+IFaceAngleStorageView *dxGeomTriMeshGetFaceAngleView(dxGeom *triMeshGeom)
+{
+    dUASSERT(triMeshGeom && triMeshGeom->type == dTriMeshClass, "The argument is not a trimesh");
+    // Report the face angle view exposed by the trimesh object
+    dxTriMesh *const triMesh = static_cast<dxTriMesh *>(triMeshGeom);
+    return triMesh->retrieveFaceAngleView();
+}
+
+
+#endif // #if dTRIMESH_ENABLED
+
+
+//////////////////////////////////////////////////////////////////////////
+// Deprecated functions
+
+/*extern */
+void dGeomTriMeshDataGetBuffer(dTriMeshDataID g, unsigned char **buf, int *bufLen)
+{
+    // Deprecated accessor: queries dTRIMESHDATA_USE_FLAGS through dGeomTriMeshDataGet2(); both outputs are optional
+    sizeint dataSizeStorage = 0; // zero-initialized so that *bufLen is well defined even if the callee leaves the size unwritten
+    void *dataPointer = dGeomTriMeshDataGet2(g, dTRIMESHDATA_USE_FLAGS, (bufLen != NULL ? &dataSizeStorage : NULL));
+
+    if (bufLen != NULL)
+    {
+        *bufLen = (int)dataSizeStorage; // the size is narrowed to int as required by this signature
+    }
+    if (buf != NULL)
+    {
+        *buf = (unsigned char *)dataPointer;
+    }
+}
+
+/*extern */
+void dGeomTriMeshDataSetBuffer(dTriMeshDataID g, unsigned char* buf)
+{   // Deprecated shortcut: passes buf to dGeomTriMeshDataSet() as the dTRIMESHDATA_USE_FLAGS data
+    dGeomTriMeshDataSet(g, dTRIMESHDATA_USE_FLAGS, static_cast<void *>(buf));
+}
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_internal.h b/libs/ode-0.16.1/ode/src/collision_trimesh_internal.h
new file mode 100644
index 0000000..477b770
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_internal.h
@@ -0,0 +1,399 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+// Modified for FreeSOLID Compatibility by Rodrigo Hernandez
+// TriMesh caches separation by Oleh Derevenko
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2019
+
+
+#ifndef _ODE_COLLISION_TRIMESH_INTERNAL_H_
+#define _ODE_COLLISION_TRIMESH_INTERNAL_H_
+
+
+//****************************************************************************
+// dxTriMesh class
+
+
+#include "collision_kernel.h"
+#include "collision_trimesh_colliders.h"
+#include "collision_util.h"
+#include <ode/collision_trimesh.h>
+
+#if dTLS_ENABLED
+#include "odetls.h"
+#endif
+
+
+struct TrimeshCollidersCache;
+struct dxTriMeshData;
+
+
+static inline
+TrimeshCollidersCache *GetTrimeshCollidersCache(unsigned uiTLSKind)
+{
+#if dTLS_ENABLED
+    // With TLS enabled the cache is requested from COdeTls for the given kind
+    return COdeTls::GetTrimeshCollidersCache(static_cast<EODETLSKIND>(uiTLSKind));
+#else // dTLS_ENABLED
+    extern TrimeshCollidersCache g_ccTrimeshCollidersCache; // the one cache object, defined outside this header
+    (void)uiTLSKind; // unused
+    return &g_ccTrimeshCollidersCache;
+#endif // dTLS_ENABLED
+}
+
+
+enum FaceAngleStorageMethod // Identifies the storage variant passed to dxTriDataBase::allocateFaceAngles()
+{
+    ASM__MIN,
+    // NOTE(review): the names suggest byte- vs. word-sized and signed vs. positive-only encodings -- confirm against the storage classes
+    ASM_BYTE_SIGNED = ASM__MIN,
+    ASM_BYTE_POSITIVE,
+    ASM_WORD_SIGNED,
+    // One past the last valid method
+    ASM__MAX,
+    // Shares the value of ASM__MAX, so it can never collide with a valid method
+    ASM__INVALID = ASM__MAX,
+};
+
+enum FaceAngleDomain // Classification returned by IFaceAngleStorageView::retrieveFacesAngleFromStorage()
+{
+    FAD__MIN,
+
+    FAD_CONCAVE = FAD__MIN,
+    // [FAD__SIGNSTORED_IMPLICITVALUE_MIN, FAD__SIGNSTORED_IMPLICITVALUE_MAX) is a half-open sub-range holding FAD_FLAT only
+    FAD__SIGNSTORED_IMPLICITVALUE_MIN,
+
+    FAD_FLAT = FAD__SIGNSTORED_IMPLICITVALUE_MIN,
+
+    FAD__SIGNSTORED_IMPLICITVALUE_MAX,
+    // [FAD__BYTEPOS_STORED_MIN, FAD__BYTEPOS_STORED_MAX) is a half-open sub-range holding FAD_CONVEX only
+    FAD__BYTEPOS_STORED_MIN = FAD__SIGNSTORED_IMPLICITVALUE_MAX,
+
+    FAD_CONVEX = FAD__BYTEPOS_STORED_MIN,
+
+    FAD__BYTEPOS_STORED_MAX,
+    FAD__MAX = FAD__BYTEPOS_STORED_MAX, // one past the last domain, named consistently with the FAD_ prefix
+    EAD__MAX = FAD__BYTEPOS_STORED_MAX, // original spelling of the same bound, kept so that existing users still compile
+};
+
+class IFaceAngleStorageControl // Write-side interface of a face angle storage (see dxTriDataBase::retrieveFaceAngles())
+{
+public:
+    virtual void disposeStorage() = 0; // NOTE(review): presumably releases the storage, as the interface has no virtual destructor -- confirm
+    // Tells whether the implementation keeps negative angle values
+    virtual bool areNegativeAnglesStored() const = 0;
+
+    // This is to store angles between neighbor triangle normals as positive value for convex and negative for concave edges
+    virtual void assignFacesAngleIntoStorage(unsigned triangleIndex, dMeshTriangleVertex vertexIndex, dReal dAngleValue) = 0;
+};
+
+class IFaceAngleStorageView // Read-side interface of a face angle storage (see dxTriDataBase::retrieveFaceAngleView())
+{
+public: // The query returns the angle's FaceAngleDomain and has an out-parameter for the angle value
+    virtual FaceAngleDomain retrieveFacesAngleFromStorage(dReal &out_AngleValue, unsigned triangleIndex, dMeshTriangleVertex vertexIndex) = 0;
+};
+
+
+typedef dBase dxTriDataBase_Parent;
+struct dxTriDataBase: // Describes trimesh source data: vertex/index arrays with strides and counts, normals and face angle storage
+    public dxTriDataBase_Parent
+{
+public:
+    dxTriDataBase(): // Starts with no data attached: every pointer is NULL and every count/stride is zero
+        dxTriDataBase_Parent(),
+        m_vertices(NULL),
+        m_vertexStride(0),
+        m_vertexCount(0),
+        m_indices(NULL),
+        m_triangleCount(0),
+        m_triStride(0),
+        m_single(false),
+        m_normals(NULL),
+        m_faceAngles(NULL),
+        m_faceAngleView(NULL)
+    {
+#if !dTRIMESH_ENABLED
+        dUASSERT(false, "dTRIMESH_ENABLED is not defined. Trimesh geoms will not work");
+#endif
+    }
+
+    ~dxTriDataBase();
+    // Defined outside this header. NOTE(review): the arguments presumably end up in the like-named m_* members -- confirm
+    void buildData(const void *Vertices, int VertexStide, unsigned VertexCount, 
+        const void *Indices, unsigned IndexCount, int TriStride, 
+        const void *Normals, 
+        bool Single);
+
+    // Read-only accessors for the stored counts and strides
+public:
+    unsigned retrieveVertexCount() const { return m_vertexCount; }
+    int retrieveVertexStride() const { return m_vertexStride; }
+
+    unsigned retrieveTriangleCount() const { return m_triangleCount; }
+    int retrieveTriangleStride() const { return m_triStride; }
+    // The raw array pointers and the m_single flag are available to derived classes only
+protected:
+    const void *retrieveVertexInstances() const { return m_vertices; }
+    const void *retrieveTriangleVertexIndices() const { return m_indices; }
+    bool isSingle() const { return m_single; }
+    // Reads the three vertices of one triangle from raw strided arrays (defined in collision_trimesh_internal_impl.h)
+public:
+    template<typename tcoordfloat, typename tindexint>
+    static void retrieveTriangleVertexPoints(dVector3 out_Points[dMTV__MAX], unsigned triangleIndex,
+        const tcoordfloat *vertexInstances, int vertexStride, const tindexint *triangleVertexIndices, int triangleStride);
+    // The normals pointer is stored as given; the face angle pointers are only exposed here
+public:
+    void assignNormals(const void *normals) { m_normals = normals; }
+    const void *retrieveNormals() const { return m_normals; }
+
+    IFaceAngleStorageControl *retrieveFaceAngles() const { return m_faceAngles; }
+    IFaceAngleStorageView *retrieveFaceAngleView() const { return m_faceAngleView; }
+
+protected:
+    bool allocateFaceAngles(FaceAngleStorageMethod storageMethod);
+    void freeFaceAngles();
+    // True once m_faceAngles is non-NULL
+    bool haveFaceAnglesBeenBuilt() const { return m_faceAngles != NULL; }
+
+public:
+    enum MeshComponentUseFlags
+    {
+        CUF__USE_EDGES_MIN = 0x01, // edge flags: one bit per dMeshTriangleVertex value, starting at this bit
+        CUF_USE_FIRST_EDGE = CUF__USE_EDGES_MIN << dMTV_FIRST,
+        CUF_USE_SECOND_EDGE = CUF__USE_EDGES_MIN << dMTV_SECOND,
+        CUF_USE_THIRD_EDGE = CUF__USE_EDGES_MIN << dMTV_THIRD,
+        CUF__USE_EDGES_MAX = CUF__USE_EDGES_MIN << dMTV__MAX,
+        CUF__USE_ALL_EDGES = CUF_USE_FIRST_EDGE | CUF_USE_SECOND_EDGE | CUF_USE_THIRD_EDGE,
+        // Vertex flags follow immediately after the edge flags
+        CUF__USE_VERTICES_MIN = CUF__USE_EDGES_MAX,
+        CUF_USE_FIRST_VERTEX = CUF__USE_VERTICES_MIN << dMTV_FIRST,
+        CUF_USE_SECOND_VERTEX = CUF__USE_VERTICES_MIN << dMTV_SECOND,
+        CUF_USE_THIRD_VERTEX = CUF__USE_VERTICES_MIN << dMTV_THIRD,
+        CUF__USE_VERTICES_LAST = CUF__USE_VERTICES_MIN << (dMTV__MAX - 1),
+        CUF__USE_VERTICES_MAX = CUF__USE_VERTICES_MIN << dMTV__MAX,
+        CUF__USE_ALL_VERTICES = CUF_USE_FIRST_VERTEX | CUF_USE_SECOND_VERTEX | CUF_USE_THIRD_VERTEX,
+
+        CUF__USE_ALL_COMPONENTS = CUF__USE_ALL_VERTICES | CUF__USE_ALL_EDGES,
+    };
+
+    // Make sure that the flags match the values declared in public interface
+    dSASSERT((unsigned)CUF_USE_FIRST_EDGE == dMESHDATAUSE_EDGE1);
+    dSASSERT((unsigned)CUF_USE_SECOND_EDGE == dMESHDATAUSE_EDGE2);
+    dSASSERT((unsigned)CUF_USE_THIRD_EDGE == dMESHDATAUSE_EDGE3);
+    dSASSERT((unsigned)CUF_USE_FIRST_VERTEX == dMESHDATAUSE_VERTEX1);
+    dSASSERT((unsigned)CUF_USE_SECOND_VERTEX == dMESHDATAUSE_VERTEX2);
+    dSASSERT((unsigned)CUF_USE_THIRD_VERTEX == dMESHDATAUSE_VERTEX3);
+
+protected:
+    struct EdgeRecord // One edge of one triangle, as used by the meaningfulPreprocess_* helpers
+    {
+    public:
+        void setupEdge(dMeshTriangleVertex edgeIdx, int triIdx, const unsigned vertexIndices[dMTV__MAX]);
+
+        // Get the vertex opposite this edge in the triangle
+        dMeshTriangleVertex getOppositeVertexIndex() const
+        {
+            extern const CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161116DC> g_VertFlagOppositeIndices;
+            // XOR with all vertex flags leaves the bits this edge does not use; scaled down and decremented they index the table
+            dMeshTriangleVertex oppositeIndex = g_VertFlagOppositeIndices.Encode(((m_vert1Flags | m_vert2Flags) ^ CUF__USE_ALL_VERTICES) / CUF__USE_VERTICES_MIN - 1);
+            dIASSERT(dIN_RANGE(oppositeIndex, dMTV__MIN, dMTV__MAX));
+
+            return oppositeIndex;
+        }
+        // Same index computation as above, looked up in the edge start table instead
+        dMeshTriangleVertex getEdgeStartVertexIndex() const
+        {
+            extern const CEnumUnsortedElementArray<unsigned, dxTriDataBase::CUF__USE_VERTICES_LAST / dxTriDataBase::CUF__USE_VERTICES_MIN, dMeshTriangleVertex, 0x161225E9> g_VertFlagEdgeStartIndices;
+
+            dMeshTriangleVertex startIndex = g_VertFlagEdgeStartIndices.Encode(((m_vert1Flags | m_vert2Flags) ^ CUF__USE_ALL_VERTICES) / CUF__USE_VERTICES_MIN - 1);
+            dIASSERT(dIN_RANGE(startIndex, dMTV__MIN, dMTV__MAX));
+
+            return startIndex;
+        }
+        // Orders records by m_vertIdx1 first and by m_vertIdx2 second
+    public:
+        bool operator <(const EdgeRecord &anotherEdge) const { return m_vertIdx1 < anotherEdge.m_vertIdx1 || (m_vertIdx1 == anotherEdge.m_vertIdx1 && m_vertIdx2 < anotherEdge.m_vertIdx2); }
+        // Bits kept in m_absVertexFlags
+    public:
+        enum
+        {
+            AVF_VERTEX_USED             = 0x01,
+            AVF_VERTEX_HAS_CONCAVE_EDGE = 0x02,
+        };
+
+    public:
+        unsigned m_vertIdx1;    // Index into vertex array for this edge's vertices
+        unsigned m_vertIdx2;
+        unsigned m_triIdx;      // Index into triangle array for triangle this edge belongs to
+        // m_edgeFlags/m_vert1Flags/m_vert2Flags get OR-ed into the triangle's use flags by the preprocessing code
+        uint8 m_edgeFlags;
+        uint8 m_vert1Flags;
+        uint8 m_vert2Flags;
+        uint8 m_absVertexFlags;
+    };
+
+    struct VertexRecord
+    {
+        unsigned m_UsedFromEdgeIndex; // index of an element in the EdgeRecord array
+    };
+    // Preprocessing helpers working on the EdgeRecord array; the templates are defined in collision_trimesh_internal_impl.h
+    template<class TMeshDataAccessor>
+    static void meaningfulPreprocess_SetupEdgeRecords(EdgeRecord *edges, sizeint numEdges, const TMeshDataAccessor &dataAccessor);
+    template<class TMeshDataAccessor>
+    static void meaningfulPreprocess_buildEdgeFlags(uint8 *useFlags/*=NULL*/, IFaceAngleStorageControl *faceAngles/*=NULL*/, 
+        EdgeRecord *edges, sizeint numEdges, VertexRecord *vertices, 
+        const dReal *externalNormals, const TMeshDataAccessor &dataAccessor);
+    static void buildBoundaryEdgeAngle(IFaceAngleStorageControl *faceAngles, EdgeRecord *currEdge);
+    template<class TMeshDataAccessor>
+    static void buildConcaveEdgeAngle(IFaceAngleStorageControl *faceAngles, bool negativeAnglesStored, 
+        EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+        const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+        const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/, 
+        const TMeshDataAccessor &dataAccessor);
+    template<class TMeshDataAccessor>
+    static 
+    void buildConvexEdgeAngle(IFaceAngleStorageControl *faceAngles, 
+        EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+        const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+        const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/, 
+        const TMeshDataAccessor &dataAccessor);
+    template<class TMeshDataAccessor>
+    static dReal calculateEdgeAngleValidated(unsigned firstVertexStartIndex,
+        EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+        const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+        const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/, 
+        const TMeshDataAccessor &dataAccessor);
+
+private:
+    const void *m_vertices;
+    int m_vertexStride;
+    unsigned m_vertexCount;
+    const void *m_indices;
+    unsigned m_triangleCount;
+    int m_triStride;
+    bool m_single;
+    // Normals pointer plus the two interfaces of the face angle storage
+private:
+    const void *m_normals;
+    IFaceAngleStorageControl *m_faceAngles;
+    IFaceAngleStorageView *m_faceAngleView; 
+};
+
+
+typedef dxGeom dxMeshBase_Parent;
+struct dxMeshBase: // Common part of trimesh geoms: user callbacks, the attached mesh data and the m_DoTCs switches
+    public dxMeshBase_Parent
+{
+public:
+    dxMeshBase(dxSpace *Space, dxTriDataBase *Data, 
+        dTriCallback *Callback, dTriArrayCallback *ArrayCallback, dTriRayCallback *RayCallback, 
+        bool doTCs=false):
+        dxMeshBase_Parent(Space, 1),
+        m_Callback(Callback),
+        m_ArrayCallback(ArrayCallback),
+        m_RayCallback(RayCallback),
+        m_TriMergeCallback(NULL),
+        m_Data(Data)
+    {
+        std::fill(m_DoTCs, m_DoTCs + dARRAY_SIZE(m_DoTCs), doTCs); // every TRIMESHTC slot starts with the same value
+        type = dTriMeshClass;
+    }
+    // Returns true when no dTriCallback is installed or when the installed one returns non-zero
+    bool invokeCallback(dxGeom *Object, int TriIndex)
+    {
+        return m_Callback == NULL || m_Callback(this, Object, TriIndex) != 0;
+    }
+
+public:
+    enum TRIMESHTC // Index type for m_DoTCs
+    {
+        TTC__MIN,
+
+        TTC_SPHERE = TTC__MIN,
+        TTC_BOX,
+        TTC_CAPSULE,
+
+        TTC__MAX,
+    };
+    // Plain assign/retrieve pairs for the four callbacks
+public:
+    void assignCallback(dTriCallback *value) { m_Callback = value; }
+    dTriCallback *retrieveCallback() const { return m_Callback; }
+
+    void assignArrayCallback(dTriArrayCallback *value) { m_ArrayCallback = value; }
+    dTriArrayCallback *retrieveArrayCallback() const { return m_ArrayCallback; }
+
+    void assignRayCallback(dTriRayCallback *value) { m_RayCallback = value; }
+    dTriRayCallback *retrieveRayCallback() const { return m_RayCallback; }
+
+    void assignTriMergeCallback(dTriTriMergeCallback *value) { m_TriMergeCallback = value; }
+    dTriTriMergeCallback *retrieveTriMergeCallback() const { return m_TriMergeCallback; }
+
+    void assignMeshData(dxTriDataBase *instance)
+    {
+        setMeshData(instance);
+        // I changed my data -- I know nothing about my own AABB anymore.
+        markAABBBad();
+    }
+    dxTriDataBase *retrieveMeshData() const { return getMeshData(); }
+    // Nothing here rejects a NULL data pointer, so both lookups below yield NULL instead of dereferencing it
+    IFaceAngleStorageControl *retrieveFaceAngleStorage() const { return m_Data != NULL ? m_Data->retrieveFaceAngles() : NULL; }
+    IFaceAngleStorageView *retrieveFaceAngleView() const { return m_Data != NULL ? m_Data->retrieveFaceAngleView() : NULL; }
+
+    void assignDoTC(TRIMESHTC tc, bool value) { setDoTC(tc, value); }
+    bool retrieveDoTC(TRIMESHTC tc) const { return getDoTC(tc); }
+    // Range-checked access to the m_DoTCs element selected by tc
+public:
+    void setDoTC(TRIMESHTC tc, bool value) { dIASSERT(dIN_RANGE(tc, TTC__MIN, TTC__MAX)); m_DoTCs[tc] = value; }
+    bool getDoTC(TRIMESHTC tc) const { dIASSERT(dIN_RANGE(tc, TTC__MIN, TTC__MAX)); return m_DoTCs[tc]; }
+
+private:
+    void setMeshData(dxTriDataBase *Data) { m_Data = Data; }
+
+protected:
+    dxTriDataBase *getMeshData() const { return m_Data; }
+
+public:
+    // Callbacks
+    dTriCallback *m_Callback;
+    dTriArrayCallback *m_ArrayCallback;
+    dTriRayCallback *m_RayCallback;
+    dTriTriMergeCallback *m_TriMergeCallback;
+
+private:
+    // Data types
+    dxTriDataBase *m_Data;
+
+public:
+    bool m_DoTCs[TTC__MAX];
+};
+
+
+IFaceAngleStorageView *dxGeomTriMeshGetFaceAngleView(dxGeom *triMeshGeom); // Returns the face angle view of a trimesh geom; the argument is asserted to be of dTriMeshClass
+
+
+#include "collision_trimesh_gimpact.h"
+#include "collision_trimesh_opcode.h"
+
+
+#endif	//_ODE_COLLISION_TRIMESH_INTERNAL_H_
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_internal_impl.h b/libs/ode-0.16.1/ode/src/collision_trimesh_internal_impl.h
new file mode 100644
index 0000000..be41ff5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_internal_impl.h
@@ -0,0 +1,463 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh base template method implementations by Oleh Derevenko (C) 2016-2019
+
+
+#ifndef _ODE_COLLISION_TRIMESH_INTERNAL_IMPL_H_
+#define _ODE_COLLISION_TRIMESH_INTERNAL_IMPL_H_
+
+
+#include "collision_trimesh_internal.h"
+
+
+#if dTRIMESH_ENABLED
+
+
+template<typename tcoordfloat, typename tindexint>
+/*static */ // Copies the three vertices of one triangle into out_Points, converting every coordinate to dReal
+void dxTriDataBase::retrieveTriangleVertexPoints(dVector3 out_Points[dMTV__MAX], unsigned triangleIndex,
+    const tcoordfloat *vertexInstances, int vertexStride, const tindexint *triangleVertexIndices, int triangleStride)
+{   // Both strides are applied to uint8 pointers; the inputs are only read, so the casts below keep them const
+    const tindexint *triangleIndicesOfInterest = (const tindexint *)((const uint8 *)triangleVertexIndices + (sizeint)triangleIndex * triangleStride);
+    for (unsigned trianglePoint = dMTV__MIN; trianglePoint != dMTV__MAX; ++trianglePoint)
+    {
+        unsigned vertexIndex = triangleIndicesOfInterest[trianglePoint];
+        const tcoordfloat *pointVertex = (const tcoordfloat *)((const uint8 *)vertexInstances + (sizeint)vertexIndex * vertexStride);
+        dAssignVector3(out_Points[trianglePoint], (dReal)pointVertex[dSA_X], (dReal)pointVertex[dSA_Y], (dReal)pointVertex[dSA_Z]);
+        dSASSERT(dSA_X == 0);
+        dSASSERT(dSA_Y == 1);
+        dSASSERT(dSA_Z == 2);
+    }
+}
+
+
+template<class TMeshDataAccessor>
+/*static */
+void dxTriDataBase::meaningfulPreprocess_SetupEdgeRecords(EdgeRecord *edges, sizeint numEdges, const TMeshDataAccessor &dataAccessor)
+{
+    dIASSERT(numEdges % dMTV__MAX == 0); // the "!=" loop exit below would be stepped over (and edges overrun) otherwise
+    unsigned vertexIndices[dMTV__MAX];
+    // Make a list of every edge in the mesh: dMTV__MAX consecutive records per triangle
+    unsigned triangleIdx = 0;
+    for (sizeint edgeIdx = 0; edgeIdx != numEdges; ++triangleIdx, edgeIdx += dMTV__MAX)
+    {
+        dataAccessor.getTriangleVertexIndices(vertexIndices, triangleIdx);
+        edges[edgeIdx + dMTV_FIRST].setupEdge(dMTV_FIRST, triangleIdx, vertexIndices);
+        edges[edgeIdx + dMTV_SECOND].setupEdge(dMTV_SECOND, triangleIdx, vertexIndices);
+        edges[edgeIdx + dMTV_THIRD].setupEdge(dMTV_THIRD, triangleIdx, vertexIndices);
+    }
+}
+
+template<class TMeshDataAccessor>
+/*static */
+void dxTriDataBase::meaningfulPreprocess_buildEdgeFlags(uint8 *useFlags/*=NULL*/, IFaceAngleStorageControl *faceAngles/*=NULL*/, 
+    EdgeRecord *edges, sizeint numEdges, VertexRecord *vertices, 
+    const dReal *externalNormals/*=NULL*/, const TMeshDataAccessor &dataAccessor)
+{
+    dIASSERT(useFlags != NULL || faceAngles != NULL);
+    dIASSERT(numEdges != 0);
+
+    const bool negativeAnglesStored = faceAngles != NULL && faceAngles->areNegativeAnglesStored();
+
+    // Go through the sorted list of edges and flag all the edges and vertices that we need to use
+    EdgeRecord *const lastEdge = edges + (numEdges - 1);
+    for (EdgeRecord *currEdge = edges; ; ++currEdge)
+    {
+        // Handle the last edge separately to have an optimizer friendly loop
+        if (currEdge >= lastEdge)
+        {
+            // This is a boundary edge
+            if (currEdge == lastEdge)
+            {
+                if (faceAngles != NULL)
+                {
+                    buildBoundaryEdgeAngle(faceAngles, currEdge);
+                }
+
+                if (useFlags != NULL)
+                {
+                    // For the last element the EdgeRecord::AVF_VERTEX_USED assignment can be skipped as no one is going to need it any more
+                    useFlags[currEdge[0].m_triIdx] |= ((edges[currEdge[0].m_vertIdx1].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0 ? currEdge[0].m_vert1Flags : 0) 
+                        | ((edges[currEdge[0].m_vertIdx2].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0 ? currEdge[0].m_vert2Flags : 0)
+                        | currEdge[0].m_edgeFlags;
+                }
+            }
+
+            break;
+        }
+
+        unsigned vertIdx1 = currEdge[0].m_vertIdx1;
+        unsigned vertIdx2 = currEdge[0].m_vertIdx2;
+
+        if (vertIdx2 == currEdge[1].m_vertIdx2 // Check second vertex first as it is more likely to change taking the sorting rules into account
+            && vertIdx1 == currEdge[1].m_vertIdx1)
+        {
+            // We let the dot threshold for concavity get slightly negative to allow for rounding errors
+            const float kConcaveThreshold = 0.000001f;
+
+            const dVector3 *pSecondTriangleEdgeToUse = NULL, *pFirstTriangleToUse = NULL;
+            dVector3 secondTriangleMatchingEdge;
+            dVector3 firstTriangle[dMTV__MAX];
+            dVector3 secondOppositeVertexSegment, triangleNormal;
+            dReal lengthSquareProduct, secondOppositeSegmentLengthSquare;
+
+            // Calculate orthogonal vector from the matching edge of the second triangle to its opposite point
+            {
+                dVector3 secondTriangle[dMTV__MAX];
+                dataAccessor.getTriangleVertexPoints(secondTriangle, currEdge[1].m_triIdx);
+
+                // Get the vertex opposite this edge in the second triangle
+                dMeshTriangleVertex secondOppositeVertex = currEdge[1].getOppositeVertexIndex();
+                dMeshTriangleVertex secondEdgeStart = secondOppositeVertex + 1 != dMTV__MAX ? (dMeshTriangleVertex)(secondOppositeVertex + 1) : dMTV__MIN;
+                dMeshTriangleVertex secondEdgeEnd = (dMeshTriangleVertex)(dMTV_FIRST + dMTV_SECOND + dMTV_THIRD - secondEdgeStart - secondOppositeVertex);
+
+                dSubtractVectors3(secondTriangleMatchingEdge, secondTriangle[secondEdgeEnd], secondTriangle[secondEdgeStart]);
+
+                if (dSafeNormalize3(secondTriangleMatchingEdge))
+                {
+                    pSecondTriangleEdgeToUse = &secondTriangleMatchingEdge;
+
+                    dVector3 secondTriangleOppositeEdge;
+                    dSubtractVectors3(secondTriangleOppositeEdge, secondTriangle[secondOppositeVertex], secondTriangle[secondEdgeStart]);
+                    dReal dProjectionLength = dCalcVectorDot3(secondTriangleOppositeEdge, secondTriangleMatchingEdge);
+                    dAddVectorScaledVector3(secondOppositeVertexSegment, secondTriangleOppositeEdge, secondTriangleMatchingEdge, -dProjectionLength);
+                }
+                else
+                {
+                    dSubtractVectors3(secondOppositeVertexSegment, secondTriangle[secondOppositeVertex], secondTriangle[secondEdgeStart]);
+                }
+
+                secondOppositeSegmentLengthSquare = dCalcVectorLengthSquare3(secondOppositeVertexSegment);
+            }
+
+            // Either calculate the normal from triangle vertices...
+            if (externalNormals == NULL)
+            {
+                // Get the normal of the first triangle
+                dataAccessor.getTriangleVertexPoints(firstTriangle, currEdge[0].m_triIdx);
+                pFirstTriangleToUse = &firstTriangle[dMTV__MIN];
+
+                dVector3 firstEdge, secondEdge;
+                dSubtractVectors3(secondEdge, firstTriangle[dMTV_THIRD], firstTriangle[dMTV_SECOND]);
+                dSubtractVectors3(firstEdge, firstTriangle[dMTV_FIRST], firstTriangle[dMTV_SECOND]);
+                dCalcVectorCross3(triangleNormal, secondEdge, firstEdge);
+                dReal normalLengthSuqare = dCalcVectorLengthSquare3(triangleNormal);
+                lengthSquareProduct = secondOppositeSegmentLengthSquare * normalLengthSuqare;
+            }
+            // ...or use the externally supplied normals
+            else
+            {
+                const dReal *pTriangleExternalNormal = externalNormals + currEdge[0].m_triIdx * dSA__MAX;
+                dAssignVector3(triangleNormal, pTriangleExternalNormal[dSA_X], pTriangleExternalNormal[dSA_Y], pTriangleExternalNormal[dSA_Z]);
+                // normalLengthSuqare = REAL(1.0);
+                dUASSERT(dFabs(dCalcVectorLengthSquare3(triangleNormal) - REAL(1.0)) < REAL(0.25) * kConcaveThreshold * kConcaveThreshold, "Mesh triangle normals must be normalized");
+
+                lengthSquareProduct = secondOppositeSegmentLengthSquare/* * normalLengthSuqare*/;
+            }
+
+            dReal normalSegmentDot = dCalcVectorDot3(triangleNormal, secondOppositeVertexSegment);
+
+            // This is a concave edge, leave it for the next pass
+            // OD: This is the "dot >= kConcaveThreshold" check, but since the vectors were not normalized to save on roots and divisions,
+            // the check against zero is performed first and then the squared dot product is compared against the squared threshold multiplied by the product of the squared lengths
+            // OD: Originally, there was a "dot > -kConcaveThreshold" check, but this does not seem to be a good idea
+            // as it can mark all edges on potentially large (nearly) flat surfaces concave.
+            if (normalSegmentDot > REAL(0.0) && normalSegmentDot * normalSegmentDot >= kConcaveThreshold * kConcaveThreshold * lengthSquareProduct)
+            {
+                if (faceAngles != NULL)
+                {
+                    buildConcaveEdgeAngle(faceAngles, negativeAnglesStored, currEdge, normalSegmentDot, lengthSquareProduct,
+                        triangleNormal, secondOppositeVertexSegment,
+                        pSecondTriangleEdgeToUse, pFirstTriangleToUse, dataAccessor);
+                }
+
+                if (useFlags != NULL)
+                {
+                    // Mark the vertices of a concave edge to prevent their use
+                    unsigned absVertexFlags1 = edges[vertIdx1].m_absVertexFlags;
+                    edges[vertIdx1].m_absVertexFlags |= absVertexFlags1 | EdgeRecord::AVF_VERTEX_HAS_CONCAVE_EDGE | EdgeRecord::AVF_VERTEX_USED;
+
+                    if ((absVertexFlags1 & (EdgeRecord::AVF_VERTEX_HAS_CONCAVE_EDGE | EdgeRecord::AVF_VERTEX_USED)) == EdgeRecord::AVF_VERTEX_USED)
+                    {
+                        // If the vertex was already used from other triangles but then discovered 
+                        // to have a concave edge, unmark the previous use
+                        unsigned usedFromEdgeIndex = vertices[vertIdx1].m_UsedFromEdgeIndex;
+                        const EdgeRecord *usedFromEdge = edges + usedFromEdgeIndex;
+                        unsigned usedInTriangleIndex = usedFromEdge->m_triIdx;
+                        uint8 usedVertFlags = usedFromEdge->m_vertIdx1 == vertIdx1 ? usedFromEdge->m_vert1Flags : usedFromEdge->m_vert2Flags;
+                        useFlags[usedInTriangleIndex] ^= usedVertFlags;
+                        dIASSERT((useFlags[usedInTriangleIndex] & usedVertFlags) == 0);
+                    }
+
+                    unsigned absVertexFlags2 = edges[vertIdx2].m_absVertexFlags;
+                    edges[vertIdx2].m_absVertexFlags = absVertexFlags2 | EdgeRecord::AVF_VERTEX_HAS_CONCAVE_EDGE | EdgeRecord::AVF_VERTEX_USED;
+
+                    if ((absVertexFlags2 & (EdgeRecord::AVF_VERTEX_HAS_CONCAVE_EDGE | EdgeRecord::AVF_VERTEX_USED)) == EdgeRecord::AVF_VERTEX_USED)
+                    {
+                        // Similarly unmark the possible previous use of the edge's second vertex
+                        unsigned usedFromEdgeIndex = vertices[vertIdx2].m_UsedFromEdgeIndex;
+                        const EdgeRecord *usedFromEdge = edges + usedFromEdgeIndex;
+                        unsigned usedInTriangleIndex = usedFromEdge->m_triIdx;
+                        uint8 usedVertFlags = usedFromEdge->m_vertIdx1 == vertIdx2 ? usedFromEdge->m_vert1Flags : usedFromEdge->m_vert2Flags;
+                        useFlags[usedInTriangleIndex] ^= usedVertFlags;
+                        dIASSERT((useFlags[usedInTriangleIndex] & usedVertFlags) == 0);
+                    }
+                }
+            }
+            // If this is a convex edge, mark its vertices and edge as used
+            else
+            {
+                if (faceAngles != NULL)
+                {
+                    buildConvexEdgeAngle(faceAngles, currEdge, normalSegmentDot, lengthSquareProduct,
+                        triangleNormal, secondOppositeVertexSegment,
+                        pSecondTriangleEdgeToUse, pFirstTriangleToUse, dataAccessor);
+                }
+
+                if (useFlags != NULL)
+                {
+                    EdgeRecord *edgeToUse = currEdge;
+                    unsigned triIdx = edgeToUse[0].m_triIdx;
+                    unsigned triIdx1 = edgeToUse[1].m_triIdx;
+                    
+                    unsigned triUseFlags = useFlags[triIdx];
+                    unsigned triUseFlags1 = useFlags[triIdx1];
+
+                    // Choose to add flags to the bitmask that already has more edges
+                    // (to group flags in selected triangles rather than scattering them evenly)
+                    if ((triUseFlags1 & CUF__USE_ALL_EDGES) > (triUseFlags & CUF__USE_ALL_EDGES))
+                    {
+                        triIdx = triIdx1;
+                        triUseFlags = triUseFlags1;
+                        edgeToUse = edgeToUse + 1;
+                    }
+
+                    if ((edges[vertIdx1].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0)
+                    {
+                        // Only add each vertex once and set a mark to prevent further additions
+                        edges[vertIdx1].m_absVertexFlags |= EdgeRecord::AVF_VERTEX_USED;
+                        // Also remember the index the vertex flags are going to be applied to 
+                        // to allow easily clear the vertex from the use flags if any concave edges are found to connect to it
+                        vertices[vertIdx1].m_UsedFromEdgeIndex = (unsigned)(edgeToUse - edges);
+                        triUseFlags |= edgeToUse[0].m_vert1Flags;
+                    }
+
+                    // Same processing for the second vertex...
+                    if ((edges[vertIdx2].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0)
+                    {
+                        edges[vertIdx2].m_absVertexFlags |= EdgeRecord::AVF_VERTEX_USED;
+                        vertices[vertIdx2].m_UsedFromEdgeIndex = (unsigned)(edgeToUse - edges);
+                        triUseFlags |= edgeToUse[0].m_vert2Flags;
+                    }
+
+                    // And finally store the use flags adding the edge flags in
+                    useFlags[triIdx] = triUseFlags | edgeToUse[0].m_edgeFlags;
+                }
+            }
+
+            // Skip the second edge
+            ++currEdge;
+        }
+        // This is a boundary edge
+        else
+        {
+            if (faceAngles != NULL)
+            {
+                buildBoundaryEdgeAngle(faceAngles, currEdge);
+            }
+
+            if (useFlags != NULL)
+            {
+                unsigned triIdx = currEdge[0].m_triIdx;
+                unsigned triUseExtraFlags = 0;
+                
+                if ((edges[vertIdx1].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0)
+                {
+                    edges[vertIdx1].m_absVertexFlags |= EdgeRecord::AVF_VERTEX_USED;
+                    vertices[vertIdx1].m_UsedFromEdgeIndex = (unsigned)(currEdge - edges);
+                    triUseExtraFlags |= currEdge[0].m_vert1Flags;
+                }
+
+                if ((edges[vertIdx2].m_absVertexFlags & EdgeRecord::AVF_VERTEX_USED) == 0)
+                {
+                    edges[vertIdx2].m_absVertexFlags |= EdgeRecord::AVF_VERTEX_USED;
+                    vertices[vertIdx2].m_UsedFromEdgeIndex = (unsigned)(currEdge - edges);
+                    triUseExtraFlags |= currEdge[0].m_vert2Flags;
+                }
+
+                useFlags[triIdx] |= triUseExtraFlags | currEdge[0].m_edgeFlags;
+            }
+        }
+    }
+}
+
+/*static */
+void dxTriDataBase::buildBoundaryEdgeAngle(IFaceAngleStorageControl *faceAngles,
+    EdgeRecord *currEdge)
+{
+    // Stores the face angle for a boundary edge.
+    // For boundary edges only the first edge record is valid, so only
+    // currEdge[0] is read and no angle is stored for a second triangle.
+    EdgeRecord &boundaryEdge = currEdge[0];
+    dMeshTriangleVertex edgeStartVertexIndex = boundaryEdge.getEdgeStartVertexIndex();
+
+    // Boundary edges are always assigned a zero angle
+    const dReal boundaryAngle = REAL(0.0);
+    faceAngles->assignFacesAngleIntoStorage(boundaryEdge.m_triIdx, edgeStartVertexIndex, boundaryAngle);
+}
+
+template<class TMeshDataAccessor>
+/*static */
+void dxTriDataBase::buildConcaveEdgeAngle(IFaceAngleStorageControl *faceAngles, bool negativeAnglesStored,
+    EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+    const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+    const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/,
+    const TMeshDataAccessor &dataAccessor)
+{
+    // Determines the angle value for a concave edge and stores the same value
+    // for both edge records (currEdge[0] and currEdge[1]).
+    dMeshTriangleVertex firstStartIndex = currEdge[0].getEdgeStartVertexIndex();
+
+    dReal concaveAngle;
+
+    if (!negativeAnglesStored)
+    {
+        // Concave angles are not stored: any negative value will do
+        concaveAngle = -(dReal)M_PI;
+    }
+    else if (lengthSquareProduct == REAL(0.0))
+    {
+        // The length square product can degrade to zero due to precision loss
+        // when both the normal and the opposite edge vectors are very small.
+        concaveAngle = REAL(0.0);
+    }
+    else
+    {
+        // Regular case: the computed angle is stored negated
+        concaveAngle = -calculateEdgeAngleValidated(firstStartIndex,
+            currEdge, normalSegmentDot, lengthSquareProduct, triangleNormal, secondOppositeVertexSegment,
+            pSecondTriangleMatchingEdge, pFirstTriangle, dataAccessor);
+    }
+
+    faceAngles->assignFacesAngleIntoStorage(currEdge[0].m_triIdx, firstStartIndex, concaveAngle);
+
+    dMeshTriangleVertex secondStartIndex = currEdge[1].getEdgeStartVertexIndex();
+    faceAngles->assignFacesAngleIntoStorage(currEdge[1].m_triIdx, secondStartIndex, concaveAngle);
+}
+
+template<class TMeshDataAccessor>
+/*static */
+void dxTriDataBase::buildConvexEdgeAngle(IFaceAngleStorageControl *faceAngles,
+    EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+    const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+    const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/,
+    const TMeshDataAccessor &dataAccessor)
+{
+    // Determines the angle value for a convex edge and stores the same value
+    // for both edge records (currEdge[0] and currEdge[1]).
+    dMeshTriangleVertex firstStartIndex = currEdge[0].getEdgeStartVertexIndex();
+
+    // Zero is stored unless the dot product is strictly negative and the
+    // length square product is usable. The latter can become zero due to
+    // precision loss when both the normal and the opposite edge vectors
+    // are very small.
+    dReal convexAngle = REAL(0.0);
+
+    const bool angleComputable = normalSegmentDot < REAL(0.0) && lengthSquareProduct != REAL(0.0);
+
+    if (angleComputable)
+    {
+        // The helper is given the negated (i.e. positive) dot product
+        convexAngle = calculateEdgeAngleValidated(firstStartIndex,
+            currEdge, -normalSegmentDot, lengthSquareProduct, triangleNormal, secondOppositeVertexSegment,
+            pSecondTriangleMatchingEdge, pFirstTriangle, dataAccessor);
+    }
+
+    faceAngles->assignFacesAngleIntoStorage(currEdge[0].m_triIdx, firstStartIndex, convexAngle);
+
+    dMeshTriangleVertex secondStartIndex = currEdge[1].getEdgeStartVertexIndex();
+    faceAngles->assignFacesAngleIntoStorage(currEdge[1].m_triIdx, secondStartIndex, convexAngle);
+}
+
+template<class TMeshDataAccessor>
+/*static */
+dReal dxTriDataBase::calculateEdgeAngleValidated(unsigned firstVertexStartIndex,
+    EdgeRecord *currEdge, const dReal &normalSegmentDot, const dReal &lengthSquareProduct,
+    const dVector3 &triangleNormal, const dVector3 &secondOppositeVertexSegment,
+    const dVector3 *pSecondTriangleMatchingEdge/*=NULL*/, const dVector3 *pFirstTriangle/*=NULL*/,
+    const TMeshDataAccessor &dataAccessor)
+{
+    // Computes the (non-negated) edge angle from the dot product of the first
+    // triangle normal and the second triangle's opposite vertex segment,
+    // normalized by the square root of lengthSquareProduct.
+    //
+    // pSecondTriangleMatchingEdge and pFirstTriangle are optional: when the
+    // former is NULL the direction check is derived from the first triangle,
+    // whose points are fetched via dataAccessor if pFirstTriangle is NULL too.
+    dIASSERT(lengthSquareProduct >= REAL(0.0));
+
+    const dReal angleCosine = normalSegmentDot / dSqrt(lengthSquareProduct);
+
+    // Everything that is not strictly below one is treated as exactly one
+    if (!(angleCosine < REAL(1.0)))
+    {
+        // The computational error can not be too high because the dot product
+        // had been verified to be greater than the concave threshold by the caller
+        dIASSERT(angleCosine - REAL(1.0) < 1e-4);
+        return (dReal)M_PI_2;
+    }
+
+    dVector3 normalSecondOppositeSegmentCross;
+    dCalcVectorCross3(normalSecondOppositeSegmentCross, triangleNormal, secondOppositeVertexSegment);
+
+    dReal directionCheck;
+
+    if (pSecondTriangleMatchingEdge == NULL)
+    {
+        // The second triangle's edge is not available: use the supposed
+        // direction of that edge as the negative of the first triangle's edge.
+        // For that, cross-multiply the precomputed first triangle normal by the
+        // vector from the edge to the first triangle's opposite vertex.
+
+        // Fetch the first triangle points unless the caller supplied them
+        dVector3 fetchedFirstTriangle[dMTV__MAX];
+        const dVector3 *firstTrianglePoints = pFirstTriangle;
+
+        if (firstTrianglePoints == NULL)
+        {
+            dataAccessor.getTriangleVertexPoints(fetchedFirstTriangle, currEdge[0].m_triIdx);
+            firstTrianglePoints = &fetchedFirstTriangle[dMTV__MIN];
+        }
+
+        // The opposite vertex is the one preceding the edge start (with wrap-around)
+        unsigned oppositeVertexIndex = firstVertexStartIndex != dMTV__MIN ? firstVertexStartIndex - 1 : dMTV__MAX - 1;
+
+        dVector3 firstOppositeVertexSegment;
+        dSubtractVectors3(firstOppositeVertexSegment, firstTrianglePoints[oppositeVertexIndex], firstTrianglePoints[firstVertexStartIndex]);
+
+        dVector3 normalFirstOppositeSegmentCross;
+        dCalcVectorCross3(normalFirstOppositeSegmentCross, triangleNormal, firstOppositeVertexSegment);
+
+        // Compare the directions of the two cross products
+        directionCheck = dCalcVectorDot3(normalSecondOppositeSegmentCross, normalFirstOppositeSegmentCross);
+    }
+    else
+    {
+        // Check the cross product against the second triangle's edge directly
+        directionCheck = dCalcVectorDot3(normalSecondOppositeSegmentCross, *pSecondTriangleMatchingEdge);
+    }
+
+    // A negative product means the angle absolute value is less than M_PI_2,
+    // a non-negative one - that it is greater.
+    if (directionCheck < REAL(0.0))
+    {
+        return dAsin(angleCosine);
+    }
+
+    return (dReal)M_PI_2 + dAcos(angleCosine);
+}
+
+
+#endif // #if dTRIMESH_ENABLED
+
+
+#endif // #ifndef _ODE_COLLISION_TRIMESH_INTERNAL_IMPL_H_
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.cpp
new file mode 100644
index 0000000..53d8b0f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.cpp
@@ -0,0 +1,767 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2019
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+
+#include "collision_util.h"
+#include "collision_trimesh_opcode.h"
+#include "collision_trimesh_internal_impl.h"
+#include <algorithm>
+
+
+//////////////////////////////////////////////////////////////////////////
+// TrimeshCollidersCache
+
+void TrimeshCollidersCache::initOPCODECaches()
+{
+    // Applies the fixed configuration to the OPCODE colliders held by this cache
+    // and reports an error if the AABB tree collider rejects its settings.
+    m_RayCollider.SetDestination(&m_Faces);
+
+    /* -- not used
+    _PlanesCollider.SetTemporalCoherence(true);
+    */
+
+    m_SphereCollider.SetTemporalCoherence(true);
+    m_SphereCollider.SetPrimitiveTests(false);
+
+    m_OBBCollider.SetTemporalCoherence(true);
+
+    // no first-contact test (i.e. return full contact info)
+    m_AABBTreeCollider.SetFirstContact( false );
+    // temporal coherence only works with "first contact" tests
+    m_AABBTreeCollider.SetTemporalCoherence(false);
+    // Perform full BV-BV tests (true) or SAT-lite tests (false)
+    m_AABBTreeCollider.SetFullBoxBoxTest( true );
+    // Perform full Primitive-BV tests (true) or SAT-lite tests (false)
+    m_AABBTreeCollider.SetFullPrimBoxTest( true );
+
+    // A non-NULL result is a description of what is wrong with the settings
+    const char *msg = m_AABBTreeCollider.ValidateSettings();
+    if (msg != NULL)
+    {
+        // dDebug() interprets its second argument as a printf-style format.
+        // The message must therefore go through "%s": passing it as the format
+        // itself would silently drop the file/line suffix and would misinterpret
+        // any '%' character the message might contain.
+        dDebug (d_ERR_UASSERT, "%s (%s:%d)", msg, __FILE__, __LINE__);
+    }
+
+    /* -- not used
+    _LSSCollider.SetTemporalCoherence(false);
+    _LSSCollider.SetPrimitiveTests(false);
+    _LSSCollider.SetFirstContact(false);
+    */
+}
+
+void TrimeshCollidersCache::clearOPCODECaches()
+{
+    // Empty the touched primitive lists of the default per-shape caches...
+    m_DefaultCapsuleCache.TouchedPrimitives.Empty();
+    m_DefaultBoxCache.TouchedPrimitives.Empty();
+    m_DefaultSphereCache.TouchedPrimitives.Empty();
+
+    // ...and the faces container
+    m_Faces.Empty();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Trimesh data
+
+dxTriMeshData::~dxTriMeshData()
+{
+    // Nothing to release if the use flags have never been built
+    if ( m_InternalUseFlags == NULL )
+    {
+        return;
+    }
+
+    // dFree() takes the size of the block being released
+    const sizeint useFlagsSize = calculateUseFlagsMemoryRequirement();
+    dFree(m_InternalUseFlags, useFlagsSize);
+}
+
+void dxTriMeshData::buildData(const Point *Vertices, int VertexStide, unsigned VertexCount,
+    const IndexedTriangle *Indices, unsigned IndexCount, int TriStride,
+    const dReal *in_Normals,
+    bool Single)
+{
+    // Passes the mesh description to the parent class and to the OPCODE mesh
+    // interface, builds the bounding volume tree and caches the model space
+    // AABB center and extents.
+    dxTriMeshData_Parent::buildData(Vertices, VertexStide, VertexCount, Indices, IndexCount, TriStride, in_Normals, Single);
+    dAASSERT(IndexCount % dMTV__MAX == 0);
+
+    // The index count covers dMTV__MAX indices per triangle
+    const unsigned triangleCount = IndexCount / dMTV__MAX;
+
+    m_Mesh.SetNbTriangles(triangleCount);
+    m_Mesh.SetNbVertices(VertexCount);
+    m_Mesh.SetPointers(Indices, Vertices);
+    m_Mesh.SetStrides(TriStride, VertexStide);
+    m_Mesh.SetSingle(Single);
+
+    // Tree building rules. Alternatives that have been considered:
+    //   SPLIT_COMPLETE | SPLIT_SPLATTERPOINTS | SPLIT_GEOMCENTER -- recommended in Opcode User Manual
+    //   SPLIT_BEST_AXIS                                          -- used in ODE earlier
+    // The combination below is used as a compromise.
+    BuildSettings treeSettings(SPLIT_BEST_AXIS | SPLIT_SPLATTER_POINTS | SPLIT_GEOM_CENTER);
+
+    OPCODECREATE treeCreateInfo(&m_Mesh, treeSettings, true, false);
+
+    m_BVTree.Build(treeCreateInfo);
+
+    // Model space AABB: center = (min + max) / 2, extents = max - center
+    dVector3 boxMax, boxMin;
+    calculateDataAABB(boxMax, boxMin);
+
+    dAddVectors3(m_AABBCenter, boxMin, boxMax);
+    dScaleVector3(m_AABBCenter, REAL(0.5));
+
+    dSubtractVectors3(m_AABBExtents, boxMax, m_AABBCenter);
+
+    // The use flags (user data, not used by OPCODE) must not have been built yet
+    dIASSERT(m_InternalUseFlags == NULL);
+}
+
+
+void dxTriMeshData::calculateDataAABB(dVector3 &AABBMax, dVector3 &AABBMin)
+{
+    // Dispatch to the implementation matching the stored vertex precision
+    if (!isSingle())
+    {
+        templateCalculateDataAABB<double>(AABBMax, AABBMin);
+        return;
+    }
+
+    templateCalculateDataAABB<float>(AABBMax, AABBMin);
+}
+
+template<typename treal>
+void dxTriMeshData::templateCalculateDataAABB(dVector3 &AABBMax, dVector3 &AABBMin)
+{
+    // Scans all the vertices, interpreting each one as an array of treal
+    // coordinates, and accumulates the per-axis minimums and maximums.
+    dIASSERT(isSingle() == (sizeof(treal) == sizeof(float)));
+    dSASSERT(dV3E__AXES_COUNT == 3);
+
+    const Point *vertexInstances = retrieveVertexInstances();
+    const int strideInBytes = retrieveVertexStride();
+    const unsigned totalVertices = retrieveVertexCount();
+
+    // Start with an inverted box so that any vertex replaces the initial bounds
+    AABBMin[dV3E_X] = AABBMin[dV3E_Y] = AABBMin[dV3E_Z] = dInfinity;
+    AABBMax[dV3E_X] = AABBMax[dV3E_Y] = AABBMax[dV3E_Z] = -dInfinity;
+
+    // The vertices are walked as raw bytes to honor the stride
+    const uint8 *cursor = (const uint8 *)vertexInstances;
+
+    for (unsigned remaining = totalVertices; remaining != 0; --remaining, cursor += strideInBytes)
+    {
+        const treal *coords = (const treal *)cursor;
+
+        if (coords[dSA_X] < AABBMin[dV3E_X]) { AABBMin[dV3E_X] = (dReal)coords[dSA_X]; }
+        if (coords[dSA_X] > AABBMax[dV3E_X]) { AABBMax[dV3E_X] = (dReal)coords[dSA_X]; }
+
+        if (coords[dSA_Y] < AABBMin[dV3E_Y]) { AABBMin[dV3E_Y] = (dReal)coords[dSA_Y]; }
+        if (coords[dSA_Y] > AABBMax[dV3E_Y]) { AABBMax[dV3E_Y] = (dReal)coords[dSA_Y]; }
+
+        if (coords[dSA_Z] < AABBMin[dV3E_Z]) { AABBMin[dV3E_Z] = (dReal)coords[dSA_Z]; }
+        if (coords[dSA_Z] > AABBMax[dV3E_Z]) { AABBMax[dV3E_Z] = (dReal)coords[dSA_Z]; }
+    }
+}
+
+
+bool dxTriMeshData::preprocessData(bool buildUseFlags/*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/)
+{
+    // Filters out the requests for data that had already been built and
+    // forwards the remaining work, if any, to meaningfulPreprocessData().
+    // Returns true when there was nothing to do or when the work succeeded.
+    bool useFlagsWanted = buildUseFlags;
+
+    if (useFlagsWanted && haveUseFlagsBeenBuilt())
+    {
+        dUASSERT(false, "Another request to build edge/vertex use flags after they had already been built");
+
+        useFlagsWanted = false;
+    }
+
+    FaceAngleStorageMethod anglesWanted = faceAndgesRequirement;
+
+    if (anglesWanted != ASM__INVALID && haveFaceAnglesBeenBuilt())
+    {
+        dUASSERT(false, "Another request to build face angles after they had already been built");
+
+        anglesWanted = ASM__INVALID;
+    }
+
+    // Nothing is left to be built
+    if (!useFlagsWanted && anglesWanted == ASM__INVALID)
+    {
+        return true;
+    }
+
+    // There is nothing to preprocess in a mesh without triangles
+    if (m_Mesh.GetNbTriangles() == 0)
+    {
+        return true;
+    }
+
+    return meaningfulPreprocessData(useFlagsWanted, anglesWanted);
+}
+
+// Gives access to the vertex indices of triangles stored as IndexedTriangle
+// records spaced triStride bytes apart.
+struct TrimeshDataVertexIndexAccessor_OPCODE
+{
+    TrimeshDataVertexIndexAccessor_OPCODE(const IndexedTriangle *triIndicesBegin, unsigned triStride):
+        m_TriIndicesBegin(triIndicesBegin),
+        m_TriStride(triStride)
+    {
+    }
+
+    // Copies the dMTV__MAX vertex indices of the triangle number triangleIdx into out_VertexIndices
+    void getTriangleVertexIndices(unsigned out_VertexIndices[dMTV__MAX], unsigned triangleIdx) const
+    {
+        dSASSERT(dMTV_FIRST == 0);
+        dSASSERT(dMTV_SECOND == 1);
+        dSASSERT(dMTV_THIRD == 2);
+        dSASSERT(dMTV__MAX == 3);
+
+        // The stride is in bytes, so the record is located via byte pointer arithmetic
+        const sizeint recordByteOffset = triangleIdx * (sizeint)m_TriStride;
+        const IndexedTriangle *triangleRecord = (const IndexedTriangle *)((const uint8 *)m_TriIndicesBegin + recordByteOffset);
+        dSASSERT(dMTV__MAX == dARRAY_SIZE(triangleRecord->mVRef));
+
+        for (unsigned refIndex = 0; refIndex != dMTV__MAX; ++refIndex)
+        {
+            out_VertexIndices[refIndex] = triangleRecord->mVRef[refIndex];
+        }
+    }
+
+
+    const IndexedTriangle   *m_TriIndicesBegin;
+    unsigned                m_TriStride;
+};
+
+// Gives access to the vertex coordinates of triangles through an OPCODE MeshInterface.
+struct TrimeshDataTrianglePointAccessor_OPCODE
+{
+    TrimeshDataTrianglePointAccessor_OPCODE(const MeshInterface &mesh):
+        m_Mesh(mesh)
+    {
+    }
+
+    // Retrieves the three vertex points of the triangle number triangleIndex into out_Points
+    void getTriangleVertexPoints(dVector3 out_Points[dMTV__MAX], unsigned triangleIndex) const
+    {
+        dSASSERT(dMTV_FIRST == 0);
+        dSASSERT(dMTV_SECOND == 1);
+        dSASSERT(dMTV_THIRD == 2);
+        dSASSERT(dMTV__MAX == 3);
+
+        VertexPointers trianglePointers;
+        ConversionArea conversionArea;
+        m_Mesh.GetTriangle(trianglePointers, triangleIndex, conversionArea);
+
+        for (unsigned cornerIndex = 0; cornerIndex != 3; ++cornerIndex)
+        {
+            const Point *cornerPoint = trianglePointers.Vertex[cornerIndex];
+            dAssignVector3(out_Points[cornerIndex], cornerPoint->x, cornerPoint->y, cornerPoint->z);
+        }
+    }
+
+    const MeshInterface     &m_Mesh;
+};
+
+// Builds the edge/vertex use flags and/or the face angles for the mesh.
+// At least one of the two must be requested and the requested data must not
+// have been built before (see the assertions below).
+// All the allocations are performed up front; if any of them fails
+// the ones already made are released and false is returned.
+// Returns true on success.
+bool dxTriMeshData::meaningfulPreprocessData(bool buildUseFlags/*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/)
+{
+    const bool buildFaceAngles = faceAndgesRequirement != ASM__INVALID;
+    dIASSERT(buildUseFlags || buildFaceAngles);
+    dIASSERT(!buildUseFlags || !haveUseFlagsBeenBuilt());
+    dIASSERT(!buildFaceAngles || !haveFaceAnglesBeenBuilt());
+
+    bool result = false;
+
+    uint8 *useFlags = NULL;
+    sizeint flagsMemoryRequired = 0;
+    // These record how far the allocation sequence got, to drive the cleanup on failure
+    bool flagsAllocated = false, anglesAllocated = false;
+
+    // A single-pass loop: "break" is used to jump to the failure handling below
+    do 
+    {
+        if (buildUseFlags)
+        {
+            flagsMemoryRequired = calculateUseFlagsMemoryRequirement();
+            useFlags = (uint8 *)dAlloc(flagsMemoryRequired);
+
+            if (useFlags == NULL)
+            {
+                break;
+            }
+        }
+
+        // The use flags stage has been passed (whether or not the flags were requested)
+        flagsAllocated = true;
+
+        if (buildFaceAngles)
+        {
+            if (!allocateFaceAngles(faceAndgesRequirement))
+            {
+                break;
+            }
+        }
+
+        // The face angles stage has been passed (whether or not the angles were requested)
+        anglesAllocated = true;
+
+        const unsigned int numTris = m_Mesh.GetNbTriangles();
+        const unsigned int numVertices = m_Mesh.GetNbVertices();
+        sizeint numEdges = (sizeint)numTris * dMTV__MAX;
+        dIASSERT(numVertices <= numEdges); // Edge records are going to be used for vertex data as well
+
+        // A single temporary buffer holds the edge records followed by the vertex records
+        const sizeint recordsMemoryRequired = dEFFICIENT_SIZE(numEdges * sizeof(EdgeRecord));
+        const sizeint verticesMemoryRequired = /*dEFFICIENT_SIZE*/(numVertices * sizeof(VertexRecord)); // Skip alignment for the last chunk
+        const sizeint totalTempMemoryRequired = recordsMemoryRequired + verticesMemoryRequired;
+        void *tempBuffer = dAlloc(totalTempMemoryRequired);
+        
+        if (tempBuffer == NULL)
+        {
+            break;
+        }
+
+        EdgeRecord *edges = (EdgeRecord *)tempBuffer;
+        VertexRecord *vertices = (VertexRecord *)((uint8 *)tempBuffer + recordsMemoryRequired);
+
+        // Delay zero-filling until all the allocations succeed
+        if (useFlags != NULL)
+        {
+            memset(useFlags, 0, flagsMemoryRequired);
+        }
+
+        // Fill the edge records in from the triangle vertex indices
+        const IndexedTriangle *triIndicesBegin = m_Mesh.GetTris();
+        unsigned triStride = m_Mesh.GetTriStride();
+        TrimeshDataVertexIndexAccessor_OPCODE indexAccessor(triIndicesBegin, triStride);
+        meaningfulPreprocess_SetupEdgeRecords(edges, numEdges, indexAccessor);
+
+        // Sort the edges, so the ones sharing the same verts are beside each other
+        std::sort(edges, edges + numEdges);
+
+        // Compute the requested outputs; useFlags is NULL when the flags were not requested
+        TrimeshDataTrianglePointAccessor_OPCODE pointAccessor(m_Mesh);
+        const dReal *const externalNormals = retrieveNormals();
+        IFaceAngleStorageControl *faceAngles = retrieveFaceAngles();
+        meaningfulPreprocess_buildEdgeFlags(useFlags, faceAngles, edges, numEdges, vertices, externalNormals, pointAccessor);
+
+        dFree(tempBuffer, totalTempMemoryRequired);
+    	
+        // Keep the built flags in the object; they are released in the destructor
+        if (buildUseFlags)
+        {
+            m_InternalUseFlags = useFlags;
+        }
+
+        result = true;
+    }
+    while (false);
+
+    // On failure, release whatever had been allocated before the failing step
+    if (!result)
+    {
+        if (flagsAllocated)
+        {
+            if (anglesAllocated)
+            {
+                if (buildFaceAngles)
+                {
+                    freeFaceAngles();
+                }
+            }
+
+            if (buildUseFlags)
+            {
+                dFree(useFlags, flagsMemoryRequired);
+            }
+        }
+    }
+
+    return result;
+}
+
+
+// Refits the bounding volume tree of the mesh data by calling Refit() on it.
+void dxTriMeshData::updateData()
+{
+    m_BVTree.Refit();
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// dxTriMesh
+
+// The destructor body is intentionally empty: it performs no explicit actions.
+dxTriMesh::~dxTriMesh()
+{
+    //
+}
+
+void dxTriMesh::clearTCCache()
+{
+    /* The caches are cleared with dArray's setSize(0), which does not call
+    the destructors of the elements. To avoid leaking, the elements of
+    each cache are destroyed by hand before the cache is truncated. */
+
+    // Sphere temporal coherence cache
+    for (int sphereIndex = 0, sphereCount = m_SphereTCCache.size(); sphereIndex != sphereCount; ++sphereIndex)
+    {
+        m_SphereTCCache[sphereIndex].~SphereTC();
+    }
+    m_SphereTCCache.setSize(0);
+
+    // Box temporal coherence cache
+    for (int boxIndex = 0, boxCount = m_BoxTCCache.size(); boxIndex != boxCount; ++boxIndex)
+    {
+        m_BoxTCCache[boxIndex].~BoxTC();
+    }
+    m_BoxTCCache.setSize(0);
+
+    // Capsule temporal coherence cache
+    for (int capsuleIndex = 0, capsuleCount = m_CapsuleTCCache.size(); capsuleIndex != capsuleCount; ++capsuleIndex)
+    {
+        m_CapsuleTCCache[capsuleIndex].~CapsuleTC();
+    }
+    m_CapsuleTCCache.setSize(0);
+}
+
+
+bool dxTriMesh::controlGeometry(int controlClass, int controlCode, void *dataValue, int *dataSize)
+{
+    // Handles the collider control class requests known to the trimesh;
+    // everything else is delegated to the parent class.
+    const bool colliderClassRequested = controlClass == dGeomColliderControlClass;
+
+    if (colliderClassRequested && controlCode == dGeomCommonAnyControlCode)
+    {
+        // No value is carried with this code, hence the zero size
+        return checkControlValueSizeValidity(dataValue, dataSize, 0);
+    }
+
+    if (colliderClassRequested && controlCode == dGeomColliderSetMergeSphereContactsControlCode)
+    {
+        if (!checkControlValueSizeValidity(dataValue, dataSize, sizeof(int)))
+        {
+            return false;
+        }
+
+        return controlGeometry_SetMergeSphereContacts(*(int *)dataValue);
+    }
+
+    if (colliderClassRequested && controlCode == dGeomColliderGetMergeSphereContactsControlCode)
+    {
+        if (!checkControlValueSizeValidity(dataValue, dataSize, sizeof(int)))
+        {
+            return false;
+        }
+
+        return controlGeometry_GetMergeSphereContacts(*(int *)dataValue);
+    }
+
+    return dxTriMesh_Parent::controlGeometry(controlClass, controlCode, dataValue, dataSize);
+}
+
+bool dxTriMesh::controlGeometry_SetMergeSphereContacts(int dataValue)
+{
+    // Translates the public control value into the internal merge option.
+    // The stored option is left untouched and false is returned for an unknown value.
+    dxContactMergeOptions selectedOption;
+
+    if (dataValue == dGeomColliderMergeContactsValue__Default)
+    {
+        selectedOption = (dxContactMergeOptions)MERGE_NORMALS__SPHERE_DEFAULT;
+    }
+    else if (dataValue == dGeomColliderMergeContactsValue_None)
+    {
+        selectedOption = DONT_MERGE_CONTACTS;
+    }
+    else if (dataValue == dGeomColliderMergeContactsValue_Normals)
+    {
+        selectedOption = MERGE_CONTACT_NORMALS;
+    }
+    else if (dataValue == dGeomColliderMergeContactsValue_Full)
+    {
+        selectedOption = MERGE_CONTACTS_FULLY;
+    }
+    else
+    {
+        dAASSERT(false && "Invalid contact merge control value");
+        return false;
+    }
+
+    m_SphereContactsMergeOption = selectedOption;
+    return true;
+}
+
+bool dxTriMesh::controlGeometry_GetMergeSphereContacts(int &returnValue)
+{
+    // Translates the internal merge option back into the public control value.
+    // returnValue is not modified if the stored option is not one of the known ones.
+    if (m_SphereContactsMergeOption == DONT_MERGE_CONTACTS)
+    {
+        returnValue = dGeomColliderMergeContactsValue_None;
+        return true;
+    }
+
+    if (m_SphereContactsMergeOption == MERGE_CONTACT_NORMALS)
+    {
+        returnValue = dGeomColliderMergeContactsValue_Normals;
+        return true;
+    }
+
+    if (m_SphereContactsMergeOption == MERGE_CONTACTS_FULLY)
+    {
+        returnValue = dGeomColliderMergeContactsValue_Full;
+        return true;
+    }
+
+    dIASSERT(false && "Internal error: unexpected contact merge option field value");
+    return false;
+}
+
+
+/*virtual */
+void dxTriMesh::computeAABB()
+{
+    // Computes the geom's AABB from the mesh data's model space AABB center
+    // and extents using the geom's final rotation and position.
+    const dxTriMeshData *data = getMeshData();
+    const dVector3 &extents = data->m_AABBExtents;
+
+    const dMatrix3 &rot = final_posr->R;
+    const dVector3 &origin = final_posr->pos;
+
+    // Rotate the model space center
+    dVector3 rotatedCenter;
+    dMultiply0_331( rotatedCenter, rot, data->m_AABBCenter );
+
+    // Per-axis half sizes: sums of the absolute values of the rotation rows
+    // scaled by the extents (the rows are 4 elements apart in dMatrix3)
+    dReal halfX = dFabs(rot[0] * extents[0]) +
+        dFabs(rot[1] * extents[1]) +
+        dFabs(rot[2] * extents[2]);
+    dReal halfY = dFabs(rot[4] * extents[0]) +
+        dFabs(rot[5] * extents[1]) +
+        dFabs(rot[6] * extents[2]);
+    dReal halfZ = dFabs(rot[8] * extents[0]) +
+        dFabs(rot[9] * extents[1]) +
+        dFabs(rot[10] * extents[2]);
+
+    // aabb is laid out as [minX, maxX, minY, maxY, minZ, maxZ]
+    aabb[0] = rotatedCenter[0] + origin[0] - halfX;
+    aabb[1] = rotatedCenter[0] + origin[0] + halfX;
+    aabb[2] = rotatedCenter[1] + origin[1] - halfY;
+    aabb[3] = rotatedCenter[1] + origin[1] + halfY;
+    aabb[4] = rotatedCenter[2] + origin[2] - halfZ;
+    aabb[5] = rotatedCenter[2] + origin[2] + halfZ;
+}
+
+
+void dxTriMesh::fetchMeshTransformedTriangle(dVector3 *const pout_triangle[3], unsigned index)
+{
+    // Fetch the triangle using the geom's updated position and rotation.
+    // The position is requested before the rotation.
+    const dVector3 &updatedPosition = buildUpdatedPosition();
+    const dMatrix3 &updatedRotation = buildUpdatedRotation();
+
+    fetchMeshTriangle(pout_triangle, index, updatedPosition, updatedRotation);
+}
+
+void dxTriMesh::fetchMeshTransformedTriangle(dVector3 out_triangle[3], unsigned index)
+{
+    // Fetch the triangle using the geom's updated position and rotation.
+    // The position is requested before the rotation.
+    const dVector3 &updatedPosition = buildUpdatedPosition();
+    const dMatrix3 &updatedRotation = buildUpdatedRotation();
+
+    fetchMeshTriangle(out_triangle, index, updatedPosition, updatedRotation);
+}
+
+void dxTriMesh::fetchMeshTriangle(dVector3 *const pout_triangle[3], unsigned index, const dVector3 position, const dMatrix3 rotation) const
+{
+    // Retrieves the vertices of the triangle number index transformed by the
+    // given rotation and position. Each of the three output pointers is
+    // optional: the vertices with NULL pointers are skipped.
+    dIASSERT(dIN_RANGE(index, 0, getMeshTriangleCount()));
+
+    VertexPointers vertexPointers;
+    ConversionArea conversionArea;
+
+    const dxTriMeshData *data = getMeshData();
+    data->m_Mesh.GetTriangle(vertexPointers, index, conversionArea);
+
+    for (unsigned cornerIndex = 0; cornerIndex != 3; ++cornerIndex)
+    {
+        dVector3 *pTarget = pout_triangle[cornerIndex];
+
+        if (pTarget == NULL)
+        {
+            continue;
+        }
+
+        // Convert the mesh point into a vector...
+        dVector3 localPoint;
+        localPoint[dV3E_X] = vertexPointers.Vertex[cornerIndex]->x;
+        localPoint[dV3E_Y] = vertexPointers.Vertex[cornerIndex]->y;
+        localPoint[dV3E_Z] = vertexPointers.Vertex[cornerIndex]->z;
+
+        // ...then rotate, translate and clear the padding element
+        dVector3 &transformedPoint = *pTarget;
+        dMultiply0_331(transformedPoint, rotation, localPoint);
+        dAddVectors3(transformedPoint, transformedPoint, position);
+        transformedPoint[dV3E_PAD] = REAL(0.0);
+    }
+}
+
+void dxTriMesh::fetchMeshTriangle(dVector3 out_triangle[3], unsigned index, const dVector3 position, const dMatrix3 rotation) const
+{
+    // Array flavour: all three vertices are always written, each as rotation * vertex + position
+    dIASSERT(dIN_RANGE(index, 0, getMeshTriangleCount()));
+
+    const dxTriMeshData *data = getMeshData();
+    VertexPointers vertexPointers;
+    ConversionArea conversionArea;
+    data->m_Mesh.GetTriangle(vertexPointers, index, conversionArea);
+
+    for (unsigned corner = 0; corner != 3; ++corner)
+    {
+        dVector3 local;
+        local[dV3E_X] = vertexPointers.Vertex[corner]->x;
+        local[dV3E_Y] = vertexPointers.Vertex[corner]->y;
+        local[dV3E_Z] = vertexPointers.Vertex[corner]->z;
+        dVector3 &target = out_triangle[corner];
+        dMultiply0_331(target, rotation, local);
+        dAddVectors3(target, target, position);
+        target[dV3E_PAD] = REAL(0.0);
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern */ // Allocates an empty dxTriMeshData with operator new; dGeomTriMeshDataDestroy() deletes it
+dTriMeshDataID dGeomTriMeshDataCreate()
+{
+    return new dxTriMeshData();
+}
+
+/*extern */
+void dGeomTriMeshDataDestroy(dTriMeshDataID g)
+{
+    // No argument assertion here: applying delete to a NULL id does nothing
+    delete g;
+}
+
+
+/*extern */
+void dGeomTriMeshDataSet(dTriMeshDataID g, int dataId, void *pDataLocation)
+{
+    // Hands the caller's buffer (pDataLocation) over to the mesh data; dataId selects what the buffer is
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    switch (dataId)
+    {
+        case dTRIMESHDATA_FACE_NORMALS:
+        {
+            g->assignNormals((const dReal *)pDataLocation);
+            break;
+        }
+
+        case dTRIMESHDATA_USE_FLAGS:
+        {
+            g->assignExternalUseFlagsBuffer((uint8 *)pDataLocation);
+            break;
+        }
+
+        // case dTRIMESHDATA__MAX: -- To be located by Find in Files
+        default:
+        {
+            // Only unknown ids get here: fail unconditionally (asserting on dataId let nonzero invalid ids pass)
+            dUASSERT(false, "invalid data type");
+            break;
+        }
+    }
+}
+
+static void *geomTriMeshDataGet(dTriMeshDataID g, int dataId, sizeint *pOutDataSize); // Shared implementation, defined further below
+
+/*extern */ // NOTE(review): ignoring pOutDataSize looks intentional (dGeomTriMeshDataGet2 is the size-reporting variant) -- confirm
+void *dGeomTriMeshDataGet(dTriMeshDataID g, int dataId, sizeint *pOutDataSize)
+{
+    return geomTriMeshDataGet(g, dataId, NULL); // pOutDataSize is not forwarded: nothing is written through it
+}
+
+/*extern */ // Returns the data buffer selected by dataId; a non-NULL pOutDataSize receives the buffer size
+void *dGeomTriMeshDataGet2(dTriMeshDataID g, int dataId, sizeint *pOutDataSize)
+{
+    return geomTriMeshDataGet(g, dataId, pOutDataSize);
+}
+
+static 
+void *geomTriMeshDataGet(dTriMeshDataID g, int dataId, sizeint *pOutDataSize)
+{
+    // Common implementation of dGeomTriMeshDataGet() and dGeomTriMeshDataGet2().
+    //   g            - the mesh data to query (asserted to be non-NULL);
+    //   dataId       - one of the dTRIMESHDATA_* identifiers handled below;
+    //   pOutDataSize - optional; receives the buffer size in bytes (0 for an unknown dataId).
+    // Returns the buffer pointer kept by the mesh data, or NULL for an unknown dataId.
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    const dxTriMeshData *data = g;
+
+    void *result = NULL;
+    sizeint resultSize = 0;
+
+    switch (dataId)
+    {
+        case dTRIMESHDATA_FACE_NORMALS:
+        {
+            resultSize = data->calculateNormalsMemoryRequirement();
+            result = (void *)data->retrieveNormals();
+            break;
+        }
+
+        case dTRIMESHDATA_USE_FLAGS:
+        {
+            resultSize = data->calculateUseFlagsMemoryRequirement();
+            result = const_cast<uint8 *>(data->smartRetrieveUseFlags());
+            break;
+        }
+
+        // case dTRIMESHDATA__MAX: -- To be located by Find in Files
+        default:
+        {
+            // Only unknown identifiers get here, so the check must fail unconditionally:
+            // asserting on dataId itself let every nonzero invalid value pass silently.
+            dUASSERT(false, "invalid data type");
+            break;
+        }
+    }
+
+    if (pOutDataSize != NULL)
+    {
+        *pOutDataSize = resultSize;
+    }
+
+    return result;
+}
+
+
+/*extern */
+void dGeomTriMeshDataBuildSingle1(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    // The trailing 'true' is buildData()'s Single argument
+    const bool singleFlavour = true;
+    g->buildData((const Point *)Vertices, VertexStride, VertexCount, 
+        (const IndexedTriangle *)Indices, IndexCount, TriStride, 
+        (const dReal *)Normals, singleFlavour);
+}
+
+/*extern */
+void dGeomTriMeshDataBuildDouble1(dTriMeshDataID g,
+    const void* Vertices, int VertexStride, int VertexCount, 
+    const void* Indices, int IndexCount, int TriStride,
+    const void* Normals)
+{
+    dUASSERT(g, "The argument is not a trimesh data");
+
+    dxTriMeshData *data = g; // Same call as in the Single1 variant, but with Single == false
+    data->buildData((const Point *)Vertices, VertexStride, VertexCount, 
+        (const IndexedTriangle *)Indices, IndexCount, TriStride, 
+        (const dReal *)Normals, false);
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+/*extern */
+dGeomID dCreateTriMesh(dSpaceID space, 
+    dTriMeshDataID Data,
+    dTriCallback* Callback,
+    dTriArrayCallback* ArrayCallback,
+    dTriRayCallback* RayCallback)
+{
+    // The geom is heap-allocated; all arguments go to the dxTriMesh constructor unchanged
+    return new dxTriMesh(space, Data, Callback, ArrayCallback, RayCallback);
+}
+
+
+/*extern */
+void dGeomTriMeshSetLastTransform(dGeomID g, const dMatrix4 last_trans )
+{
+    dAASSERT(g);
+    dUASSERT(g->type == dTriMeshClass, "The geom is not a trimesh");
+
+    // The class check above guards the downcast; the matrix contents are copied into the geom
+    static_cast<dxTriMesh *>(g)->assignLastTransform(last_trans);
+}
+
+/*extern */
+const dReal *dGeomTriMeshGetLastTransform(dGeomID g)
+{
+    dAASSERT(g);
+    dUASSERT(g->type == dTriMeshClass, "The geom is not a trimesh");
+
+    // The returned pointer refers to the matrix stored inside the geom (no copy is made)
+    return static_cast<dxTriMesh *>(g)->retrieveLastTransform();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+// Clears the OPCODE collider caches when ODE is shutting down
+/*extern */
+void opcode_collider_cleanup()
+{
+#if !dTLS_ENABLED
+
+    // Without TLS the colliders cache is obtained with kind 0;
+    // with TLS enabled this function body compiles to nothing.
+    GetTrimeshCollidersCache(0)->clearOPCODECaches();
+
+#endif // !dTLS_ENABLED
+}
+
+
+#endif // dTRIMESH_ENABLED && dTRIMESH_OPCODE
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.h b/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.h
new file mode 100644
index 0000000..fdce2f1
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_opcode.h
@@ -0,0 +1,333 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+// Modified for FreeSOLID Compatibility by Rodrigo Hernandez
+// Trimesh caches separation by Oleh Derevenko
+// TriMesh storage classes refactoring and face angle computation code by Oleh Derevenko (C) 2016-2019
+
+
+#ifndef _ODE_COLLISION_TRIMESH_OPCODE_H_
+#define _ODE_COLLISION_TRIMESH_OPCODE_H_
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+
+//****************************************************************************
+// dxTriMesh class
+
+
+#include "collision_kernel.h"
+#include "collision_trimesh_colliders.h"
+#include "collision_util.h"
+#include <ode/collision_trimesh.h>
+
+#include "collision_trimesh_internal.h"
+
+#define BAN_OPCODE_AUTOLINK
+#include "Opcode.h"
+using namespace Opcode;
+
+
+#if !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+// New trimesh collider hash table types
+enum
+{
+    MAXCONTACT_X_NODE = 4, // Capacity of CONTACT_KEY_HASH_NODE::m_keyarray
+    CONTACTS_HASHSIZE = 256 // Number of nodes stored in CONTACT_KEY_HASH_TABLE
+};
+
+struct CONTACT_KEY // One hash table entry: a contact pointer paired with an unsigned key
+{
+    dContactGeom * m_contact;
+    unsigned int m_key; // NOTE(review): presumably the hash key computed for m_contact -- confirm in the trimesh-trimesh collider
+};
+
+struct CONTACT_KEY_HASH_NODE // A table node with storage for MAXCONTACT_X_NODE keys
+{
+    CONTACT_KEY m_keyarray[MAXCONTACT_X_NODE];
+    int m_keycount; // NOTE(review): presumably the number of m_keyarray entries in use -- confirm
+};
+
+struct CONTACT_KEY_HASH_TABLE // Fixed-size array of CONTACTS_HASHSIZE hash nodes
+{
+public:
+    CONTACT_KEY_HASH_NODE &operator[](unsigned int index) { return m_storage[index]; } // No range check is made on index
+
+private:
+    CONTACT_KEY_HASH_NODE m_storage[CONTACTS_HASHSIZE];
+};
+
+#endif // !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+
+struct VertexUseCache
+{
+public:
+    VertexUseCache(): m_VertexUseBits(NULL), m_VertexUseElements(0) {}
+    ~VertexUseCache() { freeVertexUSEDFlags();  }
+
+    // Ensures storage for one bit per vertex and zeroes those bits; false means the buffer could not be grown
+    bool resizeAndResetVertexUSEDFlags(unsigned VertexCount)
+    {
+        const sizeint requiredBytes = (VertexCount + 7) / 8;
+        if (requiredBytes > m_VertexUseElements && !reallocVertexUSEDFlags(requiredBytes)) {
+            return false;
+        }
+        memset(m_VertexUseBits, 0, requiredBytes);
+        return true;
+    }
+
+    bool getVertexUSEDFlag(unsigned VertexIndex) const { return ((m_VertexUseBits[VertexIndex >> 3] >> (VertexIndex & 7)) & 1) != 0; }
+    void setVertexUSEDFlag(unsigned VertexIndex) { m_VertexUseBits[VertexIndex >> 3] |= (1 << (VertexIndex & 7)); }
+
+private:
+    bool reallocVertexUSEDFlags(sizeint VertexNewElements)
+    {
+        const sizeint elementSize = sizeof(m_VertexUseBits[0]);
+        void *resized = dRealloc(m_VertexUseBits, m_VertexUseElements * elementSize, VertexNewElements * elementSize);
+        if (resized == NULL) {
+            return false; // The old buffer and its recorded size stay as they were
+        }
+        m_VertexUseBits = (uint8 *)resized;
+        m_VertexUseElements = VertexNewElements;
+        return true;
+    }
+
+    void freeVertexUSEDFlags()
+    {
+        dFree(m_VertexUseBits, m_VertexUseElements * sizeof(m_VertexUseBits[0]));
+        m_VertexUseBits = NULL;
+        m_VertexUseElements = 0;
+    }
+
+private:
+    uint8 *m_VertexUseBits; // Bit-packed flags: bit (i % 8) of byte (i / 8) belongs to vertex i
+    sizeint m_VertexUseElements; // Allocated size of m_VertexUseBits, in elements
+};
+
+
+struct TrimeshCollidersCache // Collider objects and scratch caches; colliders get an instance via GetTrimeshCollidersCache()
+{
+    TrimeshCollidersCache()
+    {
+        initOPCODECaches();
+    }
+
+    void initOPCODECaches();
+    void clearOPCODECaches();
+
+    // Collider caches
+    BVTCache ColCache;
+
+#if !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+    CONTACT_KEY_HASH_TABLE m_hashcontactset;
+#endif
+
+    // Colliders
+    /* -- not used -- also uncomment in initOPCODECaches()
+    PlanesCollider _PlanesCollider; -- not used 
+    */
+    SphereCollider m_SphereCollider;
+    OBBCollider m_OBBCollider;
+    RayCollider m_RayCollider; // Configured and run by dCollideRTL()
+    AABBTreeCollider m_AABBTreeCollider;
+    /* -- not used -- also uncomment in initOPCODECaches()
+    LSSCollider _LSSCollider;
+    */
+    // Trimesh caches
+    CollisionFaces m_Faces; // dCollideRTL() reads the faces hit by the ray from here
+    SphereCache m_DefaultSphereCache;
+    OBBCache m_DefaultBoxCache;
+    LSSCache m_DefaultCapsuleCache;
+
+    // Trimesh-plane collision vertex use cache
+    VertexUseCache m_VertexUses;
+};
+
+
+typedef dxTriDataBase dxTriMeshData_Parent;
+struct dxTriMeshData: // OPCODE-backed trimesh data: mesh interface, BV tree, model-space AABB and per-triangle use flags
+    public dxTriMeshData_Parent
+{
+public:
+    dxTriMeshData():
+        dxTriMeshData_Parent(),
+        m_ExternalUseFlags(NULL),
+        m_InternalUseFlags(NULL)
+    {
+    }
+
+    ~dxTriMeshData();
+
+    void buildData(const Point *Vertices, int VertexStide, unsigned VertexCount,
+        const IndexedTriangle *Indices, unsigned IndexCount, int TriStride,
+        const dReal *in_Normals,
+        bool Single); // dGeomTriMeshDataBuildSingle1()/BuildDouble1() pass true/false here
+
+private:
+    void calculateDataAABB(dVector3 &AABBMax, dVector3 &AABBMin);
+    template<typename treal>
+    void templateCalculateDataAABB(dVector3 &AABBMax, dVector3 &AABBMin);
+
+public:
+    /* Setup the UseFlags array and/or build face angles */
+    bool preprocessData(bool buildUseFlags/*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/);
+
+private:
+    bool meaningfulPreprocessData(bool buildUseFlags/*=false*/, FaceAngleStorageMethod faceAndgesRequirement/*=ASM__INVALID*/);
+
+public:
+    /* For when app changes the vertices */
+    void updateData();
+
+public:
+    const Point *retrieveVertexInstances() const { return (const Point *)dxTriMeshData_Parent::retrieveVertexInstances(); }
+
+public:
+    void assignNormals(const dReal *normals) { dxTriMeshData_Parent::assignNormals(normals); }
+    const dReal *retrieveNormals() const { return (const dReal *)dxTriMeshData_Parent::retrieveNormals(); }
+    sizeint calculateNormalsMemoryRequirement() const { return retrieveTriangleCount() * (sizeof(dReal) * dSA__MAX); } // In bytes: dSA__MAX dReal values per triangle
+
+public:
+    void assignExternalUseFlagsBuffer(uint8 *buffer) { m_ExternalUseFlags = buffer != m_InternalUseFlags ? buffer : NULL; } // Passing the internal buffer itself resets the external pointer to NULL
+    const uint8 *smartRetrieveUseFlags() const { return m_ExternalUseFlags != NULL ? m_ExternalUseFlags : m_InternalUseFlags; } // External flags, when assigned, take precedence
+    bool haveUseFlagsBeenBuilt() const { return m_InternalUseFlags != NULL; }
+    sizeint calculateUseFlagsMemoryRequirement() const { return m_Mesh.GetNbTriangles() * sizeof(m_InternalUseFlags[0]); } // In bytes: one flags element per triangle
+
+public:
+    Model m_BVTree;
+    MeshInterface m_Mesh;
+
+    /* aabb in model space; dxTriMesh::computeAABB() uses the extents as half-sizes around the center */
+    dVector3 m_AABBCenter; // NOTE(review): not set by the constructor above -- presumably filled in by buildData(); confirm
+    dVector3 m_AABBExtents;
+
+    // data for use in collision resolution
+    uint8 *m_ExternalUseFlags;
+    uint8 *m_InternalUseFlags;
+
+};
+
+
+typedef dxMeshBase dxTriMesh_Parent;
+struct dxTriMesh: // Trimesh geom for the OPCODE backend; the geometry itself lives in the attached dxTriMeshData
+    public dxTriMesh_Parent
+{
+public:
+    // Functions
+    dxTriMesh(dxSpace *Space, dxTriMeshData *Data, 
+        dTriCallback *Callback, dTriArrayCallback *ArrayCallback, dTriRayCallback *RayCallback):
+        dxTriMesh_Parent(Space, Data, Callback, ArrayCallback, RayCallback, false)
+    {
+        m_SphereContactsMergeOption = (dxContactMergeOptions)MERGE_NORMALS__SPHERE_DEFAULT;
+
+        dZeroMatrix4(m_last_trans);
+    }
+
+    ~dxTriMesh();
+
+    void clearTCCache();
+
+    bool controlGeometry(int controlClass, int controlCode, void *dataValue, int *dataSize);
+
+    virtual void computeAABB();
+
+public:
+    dxTriMeshData *retrieveMeshData() const { return getMeshData(); }
+    const dReal *retrieveMeshNormals() const { return getMeshData()->retrieveNormals(); }
+    Model &retrieveMeshBVTreeRef() const { return getMeshData()->m_BVTree; }
+    const uint8 *retrieveMeshSmartUseFlags() const { return getMeshData()->smartRetrieveUseFlags(); }
+
+    unsigned getMeshTriangleCount() const { return getMeshData()->m_Mesh.GetNbTriangles(); }
+    void fetchMeshTransformedTriangle(dVector3 *const pout_triangle[3], unsigned index)/* const*/; // Uses the geom's own pose; NULL entries are skipped
+    void fetchMeshTransformedTriangle(dVector3 out_triangle[3], unsigned index)/* const*/; // Uses the geom's own pose
+    void fetchMeshTriangle(dVector3 *const pout_triangle[3], unsigned index, const dVector3 position, const dMatrix3 rotation) const; // NULL entries are skipped
+    void fetchMeshTriangle(dVector3 out_triangle[3], unsigned index, const dVector3 position, const dMatrix3 rotation) const;
+
+public:
+    void assignLastTransform(const dMatrix4 last_trans) { dCopyMatrix4x4(m_last_trans, last_trans); }
+    const dReal *retrieveLastTransform() const { return m_last_trans; }
+
+private:
+    enum
+    {
+        MERGE_NORMALS__SPHERE_DEFAULT = DONT_MERGE_CONTACTS // Initial value of m_SphereContactsMergeOption
+    };
+
+    bool controlGeometry_SetMergeSphereContacts(int dataValue);
+    bool controlGeometry_GetMergeSphereContacts(int &returnValue);
+
+private:
+    dxTriMeshData *getMeshData() const { return static_cast<dxTriMeshData *>(dxTriMesh_Parent::getMeshData()); } // Downcast of the parent's mesh data pointer
+
+public:
+    // Temporal coherence cache entries: an OPCODE cache object extended with a geom pointer
+    // NOTE(review): Geom is presumably the other geom the cache entry is kept for -- confirm in the colliders
+    struct SphereTC : public SphereCache{
+        dxGeom* Geom;
+    };
+
+    struct BoxTC : public OBBCache{
+        dxGeom* Geom;
+    };
+
+    struct CapsuleTC : public LSSCache{
+        dxGeom* Geom;
+    };
+
+public:
+    // Contact merging option
+    dxContactMergeOptions m_SphereContactsMergeOption;
+    // Instance data for last transform.
+    dMatrix4 m_last_trans;
+
+    dArray<SphereTC> m_SphereTCCache;
+    dArray<BoxTC> m_BoxTCCache;
+    dArray<CapsuleTC> m_CapsuleTCCache;
+};
+
+
+static inline 
+Matrix4x4 &MakeMatrix(const dVector3 Position, const dMatrix3 Rotation, Matrix4x4 &Out)
+{
+    return Out.Set( // The 3x3 part of Rotation (row stride 4) is passed transposed; Position forms the last row
+        Rotation[0], Rotation[4], Rotation[8], 0.0f,
+        Rotation[1], Rotation[5], Rotation[9], 0.0f,
+        Rotation[2], Rotation[6], Rotation[10],0.0f,
+        Position[0], Position[1], Position[2], 1.0f);
+}
+
+static inline 
+Matrix4x4 &MakeMatrix(dxGeom* g, Matrix4x4 &Out)
+{
+    const dVector3 &geomPosition = g->buildUpdatedPosition();
+    const dMatrix3 &geomRotation = g->buildUpdatedRotation();
+    return MakeMatrix(geomPosition, geomRotation, Out); // Same conversion, with the pose taken from the geom
+}
+
+
+#endif // #if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+
+
+#endif	//_ODE_COLLISION_TRIMESH_OPCODE_H_
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_plane.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_plane.cpp
new file mode 100644
index 0000000..5c3c67a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_plane.cpp
@@ -0,0 +1,226 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh - Plane collider by David Walters, July 2006
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+#if dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_std.h"
+#include "collision_trimesh_internal.h"
+
+
+#if dTRIMESH_OPCODE
+
+int dCollideTrimeshPlane( dxGeom *o1, dxGeom *o2, int flags, dContactGeom* contacts, int skip )
+{ // Trimesh (o1) against plane (o2): each mesh vertex found behind the plane becomes a contact; returns the contact count
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dTriMeshClass );
+    dIASSERT( o2->type == dPlaneClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // Alias pointers to the plane and trimesh
+    dxTriMesh* trimesh = (dxTriMesh*)( o1 );
+    dxPlane* plane = (dxPlane*)( o2 );
+
+    int contact_count = 0;
+
+    // Cache the maximum contact count.
+    const int contact_max = ( flags & NUMC_MASK );
+
+    // Cache trimesh position and rotation.
+    const dVector3& trimesh_pos = *(const dVector3*)dGeomGetPosition( trimesh );
+    const dMatrix3& trimesh_R = *(const dMatrix3*)dGeomGetRotation( trimesh );
+
+    //
+    // Working storage for the loop over all triangles.
+    //
+
+    VertexPointersEx VPE;
+    VertexPointers &VP = VPE.vp;
+    ConversionArea VC;
+    dReal alpha;
+    dVector3 vertex;
+
+#if !defined(dSINGLE) || 1
+    dVector3 int_vertex;		// Intermediate dReal copy of the OPCODE vertex (always present because of the "|| 1").
+#endif // dSINGLE
+
+    const unsigned uiTLSKind = trimesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == plane->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    VertexUseCache &vertex_use_cache = pccColliderCache->m_VertexUses;
+
+    // Reallocate vertex use cache if necessary; if that fails (cache_status == false) shared vertices are not filtered out
+    const dxTriMeshData *meshData = trimesh->retrieveMeshData();
+    const int vertex_count = meshData->m_Mesh.GetNbVertices();
+    const bool cache_status = vertex_use_cache.resizeAndResetVertexUSEDFlags(vertex_count);
+
+    // Cache the triangle count.
+    const int tri_count = meshData->m_Mesh.GetNbTriangles();
+
+    // For each triangle
+    for ( int t = 0; t < tri_count; ++t )
+    {
+        // Get triangle, which should also use callback.
+        bool ex_avail = meshData->m_Mesh.GetExTriangle( VPE, t, VC);
+
+        // For each vertex.
+        for ( int v = 0; v < 3; ++v )
+        {
+            // Vertex already tested via another triangle? (needs both the use cache and the vertex indices)
+            if (cache_status && ex_avail)
+            {
+                unsigned VIndex = VPE.Index[v];
+                if (vertex_use_cache.getVertexUSEDFlag(VIndex))
+                    continue;
+                // mark this point as used
+                vertex_use_cache.setVertexUSEDFlag(VIndex);
+            }
+
+            //
+            // Get Vertex
+            //
+
+#if defined(dSINGLE) && 0 // Always assign via intermediate array as otherwise it is an encapsulation violation
+
+            dMultiply0_331( vertex, trimesh_R, (float*)( VP.Vertex[ v ] ) );
+
+#else // dDOUBLE || 1
+
+            // OPCODE data is in single precision format.
+            int_vertex[ 0 ] = VP.Vertex[ v ]->x;
+            int_vertex[ 1 ] = VP.Vertex[ v ]->y;
+            int_vertex[ 2 ] = VP.Vertex[ v ]->z;
+
+            dMultiply0_331( vertex, trimesh_R, int_vertex );
+
+#endif // dSINGLE/dDOUBLE
+
+            vertex[ 0 ] += trimesh_pos[ 0 ];
+            vertex[ 1 ] += trimesh_pos[ 1 ];
+            vertex[ 2 ] += trimesh_pos[ 2 ];
+
+
+            //
+            // Collision?
+            //
+
+            // If alpha < 0 then the point is in front of the plane, i.e. no contact
+            // If alpha = 0 then the point is on the plane
+            alpha = plane->p[ 3 ] - dCalcVectorDot3( plane->p, vertex );
+
+            // If alpha > 0 the point is behind the plane. CONTACT!
+            if ( alpha > 0 )
+            {
+                // Alias the contact
+                dContactGeom* contact = SAFECONTACT( flags, contacts, contact_count, skip );
+
+                contact->pos[ 0 ] = vertex[ 0 ];
+                contact->pos[ 1 ] = vertex[ 1 ];
+                contact->pos[ 2 ] = vertex[ 2 ];
+
+                contact->normal[ 0 ] = plane->p[ 0 ];
+                contact->normal[ 1 ] = plane->p[ 1 ];
+                contact->normal[ 2 ] = plane->p[ 2 ];
+
+                contact->depth = alpha;
+                contact->g1 = trimesh;
+                contact->g2 = plane;
+                contact->side1 = t; // Index of the triangle through which the vertex was reached
+                contact->side2 = -1;
+
+                ++contact_count;
+
+                // All contact slots are full?
+                if ( contact_count >= contact_max )
+                    return contact_count; // <=== STOP HERE
+            }
+        }
+    }
+
+    // Return contact count.
+    return contact_count;
+}
+
+
+#endif // dTRIMESH_OPCODE
+
+
+#if dTRIMESH_GIMPACT
+
+#include "gimpact_contact_export_helper.h"
+#include "gimpact_plane_contact_accessor.h"
+
+
+int dCollideTrimeshPlane( dxGeom *o1, dxGeom *o2, int flags, dContactGeom* contacts, int skip )
+{ // GIMPACT flavour: trimesh (o1) against plane (o2); returns the number of contacts exported into 'contacts'
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dTriMeshClass );
+    dIASSERT( o2->type == dPlaneClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // Alias the trimesh and fetch the plane parameters
+    dxTriMesh* trimesh = (dxTriMesh*)( o1 );
+    dVector4 plane;
+    dGeomPlaneGetParams(o2, plane);
+
+    o1 -> recomputeAABB();
+    o2 -> recomputeAABB();
+
+    // Find collision: the raw results are gathered into a dynamic array first
+
+    GDYNAMIC_ARRAY collision_result;
+    GIM_CREATE_TRIMESHPLANE_CONTACTS(collision_result);
+
+    gim_trimesh_plane_collisionODE(&trimesh->m_collision_trimesh,plane,&collision_result);
+
+    if(collision_result.m_size == 0 )
+    {
+        GIM_DYNARRAY_DESTROY(collision_result); // The array is destroyed on this early exit as well
+        return 0;
+    }
+
+
+    vec4f * planecontact_results = GIM_DYNARRAY_POINTER(vec4f, collision_result);
+    unsigned int contactcount = collision_result.m_size;
+    
+    dxPlaneContactAccessor contactaccessor(planecontact_results, plane, o1, o2);
+    contactcount = dxGImpactContactsExportHelper::ExportMaxDepthGImpactContacts(contactaccessor, contactcount, flags, contacts, skip);
+
+    GIM_DYNARRAY_DESTROY(collision_result);
+
+    return (int)contactcount;
+}
+
+
+#endif // dTRIMESH_GIMPACT
+
+
+#endif // dTRIMESH_ENABLED
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_ray.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_ray.cpp
new file mode 100644
index 0000000..866758a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_ray.cpp
@@ -0,0 +1,207 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+#if dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+
+#if dTRIMESH_OPCODE
+int dCollideRTL(dxGeom* g1, dxGeom* RayGeom, int Flags, dContactGeom* Contacts, int Stride){ // OPCODE flavour: ray (RayGeom) against trimesh (g1); returns the number of contacts written
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (RayGeom->type == dRayClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh = (dxTriMesh*)g1;
+
+    const unsigned uiTLSKind = TriMesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == RayGeom->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    RayCollider& Collider = pccColliderCache->m_RayCollider;
+
+    dReal Length = dGeomRayGetLength(RayGeom);
+
+    int FirstContact = dGeomRayGetFirstContact(RayGeom);
+    int BackfaceCull = dGeomRayGetBackfaceCull(RayGeom);
+    int ClosestHit = dGeomRayGetClosestHit(RayGeom);
+
+    Collider.SetFirstContact(FirstContact != 0); // The ray geom's own settings are applied to the shared collider on every call
+    Collider.SetClosestHit(ClosestHit != 0);
+    Collider.SetCulling(BackfaceCull != 0);
+    Collider.SetMaxDist(Length);
+
+    const dVector3& TLPosition = *(const dVector3*)dGeomGetPosition(TriMesh);
+    const dMatrix3& TLRotation = *(const dMatrix3*)dGeomGetRotation(TriMesh);
+
+    Matrix4x4 MeshMatrix; // Rotation only: the mesh translation is subtracted from the ray origin below instead
+    const dVector3 ZeroVector3 = { REAL(0.0), };
+    MakeMatrix(ZeroVector3, TLRotation, MeshMatrix);
+
+    dVector3 Origin, Direction;
+    dGeomRayGet(RayGeom, Origin, Direction);
+
+    dVector3 OffsetOrigin;
+    dSubtractVectors3(OffsetOrigin, Origin, TLPosition);
+
+    /* Make Ray */
+    Ray WorldRay;
+    WorldRay.mOrig.Set(OffsetOrigin[0], OffsetOrigin[1], OffsetOrigin[2]);
+    WorldRay.mDir.Set(Direction[0], Direction[1], Direction[2]);
+
+    /* Intersect */
+    int TriCount = 0;
+    if (Collider.Collide(WorldRay, TriMesh->retrieveMeshBVTreeRef(), &MeshMatrix)) {
+        TriCount = pccColliderCache->m_Faces.GetNbFaces();
+    }
+
+    if (TriCount == 0) {
+        return 0;
+    }
+
+    const CollisionFace* Faces = pccColliderCache->m_Faces.GetFaces();
+
+    int OutTriCount = 0;
+    for (int i = 0; i < TriCount; i++) {
+        if (TriMesh->m_RayCallback == null ||
+            TriMesh->m_RayCallback(TriMesh, RayGeom, Faces[i].mFaceID,
+            Faces[i].mU, Faces[i].mV)) {
+                const int& TriIndex = Faces[i].mFaceID;
+                if (!TriMesh->invokeCallback(RayGeom, TriIndex)) {
+                    continue;
+                }
+
+                dContactGeom* Contact = SAFECONTACT(Flags, Contacts, OutTriCount, Stride);
+
+                dVector3 dv[3];
+                TriMesh->fetchMeshTriangle(dv, TriIndex, TLPosition, TLRotation);
+
+                dVector3 vu;
+                vu[0] = dv[1][0] - dv[0][0];
+                vu[1] = dv[1][1] - dv[0][1];
+                vu[2] = dv[1][2] - dv[0][2];
+                vu[3] = REAL(0.0);
+
+                dVector3 vv;
+                vv[0] = dv[2][0] - dv[0][0];
+                vv[1] = dv[2][1] - dv[0][1];
+                vv[2] = dv[2][2] - dv[0][2];
+                vv[3] = REAL(0.0);
+
+                dCalcVectorCross3(Contact->normal, vv, vu);	// Reversed
+
+                // Even though all triangles might be initially valid, 
+                // a triangle may degenerate into a segment after applying 
+                // space transformation.
+                if (dSafeNormalize3(Contact->normal))
+                {
+                    // No sense to save on single type conversion in algorithm of this size.
+                    // If there would be a custom typedef for distance type it could be used 
+                    // instead of dReal. However using float directly is the loss of abstraction 
+                    // and possible loss of precision in future.
+                    /*float*/ dReal T = Faces[i].mDistance;
+                    Contact->pos[0] = Origin[0] + (Direction[0] * T); // The original (not offset) ray origin is used for the contact point
+                    Contact->pos[1] = Origin[1] + (Direction[1] * T);
+                    Contact->pos[2] = Origin[2] + (Direction[2] * T);
+                    Contact->pos[3] = REAL(0.0);
+
+                    Contact->depth = T;
+                    Contact->g1 = TriMesh;
+                    Contact->g2 = RayGeom;
+                    Contact->side1 = TriIndex;
+                    Contact->side2 = -1;
+
+                    OutTriCount++;
+
+                    // Putting "break" at the end of loop prevents unnecessary checks on first pass and "continue"
+                    if (OutTriCount >= (Flags & NUMC_MASK)) {
+                        break;
+                    }
+                }
+        }
+    }
+    return OutTriCount;
+}
+#endif // dTRIMESH_OPCODE
+
+#if dTRIMESH_GIMPACT
+int dCollideRTL(dxGeom* g1, dxGeom* RayGeom, int Flags, dContactGeom* Contacts, int Stride)
+{ // GIMPACT flavour: ray (RayGeom) against trimesh (g1); at most one contact is written, into Contacts[0]
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (RayGeom->type == dRayClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh = (dxTriMesh*)g1;
+
+    dReal Length = dGeomRayGetLength(RayGeom);
+    int FirstContact = dGeomRayGetFirstContact(RayGeom); // NOTE(review): fetched but not used anywhere below
+    int BackfaceCull = dGeomRayGetBackfaceCull(RayGeom); // NOTE(review): fetched but not used anywhere below
+    int ClosestHit = dGeomRayGetClosestHit(RayGeom);
+    dVector3 Origin, Direction;
+    dGeomRayGet(RayGeom, Origin, Direction);
+
+    char intersect=0;
+    GIM_TRIANGLE_RAY_CONTACT_DATA contact_data;
+
+    if(ClosestHit) // Only this ray option selects between the two GIMPACT queries
+    {
+        intersect = gim_trimesh_ray_closest_collisionODE(&TriMesh->m_collision_trimesh,Origin,Direction,Length,&contact_data);
+    }
+    else
+    {
+        intersect = gim_trimesh_ray_collisionODE(&TriMesh->m_collision_trimesh,Origin,Direction,Length,&contact_data);
+    }
+
+    if(intersect == 0)
+    {
+        return 0;
+    }
+
+
+    if(!TriMesh->m_RayCallback || // Without a ray callback the hit is always accepted
+        TriMesh->m_RayCallback(TriMesh, RayGeom, contact_data.m_face_id, contact_data.u , contact_data.v))
+    {
+        dContactGeom* Contact = &( Contacts[ 0 ] );
+        VEC_COPY(Contact->pos,contact_data.m_point);
+        VEC_COPY(Contact->normal,contact_data.m_normal);
+        Contact->depth = contact_data.tparam;
+        Contact->g1 = TriMesh;
+        Contact->g2 = RayGeom;
+        Contact->side1 = contact_data.m_face_id;
+        Contact->side2 = -1;
+        return 1;
+    }
+
+    return 0; // The hit was rejected by the ray callback
+}
+#endif  // dTRIMESH_GIMPACT
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_sphere.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_sphere.cpp
new file mode 100644
index 0000000..8076411
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_sphere.cpp
@@ -0,0 +1,596 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// TriMesh code by Erwin de Vries.
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_util.h"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#if dTRIMESH_ENABLED
+
+#include "collision_trimesh_internal.h"
+
+
+#if dTRIMESH_OPCODE
+
+// Sphere-vs-triangle closest point test. Ripped from Opcode 1.1.
+static bool GetContactData(const dVector3& Center, dReal Radius, const dVector3 Origin, const dVector3 Edge0, const dVector3 Edge1, dReal& Dist, dReal& u, dReal& v){
+    // Minimizes |Origin + u*Edge0 + v*Edge1 - Center|^2 over the triangle (u >= 0, v >= 0,
+    // u + v <= 1); u and v receive the minimizer. Returns true when that closest point is
+    // within Radius, with Dist = Radius - distance; otherwise Dist is the plain distance.
+    dVector3 Diff;
+    Diff[0] = Origin[0] - Center[0];
+    Diff[1] = Origin[1] - Center[1];
+    Diff[2] = Origin[2] - Center[2];
+    Diff[3] = Origin[3] - Center[3]; // presumably unused: only dCalcVectorDot3 is applied to Diff below
+    // Coefficients of Q(u,v) = A00*u^2 + 2*A01*u*v + A11*v^2 + 2*B0*u + 2*B1*v + C
+    dReal A00 = dCalcVectorDot3(Edge0, Edge0);
+    dReal A01 = dCalcVectorDot3(Edge0, Edge1);
+    dReal A11 = dCalcVectorDot3(Edge1, Edge1);
+
+    dReal B0 = dCalcVectorDot3(Diff, Edge0);
+    dReal B1 = dCalcVectorDot3(Diff, Edge1);
+
+    dReal C = dCalcVectorDot3(Diff, Diff);
+    // u and v start as the unconstrained minimizer scaled by Det; the division is deferred to region 0
+    dReal Det = dFabs(A00 * A11 - A01 * A01);
+    u = A01 * B1 - A11 * B0;
+    v = A01 * B0 - A00 * B1;
+
+    dReal DistSq;
+
+    if (u + v <= Det){ // scaled form of u + v <= 1: regions 0, 3, 4 and 5
+        if(u < REAL(0.0)){
+            if(v < REAL(0.0)){  // region 4
+                if(B0 < REAL(0.0)){
+                    v = REAL(0.0);
+                    if (-B0 >= A00){
+                        u = REAL(1.0);
+                        DistSq = A00 + REAL(2.0) * B0 + C;
+                    }
+                    else{
+                        u = -B0 / A00;
+                        DistSq = B0 * u + C;
+                    }
+                }
+                else{
+                    u = REAL(0.0);
+                    if(B1 >= REAL(0.0)){
+                        v = REAL(0.0);
+                        DistSq = C;
+                    }
+                    else if(-B1 >= A11){
+                        v = REAL(1.0);
+                        DistSq = A11 + REAL(2.0) * B1 + C;
+                    }
+                    else{
+                        v = -B1 / A11;
+                        DistSq = B1 * v + C;
+                    }
+                }
+            }
+            else{  // region 3
+                u = REAL(0.0);
+                if(B1 >= REAL(0.0)){
+                    v = REAL(0.0);
+                    DistSq = C;
+                }
+                else if(-B1 >= A11){
+                    v = REAL(1.0);
+                    DistSq = A11 + REAL(2.0) * B1 + C;
+                }
+                else{
+                    v = -B1 / A11;
+                    DistSq = B1 * v + C;
+                }
+            }
+        }
+        else if(v < REAL(0.0)){  // region 5
+            v = REAL(0.0);
+            if (B0 >= REAL(0.0)){
+                u = REAL(0.0);
+                DistSq = C;
+            }
+            else if (-B0 >= A00){
+                u = REAL(1.0);
+                DistSq = A00 + REAL(2.0) * B0 + C;
+            }
+            else{
+                u = -B0 / A00;
+                DistSq = B0 * u + C;
+            }
+        }
+        else{  // region 0
+            // minimum at interior point
+            if (Det == REAL(0.0)){
+                u = REAL(0.0);
+                v = REAL(0.0);
+                DistSq = FLT_MAX; // zero Det (parallel or zero-length edges): force a huge distance so ordinary radii fail the test
+            }
+            else{
+                dReal InvDet = REAL(1.0) / Det;
+                u *= InvDet;
+                v *= InvDet;
+                DistSq = u * (A00 * u + A01 * v + REAL(2.0) * B0) + v * (A01 * u + A11 * v + REAL(2.0) * B1) + C;
+            }
+        }
+    }
+    else{ // scaled u + v exceeds Det: regions 1, 2 and 6
+        dReal Tmp0, Tmp1, Numer, Denom;
+
+        if(u < REAL(0.0)){  // region 2
+            Tmp0 = A01 + B0;
+            Tmp1 = A11 + B1;
+            if (Tmp1 > Tmp0){
+                Numer = Tmp1 - Tmp0;
+                Denom = A00 - REAL(2.0) * A01 + A11;
+                if (Numer >= Denom){
+                    u = REAL(1.0);
+                    v = REAL(0.0);
+                    DistSq = A00 + REAL(2.0) * B0 + C;
+                }
+                else{
+                    u = Numer / Denom;
+                    v = REAL(1.0) - u;
+                    DistSq = u * (A00 * u + A01 * v + REAL(2.0) * B0) + v * (A01 * u + A11 * v + REAL(2.0) * B1) + C;
+                }
+            }
+            else{
+                u = REAL(0.0);
+                if(Tmp1 <= REAL(0.0)){
+                    v = REAL(1.0);
+                    DistSq = A11 + REAL(2.0) * B1 + C;
+                }
+                else if(B1 >= REAL(0.0)){
+                    v = REAL(0.0);
+                    DistSq = C;
+                }
+                else{
+                    v = -B1 / A11;
+                    DistSq = B1 * v + C;
+                }
+            }
+        }
+        else if(v < REAL(0.0)){  // region 6
+            Tmp0 = A01 + B1;
+            Tmp1 = A00 + B0;
+            if (Tmp1 > Tmp0){
+                Numer = Tmp1 - Tmp0;
+                Denom = A00 - REAL(2.0) * A01 + A11;
+                if (Numer >= Denom){
+                    v = REAL(1.0);
+                    u = REAL(0.0);
+                    DistSq = A11 + REAL(2.0) * B1 + C;
+                }
+                else{
+                    v = Numer / Denom;
+                    u = REAL(1.0) - v;
+                    DistSq =  u * (A00 * u + A01 * v + REAL(2.0) * B0) + v * (A01 * u + A11 * v + REAL(2.0) * B1) + C;
+                }
+            }
+            else{
+                v = REAL(0.0);
+                if (Tmp1 <= REAL(0.0)){
+                    u = REAL(1.0);
+                    DistSq = A00 + REAL(2.0) * B0 + C;
+                }
+                else if(B0 >= REAL(0.0)){
+                    u = REAL(0.0);
+                    DistSq = C;
+                }
+                else{
+                    u = -B0 / A00;
+                    DistSq = B0 * u + C;
+                }
+            }
+        }
+        else{  // region 1
+            Numer = A11 + B1 - A01 - B0;
+            if (Numer <= REAL(0.0)){
+                u = REAL(0.0);
+                v = REAL(1.0);
+                DistSq = A11 + REAL(2.0) * B1 + C;
+            }
+            else{
+                Denom = A00 - REAL(2.0) * A01 + A11;
+                if (Numer >= Denom){
+                    u = REAL(1.0);
+                    v = REAL(0.0);
+                    DistSq = A00 + REAL(2.0) * B0 + C;
+                }
+                else{
+                    u = Numer / Denom;
+                    v = REAL(1.0) - u;
+                    DistSq = u * (A00 * u + A01 * v + REAL(2.0) * B0) + v * (A01 * u + A11 * v + REAL(2.0) * B1) + C;
+                }
+            }
+        }
+    }
+
+    Dist = dSqrt(dFabs(DistSq)); // dFabs presumably absorbs a slightly negative DistSq caused by round-off
+
+    if (Dist <= Radius){
+        Dist = Radius - Dist; // report penetration depth instead of distance
+        return true;
+    }
+    else return false;
+}
+
+// OPCODE sphere-vs-trimesh collider. Writes at most (Flags & NUMC_MASK) contacts into Contacts
+// (Stride bytes apart) and returns how many it wrote, after applying the mesh's sphere merge option.
+int dCollideSTL(dxGeom* g1, dxGeom* SphereGeom, int Flags, dContactGeom* Contacts, int Stride){
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (SphereGeom->type == dSphereClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh = (dxTriMesh*)g1;
+
+    const unsigned uiTLSKind = TriMesh->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == SphereGeom->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    SphereCollider& Collider = pccColliderCache->m_SphereCollider;
+
+    const dVector3& TLPosition = *(const dVector3*)dGeomGetPosition(TriMesh);
+    const dMatrix3& TLRotation = *(const dMatrix3*)dGeomGetRotation(TriMesh);
+
+    // Only the rotation goes into the mesh matrix; the translation is applied to the sphere centre instead.
+    Matrix4x4 MeshMatrix;
+    const dVector3 ZeroVector3 = { REAL(0.0), };
+    MakeMatrix(ZeroVector3, TLRotation, MeshMatrix);
+
+    const dVector3& Position = *(const dVector3*)dGeomGetPosition(SphereGeom);
+    dReal Radius = dGeomSphereGetRadius(SphereGeom);
+
+    dVector3 OffsetPosition;
+    dSubtractVectors3(OffsetPosition, Position, TLPosition);
+
+    // Sphere
+    Sphere Sphere;
+    Sphere.mCenter.Set(OffsetPosition[0], OffsetPosition[1], OffsetPosition[2]);
+    Sphere.mRadius = Radius;
+
+    // TC results
+    if (TriMesh->getDoTC(dxTriMesh::TTC_SPHERE)) {
+        dxTriMesh::SphereTC* sphereTC = 0;
+        const int sphereCacheSize = TriMesh->m_SphereTCCache.size();
+        for (int i = 0; i != sphereCacheSize; i++){
+            if (TriMesh->m_SphereTCCache[i].Geom == SphereGeom){
+                sphereTC = &TriMesh->m_SphereTCCache[i];
+                break;
+            }
+        }
+
+        if (!sphereTC) {
+            TriMesh->m_SphereTCCache.push(dxTriMesh::SphereTC());
+            sphereTC = &TriMesh->m_SphereTCCache[TriMesh->m_SphereTCCache.size() - 1];
+            sphereTC->Geom = SphereGeom;
+        }
+
+        // Intersect
+        Collider.SetTemporalCoherence(true);
+        Collider.Collide(*sphereTC, Sphere, TriMesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+    }
+    else {
+        Collider.SetTemporalCoherence(false);
+        Collider.Collide(pccColliderCache->m_DefaultSphereCache, Sphere, TriMesh->retrieveMeshBVTreeRef(), null, &MeshMatrix);
+    }
+
+    if (! Collider.GetContactStatus()) {
+        // no collision occurred
+        return 0;
+    }
+
+    // get results
+    int TriCount = Collider.GetNbTouchedPrimitives();
+    const int* Triangles = (const int*)Collider.GetTouchedPrimitives();
+
+    if (TriCount == 0){
+        return 0;
+    }
+
+    if (TriMesh->m_ArrayCallback != null){
+        TriMesh->m_ArrayCallback(TriMesh, SphereGeom, Triangles, TriCount);
+    }
+
+    int OutTriCount = 0;
+    for (int i = 0; i < TriCount; i++){
+        if (OutTriCount == (Flags & NUMC_MASK)){
+            break;
+        }
+
+        const int TriIndex = Triangles[i];
+        if (!TriMesh->invokeCallback(SphereGeom, TriIndex))
+            continue;
+
+        dVector3 dv[3];
+        TriMesh->fetchMeshTriangle(dv, TriIndex, TLPosition, TLRotation);
+
+        dVector3& v0 = dv[0];
+        dVector3& v1 = dv[1];
+        dVector3& v2 = dv[2];
+
+        dVector3 vu;
+        vu[0] = v1[0] - v0[0];
+        vu[1] = v1[1] - v0[1];
+        vu[2] = v1[2] - v0[2];
+        vu[3] = REAL(0.0);
+
+        dVector3 vv;
+        vv[0] = v2[0] - v0[0];
+        vv[1] = v2[1] - v0[1];
+        vv[2] = v2[2] - v0[2];
+        vv[3] = REAL(0.0);
+
+        // Get plane coefficients
+        dVector4 Plane;
+        dCalcVectorCross3(Plane, vu, vv);
+
+        // Even though all triangles might be initially valid,
+        // a triangle may degenerate into a segment after applying
+        // space transformation.
+        if (!dSafeNormalize3(Plane)) {
+            continue;
+        }
+
+        // If the center of the sphere is within the positive halfspace of the triangle's
+        // plane, allow a contact to be generated. If the center made it into the positive
+        // halfspace of a back-facing triangle, then the physics update and/or velocity
+        // needs to be adjusted (penetration has occurred anyway).
+        dReal side = dCalcVectorDot3(Plane,Position) - dCalcVectorDot3(Plane, v0);
+
+        if(side < REAL(0.0)) {
+            continue;
+        }
+
+        dReal Depth;
+        dReal u, v;
+        if (!GetContactData(Position, Radius, v0, vu, vv, Depth, u, v)){
+            continue; // Sphere doesn't hit triangle
+        }
+
+        if (Depth < REAL(0.0)){
+            continue; // Negative depth does not produce a contact
+        }
+
+        dVector3 ContactPos;
+        dReal w = REAL(1.0) - u - v;
+        ContactPos[0] = (v0[0] * w) + (v1[0] * u) + (v2[0] * v);
+        ContactPos[1] = (v0[1] * w) + (v1[1] * u) + (v2[1] * v);
+        ContactPos[2] = (v0[2] * w) + (v1[2] * u) + (v2[2] * v);
+
+        // Depth returned from GetContactData is depth along
+        // contact point - sphere center direction
+        // we'll project it to contact normal
+        dVector3 dir;
+        dir[0] = Position[0]-ContactPos[0];
+        dir[1] = Position[1]-ContactPos[1];
+        dir[2] = Position[2]-ContactPos[2];
+
+        // A sphere centre lying exactly on the contact point gives a zero-length direction;
+        // dividing by its length would produce a NaN depth that passes the sign test below.
+        // The centre is then on the triangle itself, so Depth is already along the normal.
+        const dReal dirLenSq = dCalcVectorDot3(dir, dir);
+        const dReal dirProj = dirLenSq > REAL(0.0)
+            ? dCalcVectorDot3(dir, Plane) / dSqrt(dirLenSq)
+            : REAL(1.0);
+
+        // A negative projection would turn the non-negative Depth into a negative contact depth.
+        if (dirProj < REAL(0.0)){
+            continue;
+        }
+
+        dContactGeom* Contact = SAFECONTACT(Flags, Contacts, OutTriCount, Stride);
+        Contact->pos[0] = ContactPos[0];
+        Contact->pos[1] = ContactPos[1];
+        Contact->pos[2] = ContactPos[2];
+        Contact->pos[3] = REAL(0.0);
+
+        // Using normal as plane (reversed)
+        Contact->normal[0] = -Plane[0];
+        Contact->normal[1] = -Plane[1];
+        Contact->normal[2] = -Plane[2];
+        Contact->normal[3] = REAL(0.0);
+
+        Contact->depth = Depth * dirProj;
+        //Contact->depth = Radius - side; // (mg) penetration depth is distance along normal not shortest distance
+
+        // We need to set these unconditionally, as the merging may fail! - Bram
+        Contact->g1 = TriMesh;
+        Contact->g2 = SphereGeom;
+        Contact->side2 = -1;
+        Contact->side1 = TriIndex;
+        OutTriCount++;
+    }
+
+    if (OutTriCount == 0){
+        return 0;
+    }
+
+    if (TriMesh->m_SphereContactsMergeOption == MERGE_CONTACTS_FULLY) {
+        dContactGeom* Contact = SAFECONTACT(Flags, Contacts, 0, Stride);
+        Contact->g1 = TriMesh;
+        Contact->g2 = SphereGeom;
+        Contact->side2 = -1;
+
+        if (OutTriCount > 1 && !(Flags & CONTACTS_UNIMPORTANT)){
+            dVector3 pos;
+            pos[0] = Contact->pos[0];
+            pos[1] = Contact->pos[1];
+            pos[2] = Contact->pos[2];
+
+            dVector3 normal;
+            normal[0] = Contact->normal[0] * Contact->depth;
+            normal[1] = Contact->normal[1] * Contact->depth;
+            normal[2] = Contact->normal[2] * Contact->depth;
+            normal[3] = REAL(0.0);
+
+            int TriIndex = Contact->side1;
+
+            for (int i = 1; i < OutTriCount; i++){
+                dContactGeom* TempContact = SAFECONTACT(Flags, Contacts, i, Stride);
+                pos[0] += TempContact->pos[0];
+                pos[1] += TempContact->pos[1];
+                pos[2] += TempContact->pos[2];
+
+                normal[0] += TempContact->normal[0] * TempContact->depth;
+                normal[1] += TempContact->normal[1] * TempContact->depth;
+                normal[2] += TempContact->normal[2] * TempContact->depth;
+
+                TriIndex = (TriMesh->m_TriMergeCallback) ? TriMesh->m_TriMergeCallback(TriMesh, TriIndex, TempContact->side1) : -1;
+            }
+
+            if ( !dSafeNormalize3(normal) )
+                return OutTriCount; // Cannot merge in this pathological case
+
+            // Using a merged normal, means that for each intersection, this new normal will be less effective in solving the intersection.
+            // That is why we need to correct this by increasing the depth for each intersection.
+            // The maximum of the adjusted depths is our newly merged depth value - Bram.
+
+            dReal mergedDepth = REAL(0.0);
+            dReal minEffectiveness = REAL(0.5);
+            for ( int i = 0; i < OutTriCount; ++i )
+            {
+                dContactGeom* TempContact = SAFECONTACT(Flags, Contacts, i, Stride);
+                dReal effectiveness = dCalcVectorDot3(normal, TempContact->normal);
+                if ( effectiveness < dEpsilon )
+                    return OutTriCount; // Cannot merge this pathological case
+                // Cap our adjustment for the new normal to a factor 2, meaning a 60 deg change in normal.
+                effectiveness = ( effectiveness < minEffectiveness ) ? minEffectiveness : effectiveness;
+                dReal adjusted = TempContact->depth / effectiveness;
+                mergedDepth = ( mergedDepth < adjusted ) ? adjusted : mergedDepth;
+            }
+
+            // Every check passed: only now overwrite contact 0, so that the early
+            // returns above hand back the original, unmodified contacts.
+            Contact->side1 = TriIndex;
+            Contact->pos[0] = pos[0] / OutTriCount;
+            Contact->pos[1] = pos[1] / OutTriCount;
+            Contact->pos[2] = pos[2] / OutTriCount;
+            Contact->depth = mergedDepth;
+            Contact->normal[0] = normal[0];
+            Contact->normal[1] = normal[1];
+            Contact->normal[2] = normal[2];
+            Contact->normal[3] = normal[3];
+        }
+
+        return 1;
+    }
+
+    if (TriMesh->m_SphereContactsMergeOption == MERGE_CONTACT_NORMALS) {
+        if (OutTriCount != 1 && !(Flags & CONTACTS_UNIMPORTANT)){
+            dVector3 Normal;
+            dContactGeom* FirstContact = SAFECONTACT(Flags, Contacts, 0, Stride);
+            Normal[0] = FirstContact->normal[0] * FirstContact->depth;
+            Normal[1] = FirstContact->normal[1] * FirstContact->depth;
+            Normal[2] = FirstContact->normal[2] * FirstContact->depth;
+            Normal[3] = FirstContact->normal[3] * FirstContact->depth;
+
+            for (int i = 1; i < OutTriCount; i++){
+                dContactGeom* Contact = SAFECONTACT(Flags, Contacts, i, Stride);
+                Normal[0] += Contact->normal[0] * Contact->depth;
+                Normal[1] += Contact->normal[1] * Contact->depth;
+                Normal[2] += Contact->normal[2] * Contact->depth;
+                Normal[3] += Contact->normal[3] * Contact->depth;
+            }
+
+            // The weighted normals can cancel out; in that case there is no common
+            // direction to assign, so the per-triangle normals are kept.
+            if (dSafeNormalize3(Normal)) {
+                for (int i = 0; i < OutTriCount; i++){
+                    dContactGeom* Contact = SAFECONTACT(Flags, Contacts, i, Stride);
+                    Contact->normal[0] = Normal[0];
+                    Contact->normal[1] = Normal[1];
+                    Contact->normal[2] = Normal[2];
+                    Contact->normal[3] = Normal[3];
+                }
+            }
+        }
+
+        return OutTriCount;
+    }
+
+    dIASSERT(TriMesh->m_SphereContactsMergeOption == DONT_MERGE_CONTACTS);
+    return OutTriCount;
+}
+
+
+#endif // dTRIMESH_OPCODE
+
+
+#if dTRIMESH_GIMPACT
+
+#include "gimpact_contact_export_helper.h"
+#include "gimpact_gim_contact_accessor.h"
+
+
+// GIMPACT sphere-vs-trimesh collider; returns the number of contacts exported to Contacts.
+int dCollideSTL(dxGeom* g1, dxGeom* SphereGeom, int Flags, dContactGeom* Contacts, int Stride)
+{
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (SphereGeom->type == dSphereClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh = (dxTriMesh*)g1;
+    dVector3& Position = *(dVector3*)dGeomGetPosition(SphereGeom);
+    dReal Radius = dGeomSphereGetRadius(SphereGeom);
+
+    // Temporary list that receives the raw GIMPACT contacts
+    GDYNAMIC_ARRAY trimeshcontacts;
+    GIM_CREATE_CONTACT_LIST(trimeshcontacts);
+
+    g1 -> recomputeAABB();
+    SphereGeom -> recomputeAABB();
+
+    // Run the sphere/trimesh query
+    gim_trimesh_sphere_collisionODE(&TriMesh->m_collision_trimesh,Position,Radius,&trimeshcontacts);
+
+    unsigned contactcount = trimeshcontacts.m_size;
+    if (contactcount == 0)
+    {
+        GIM_DYNARRAY_DESTROY(trimeshcontacts);
+        return 0;
+    }
+
+    // Export to ODE contacts; the helper's return value becomes the contact count
+    GIM_CONTACT * rawcontacts = GIM_DYNARRAY_POINTER(GIM_CONTACT,trimeshcontacts);
+    dxGIMCContactAccessor contactaccessor(rawcontacts, g1, SphereGeom, -1);
+    contactcount = dxGImpactContactsExportHelper::ExportMaxDepthGImpactContacts(contactaccessor, contactcount, Flags, Contacts, Stride);
+
+    GIM_DYNARRAY_DESTROY(trimeshcontacts);
+    return (int)contactcount;
+}
+
+
+#endif // dTRIMESH_GIMPACT
+
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh.cpp
new file mode 100644
index 0000000..27c90bc
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh.cpp
@@ -0,0 +1,1367 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// OPCODE TriMesh/TriMesh collision code
+// Written at 2006-10-28 by Francisco León (http://gimpact.sourceforge.net)
+
+#ifdef _MSC_VER
+#pragma warning(disable:4244 4305)  // for VC++, no precision loss complaints
+#endif
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+
+#if dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+
+
+#if !dTLS_ENABLED
+// Define the collider cache instance regardless of whether OPCODE or GIMPACT is selected
+/*extern */TrimeshCollidersCache g_ccTrimeshCollidersCache;
+#endif
+
+
+#if dTRIMESH_OPCODE
+
+// New Implementation
+#if !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+#define SMALL_ELT           REAL(2.5e-4)
+#define EXPANDED_ELT_THRESH REAL(1.0e-3)
+#define DISTANCE_EPSILON    REAL(1.0e-8)
+#define VELOCITY_EPSILON    REAL(1.0e-5)
+#define TINY_PENETRATION    REAL(5.0e-6)
+
+// Small fixed-capacity list of points, passed by reference between the polygon
+// clipping and deepest-point helpers declared below.
+// NOTE(review): Count presumably holds the number of valid entries in Points.
+struct LineContactSet
+{
+    enum { MAX_POINTS = 8 }; // capacity of the Points array
+
+    dVector3 Points[MAX_POINTS];
+    int      Count;
+};
+
+
+// Forward declarations of file-local helpers. Not used: static void GetTriangleGeometryCallback(udword, VertexPointers&, udword);
+static inline void dMakeMatrix4(const dVector3 Position, const dMatrix3 Rotation, dMatrix4 &B);
+//static void dInvertMatrix4( dMatrix4& B, dMatrix4& Binv );
+//static int IntersectLineSegmentRay(dVector3, dVector3, dVector3, dVector3,  dVector3);
+static void ClipConvexPolygonAgainstPlane( const dVector3, dReal, LineContactSet& );
+
+
+/// Returns the penetration depth. NOTE(review): deep_points is presumably an output - confirm at the definition.
+static dReal MostDeepPoints(
+                            LineContactSet & points,
+                            const dVector3 plane_normal,
+                            dReal plane_dist,
+                            LineContactSet & deep_points);
+// NOTE(review): contactcount is taken by reference, presumably so the running total can be updated - confirm at the definition.
+static bool TriTriContacts(const dVector3 tr1[3],
+                           const dVector3 tr2[3],
+                           int TriIndex1, int TriIndex2,
+                           dxGeom* g1, dxGeom* g2, int Flags,
+                           CONTACT_KEY_HASH_TABLE &hashcontactset,
+                           dContactGeom* Contacts, int Stride,
+                           int &contactcount);
+
+
+/* some math macros: short aliases for the ODE vector helpers */
+#define IS_ZERO(v) (!(v)[0] && !(v)[1] && !(v)[2]) // true when the first three components are all exactly zero
+
+#define CROSS(dest,v1,v2) dCalcVectorCross3(dest, v1, v2)
+
+#define DOT(v1,v2) dCalcVectorDot3(v1, v2)
+
+#define SUB(dest,v1,v2) dSubtractVectors3(dest, v1, v2)
+
+#define ADD(dest,v1,v2) dAddVectors3(dest, v1, v2)
+// MULT and SMULT below expand to the same dCopyScaledVector3 call
+#define MULT(dest,v,factor) dCopyScaledVector3(dest, v, factor)
+
+#define SET(dest,src) dCopyVector3(dest, src)
+
+#define SMULT(p,q,s) dCopyScaledVector3(p, q, s)
+// Note the argument order: COMBO's (p, t, q) is forwarded as (p, q, t)
+#define COMBO(combo,p,t,q) dAddVectorScaledVector3(combo, p, q, t)
+
+#define LENGTH(x) dCalcVectorLength3(x)
+// Expands to an assignment to d (use it as a statement); p and q are forwarded as (q, p)
+#define DEPTH(d, p, q, n) d = dCalcPointDepth3(q, p, n)
+
+
+// Toggles pen_v between v1 and v2: if it was v1 it becomes v2 (pen_elt = elt_f2,
+// col_v = v1, n = n1); otherwise it becomes v1 (pen_elt = elt_f1, col_v = v2, n = n2).
+static inline 
+void SwapNormals(dVector3 *&pen_v, dVector3 *&col_v, dVector3* v1, dVector3* v2,
+            dVector3 *&pen_elt, dVector3 *elt_f1, dVector3 *elt_f2,
+            dVector3 n, dVector3 n1, dVector3 n2)
+{
+    // Evaluate once, before pen_v itself is reassigned
+    const bool wasFirst = (pen_v == v1);
+
+    pen_v = wasFirst ? v2 : v1;
+    pen_elt = wasFirst ? elt_f2 : elt_f1;
+    col_v = wasFirst ? v1 : v2;
+
+    // n takes the normal that belongs to the previous pen_v
+    if (wasFirst) { SET(n, n1); }
+    else { SET(n, n2); }
+}
+
+/////////////////////// MECHANISM FOR AVOIDING CONTACT REDUNDANCY ///////////////////////////////
+////* Written by Francisco León (http://gimpact.sourceforge.net) *///
+#define CONTACT_DIFF_EPSILON REAL(0.00001)
+#if defined(dDOUBLE) // CONTACT_NORMAL_ZERO is smaller in double precision builds
+#define CONTACT_NORMAL_ZERO REAL(0.0000001)
+#else // if defined(dSINGLE)
+#define CONTACT_NORMAL_ZERO REAL(0.00001)
+#endif
+#define CONTACT_POS_HASH_QUOTIENT REAL(10000.0) // contact positions are multiplied by this before flooring when hashed
+#define dSQRT3	REAL(1.7320508075688773) // sqrt(3); scales a hash cell edge to its cube diagonal in the proximity test
+
+// Stores `contact` in `key` and derives key.m_key from the contact position:
+// every coordinate is scaled by CONTACT_POS_HASH_QUOTIENT, floored, and its bit
+// pattern is mixed byte by byte into a rolling hash.
+static 
+void UpdateContactKey(CONTACT_KEY & key, dContactGeom * contact)
+{
+    key.m_contact = contact;
+
+    // Number of unsigned words covering one dReal
+    const int wordCount = sizeof(dReal) / sizeof(unsigned);
+    dIASSERT(sizeof(dReal) % sizeof(unsigned) == 0);
+
+    unsigned int hash = 0;
+
+    for (int axis = 0; axis != 3; ++axis)
+    {
+        if (axis != 0)
+        {
+            // Shift/or step applied between consecutive coordinates
+            hash = (hash << 11) | (hash >> 21);
+        }
+
+        const dReal cell = dFloor(contact->pos[axis] * CONTACT_POS_HASH_QUOTIENT);
+
+        unsigned words[ wordCount ];
+        memcpy(words, &cell, sizeof(cell));
+
+        // Fold all words of the value into one
+        unsigned int folded = words[0];
+        for (int w = 1; w < wordCount; ++w)
+            folded ^= words[w];
+
+        // Feed the four bytes of the folded word, most significant first
+        for (int shift = 24; shift >= 0; shift -= 8)
+            hash = (( hash << 4 ) + ((folded >> shift) & 0xFF)) ^ ( hash >> 28 );
+    }
+
+    key.m_key = hash;
+}
+
+
+// Folds the key down to a bucket number in [0, 255] by XOR-ing its four low bytes together.
+static inline unsigned int MakeContactIndex(unsigned int key)
+{
+    dIASSERT(CONTACTS_HASHSIZE == 256);
+
+    key ^= key >> 16;   // upper half onto lower half
+    key ^= key >> 8;    // second byte onto lowest byte
+
+    return key & 0xFF;
+}
+
+// Returns a previously stored contact from `node` that has the same key and lies within
+// one hash-cell diagonal of the new contact; otherwise stores the new key if the bucket
+// still has room and returns the new contact.
+static 
+dContactGeom *AddContactToNode(const CONTACT_KEY * contactkey,CONTACT_KEY_HASH_NODE * node)
+{
+    dContactGeom * const newcontact = contactkey->m_contact;
+
+    for (int slot = 0; slot < node->m_keycount; ++slot)
+    {
+        if (node->m_keyarray[slot].m_key != contactkey->m_key)
+            continue;
+        dContactGeom *stored = node->m_keyarray[slot].m_contact;
+        if (dCalcPointsDistance3(stored->pos, newcontact->pos) < REAL(1.00001) /*for comp. errors*/ * dSQRT3 / CONTACT_POS_HASH_QUOTIENT /*cube diagonal*/)
+            return stored;
+    }
+
+    if (node->m_keycount >= MAXCONTACT_X_NODE)
+    {
+        dDEBUGMSG("Trimesh-trimesh contach hash table bucket overflow - close contacts might not be culled");
+        return newcontact;
+    }
+
+    node->m_keyarray[node->m_keycount].m_contact = newcontact;
+    node->m_keyarray[node->m_keycount].m_key = contactkey->m_key;
+    node->m_keycount++;
+    return newcontact;
+}
+
+// Undoes the latest insertion: pops the last key of `node` if it refers to this contact.
+// If it does not, the bucket is expected to be full (the assertion below checks that).
+static 
+void RemoveNewContactFromNode(const CONTACT_KEY * contactkey, CONTACT_KEY_HASH_NODE * node)
+{
+    dIASSERT(node->m_keycount > 0);
+
+    const int lastindex = node->m_keycount - 1;
+    const bool islast = node->m_keyarray[lastindex].m_contact == contactkey->m_contact;
+
+    // A miss is only accepted when the bucket holds MAXCONTACT_X_NODE keys
+    dIASSERT(islast || node->m_keycount == MAXCONTACT_X_NODE);
+    if (islast) node->m_keycount = lastindex;
+}
+
+// Removes the key referring to this contact from `node` by moving the last key into its
+// slot and shrinking the bucket by one. The contact is expected to be present.
+static 
+void RemoveArbitraryContactFromNode(const CONTACT_KEY *contactkey, CONTACT_KEY_HASH_NODE *node)
+{
+    dIASSERT(node->m_keycount > 0);
+
+    const int lastkeyindex = node->m_keycount - 1;
+    int keyindex = 0;
+
+    // The last slot is never compared here; reaching it implies it holds the match
+    while (keyindex < lastkeyindex && node->m_keyarray[keyindex].m_contact != contactkey->m_contact)
+        ++keyindex;
+
+    dIASSERT(keyindex < lastkeyindex || 
+        node->m_keyarray[keyindex].m_contact == contactkey->m_contact);
+
+    // A match before the last slot leaves a hole that the last key fills
+    if (keyindex < lastkeyindex)
+        node->m_keyarray[keyindex] = node->m_keyarray[lastkeyindex];
+    node->m_keycount = lastkeyindex;
+}
+
+// Finds the key of `node` that refers to contactkey's contact and redirects it to
+// pwithcontact. If no earlier slot matches, the last slot is the one that is updated
+// (the assertion checks that it really refers to the contact).
+static 
+void UpdateArbitraryContactInNode(const CONTACT_KEY *contactkey, CONTACT_KEY_HASH_NODE *node,
+                                  dContactGeom *pwithcontact)
+{
+    dIASSERT(node->m_keycount > 0);
+
+    const int lastkeyindex = node->m_keycount - 1;
+    int keyindex = 0;
+
+    // The last slot is never compared here; reaching it implies it holds the match
+    while (keyindex < lastkeyindex && node->m_keyarray[keyindex].m_contact != contactkey->m_contact)
+        ++keyindex;
+
+    dIASSERT(keyindex < lastkeyindex || 
+        node->m_keyarray[keyindex].m_contact == contactkey->m_contact);
+
+    // keyindex is now either the matching slot or the last slot
+    node->m_keyarray[keyindex].m_contact = pwithcontact;
+}
+
+// Resets the whole contact hash table to the all-zero-bytes state.
+static 
+void ClearContactSet(CONTACT_KEY_HASH_TABLE &hashcontactset)
+{
+    // sizeof applied to a reference yields the size of the referenced table
+    memset(&hashcontactset, 0, sizeof(hashcontactset));
+}
+
+// Selects the bucket for newkey and hands the key to AddContactToNode.
+// Returns the contact pointer reported by AddContactToNode; AllocNewContact
+// treats a result equal to newkey.m_contact as "newly inserted" and any other
+// pointer as a contact that is already registered.
+static 
+dContactGeom *InsertContactInSet(CONTACT_KEY_HASH_TABLE &hashcontactset, const CONTACT_KEY &newkey)
+{
+    const unsigned int bucket = MakeContactIndex(newkey.m_key);
+    CONTACT_KEY_HASH_NODE *node = &hashcontactset[bucket];
+
+    return AddContactToNode(&newkey, node);
+}
+
+// Reverts the latest insertion of contactkey: forwards to
+// RemoveNewContactFromNode for the bucket selected by the key.
+static 
+void RemoveNewContactFromSet(CONTACT_KEY_HASH_TABLE &hashcontactset, const CONTACT_KEY &contactkey)
+{
+    const unsigned int bucket = MakeContactIndex(contactkey.m_key);
+    CONTACT_KEY_HASH_NODE *node = &hashcontactset[bucket];
+
+    RemoveNewContactFromNode(&contactkey, node);
+}
+
+// Removes the contact referenced by contactkey from the bucket selected by
+// the key (see RemoveArbitraryContactFromNode).
+static 
+void RemoveArbitraryContactFromSet(CONTACT_KEY_HASH_TABLE &hashcontactset, const CONTACT_KEY &contactkey)
+{
+    const unsigned int bucket = MakeContactIndex(contactkey.m_key);
+    CONTACT_KEY_HASH_NODE *node = &hashcontactset[bucket];
+
+    RemoveArbitraryContactFromNode(&contactkey, node);
+}
+
+// Makes the hash entry of contactkey refer to pwithcontact: forwards to
+// UpdateArbitraryContactInNode for the bucket selected by the key.
+static 
+void UpdateArbitraryContactInSet(CONTACT_KEY_HASH_TABLE &hashcontactset, const CONTACT_KEY &contactkey, 
+                                 dContactGeom *pwithcontact)
+{
+    const unsigned int bucket = MakeContactIndex(contactkey.m_key);
+    CONTACT_KEY_HASH_NODE *node = &hashcontactset[bucket];
+
+    UpdateArbitraryContactInNode(&contactkey, node, pwithcontact);
+}
+
+// Tries to reserve an output contact for the position newpoint.
+//
+// The position is written either into the next free slot of Contacts or, when
+// contactcount has already reached (Flags & NUMC_MASK), into a local scratch
+// record, and is then offered to the hash set.
+//
+// Returns false when the set reported a different, already registered contact;
+// out_pcontact then receives that contact. Returns true when the position was
+// new: out_pcontact receives the freshly occupied slot (contactcount is
+// incremented), or NULL when only the scratch record was available, in which
+// case the scratch record is removed from the set again.
+static 
+bool AllocNewContact(
+                     const dVector3 newpoint, dContactGeom *& out_pcontact,
+                     int Flags, CONTACT_KEY_HASH_TABLE &hashcontactset,
+                     dContactGeom* Contacts, int Stride,  int &contactcount)
+{
+    dContactGeom scratchcontact;
+    dContactGeom *candidate;
+
+    const bool bufferfull = contactcount == (Flags & NUMC_MASK);
+    if (bufferfull)
+    {
+        candidate = &scratchcontact;
+    }
+    else
+    {
+        candidate = SAFECONTACT(Flags, Contacts, contactcount, Stride);
+    }
+
+    candidate->pos[0] = newpoint[0];
+    candidate->pos[1] = newpoint[1];
+    candidate->pos[2] = newpoint[2];
+    candidate->pos[3] = 1.0f;
+
+    CONTACT_KEY candidatekey;
+    UpdateContactKey(candidatekey, candidate);
+
+    dContactGeom *registered = InsertContactInSet(hashcontactset, candidatekey);
+    if (registered != candidate)
+    {
+        // The set reported another contact for this position
+        out_pcontact = registered;
+        return false;
+    }
+
+    if (bufferfull)
+    {
+        // The scratch record dies with this call, so it must not stay in the set
+        RemoveNewContactFromSet(hashcontactset, candidatekey);
+        out_pcontact = NULL;
+    }
+    else
+    {
+        contactcount++;
+        out_pcontact = candidate;
+    }
+
+    return true;
+}
+
+// Releases an output contact that is already registered in the hash set.
+// The contact is removed from the set; if it is not the last record of the
+// buffer, the last record is copied over it and the set entry of that last
+// record is re-targeted to the new location. contactcount is decremented.
+static 
+void FreeExistingContact(dContactGeom *pcontact,
+                         int Flags, CONTACT_KEY_HASH_TABLE &hashcontactset, 
+                         dContactGeom *Contacts, int Stride, int &contactcount)
+{
+    // Unregister the contact being freed
+    CONTACT_KEY freedkey;
+    UpdateContactKey(freedkey, pcontact);
+    RemoveArbitraryContactFromSet(hashcontactset, freedkey);
+
+    const int tailindex = contactcount - 1;
+    dContactGeom *ptailcontact = SAFECONTACT(Flags, Contacts, tailindex, Stride);
+
+    if (ptailcontact != pcontact)
+    {
+        // Keep the buffer dense: move the tail record into the freed slot...
+        *pcontact = *ptailcontact;
+
+        // ...and let the set know where the tail record lives now
+        CONTACT_KEY tailkey;
+        UpdateContactKey(tailkey, ptailcontact);
+        UpdateArbitraryContactInSet(hashcontactset, tailkey, pcontact);
+    }
+
+    contactcount = tailindex;
+}
+
+
+// Adds one contact point of a triangle pair to the output, merging it with an
+// already registered contact when AllocNewContact reports one for this position.
+//
+// g1, g2               - the colliding geoms; stored into the contact
+// TriIndex1, TriIndex2 - triangle indices; stored as side1/side2
+// point                - contact position
+// normal               - contact normal, expected to be of unit length (asserted).
+//                        NOTE(review): it is passed to MULT as destination in the
+//                        averaging branch, so the caller's vector appears to be
+//                        negated in place when pcontact->g1 == g2 - confirm against
+//                        the MULT definition.
+// depth                - penetration depth
+// Flags, Contacts, Stride, contactcount - output buffer description; contactcount
+//                        is updated by AllocNewContact/FreeExistingContact
+// hashcontactset       - set of the contacts registered so far
+//
+// Returns the contact that was written or merged into; NULL if the point was new
+// but the buffer is full.
+// NOTE(review): when the merged contact is released through FreeExistingContact
+// the returned pointer no longer identifies that contact (the slot then holds the
+// former last record or lies past contactcount).
+static 
+dContactGeom *  PushNewContact( dxGeom* g1, dxGeom* g2, int TriIndex1, int TriIndex2,
+                               const dVector3 point,
+                               dVector3 normal,
+                               dReal  depth,
+                               int Flags, 
+                               CONTACT_KEY_HASH_TABLE &hashcontactset,
+                               dContactGeom* Contacts, int Stride,
+                               int &contactcount)
+{
+    dIASSERT(dFabs(dVector3Length((const dVector3 &)(*normal)) - REAL(1.0)) < REAL(1e-6)); // This assumption is used in the code
+
+    dContactGeom * pcontact;
+
+    // false means that a contact for this position is already registered
+    if (!AllocNewContact(point, pcontact, Flags, hashcontactset, Contacts, Stride, contactcount))
+    {
+        const dReal depthDifference = depth - pcontact->depth;
+
+        // The new point is noticeably deeper: it replaces the registered contact data
+        if (depthDifference > CONTACT_DIFF_EPSILON)
+        {
+            pcontact->normal[0] = normal[0];
+            pcontact->normal[1] = normal[1];
+            pcontact->normal[2] = normal[2];
+            pcontact->normal[3] = REAL(1.0); // used to store length of vector sum for averaging
+            pcontact->depth = depth;
+
+            pcontact->g1 = g1;
+            pcontact->g2 = g2;
+            pcontact->side1 = TriIndex1;
+            pcontact->side2 = TriIndex2;
+        }
+        // Depths are equal within the tolerance: average the normals.
+        // A noticeably shallower point falls through both branches and is ignored.
+        else if (depthDifference >= -CONTACT_DIFF_EPSILON) ///average
+        {
+            // The registered contact has the geoms in the opposite order:
+            // flip the incoming data so that it matches
+            if(pcontact->g1 == g2)
+            {
+                MULT(normal,normal, REAL(-1.0));
+                int tempInt = TriIndex1; TriIndex1 = TriIndex2; TriIndex2 = tempInt;
+                // This should be discarded by optimizer as g1 and g2 are 
+                // not used any more but it's preferable to keep this line for 
+                // the sake of consistency in variable values.
+                dxGeom *tempGeom = g1; g1 = g2; g2 = tempGeom;
+            }
+
+            // normal[3] holds the length of the normal sum accumulated so far;
+            // presumably COMBO forms normal + oldLen * pcontact->normal - confirm against the macro
+            const dReal oldLen = pcontact->normal[3];
+            COMBO(pcontact->normal, normal, oldLen, pcontact->normal);
+
+            const dReal len = LENGTH(pcontact->normal);
+            if (len > CONTACT_NORMAL_ZERO)
+            {
+                MULT(pcontact->normal, pcontact->normal, REAL(1.0) / len);
+                pcontact->normal[3] = len;
+
+                // Let the meshes merge the triangle indices; without a callback the side becomes -1
+                pcontact->side1 = ((dxTriMesh *)pcontact->g1)->m_TriMergeCallback ? ((dxTriMesh *)pcontact->g1)->m_TriMergeCallback((dxTriMesh *)pcontact->g1, pcontact->side1, TriIndex1) : -1;
+                pcontact->side2 = ((dxTriMesh *)pcontact->g2)->m_TriMergeCallback ? ((dxTriMesh *)pcontact->g2)->m_TriMergeCallback((dxTriMesh *)pcontact->g2, pcontact->side2, TriIndex2) : -1;
+            }
+            else
+            {
+                // The summed normal is too short to be normalized: drop the contact
+                FreeExistingContact(pcontact, Flags, hashcontactset, Contacts, Stride, contactcount);
+            }
+        }
+    }
+    // Contact can be not available if buffer is full
+    else if (pcontact)
+    {
+        pcontact->normal[0] = normal[0];
+        pcontact->normal[1] = normal[1];
+        pcontact->normal[2] = normal[2];
+        pcontact->normal[3] = REAL(1.0); // used to store length of vector sum for averaging
+        pcontact->depth = depth;
+        pcontact->g1 = g1;
+        pcontact->g2 = g2;
+        pcontact->side1 = TriIndex1;
+        pcontact->side2 = TriIndex2;
+    }
+
+    return pcontact;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+// OPCODE trimesh-trimesh collider entry point.
+// Runs the AABB tree collider on the two meshes (mesh 2 positioned relative to
+// mesh 1) and converts every reported triangle pair into contacts through
+// TriTriContacts. Returns the number of contacts written to Contacts; 0 when
+// the query fails or no triangle pairs are reported.
+/*extern */
+int dCollideTTL(dxGeom* g1, dxGeom* g2, int Flags, dContactGeom* Contacts, int Stride)
+{
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (g2->type == dTriMeshClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh1 = (dxTriMesh*) g1;
+    dxTriMesh* TriMesh2 = (dxTriMesh*) g2;
+
+    const dVector3& TLPosition1 = *(const dVector3*) dGeomGetPosition(TriMesh1);
+    // TLRotation1 = column-major order
+    const dMatrix3& TLRotation1 = *(const dMatrix3*) dGeomGetRotation(TriMesh1);
+
+    const dVector3& TLPosition2 = *(const dVector3*) dGeomGetPosition(TriMesh2);
+    // TLRotation2 = column-major order
+    const dMatrix3& TLRotation2 = *(const dMatrix3*) dGeomGetRotation(TriMesh2);
+
+    const unsigned uiTLSKind = TriMesh1->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == TriMesh2->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    AABBTreeCollider& Collider = pccColliderCache->m_AABBTreeCollider;
+    BVTCache &ColCache = pccColliderCache->ColCache;
+    CONTACT_KEY_HASH_TABLE &hashcontactset = pccColliderCache->m_hashcontactset;
+
+    ColCache.Model0 = &TriMesh1->retrieveMeshBVTreeRef();
+    ColCache.Model1 = &TriMesh2->retrieveMeshBVTreeRef();
+
+    // Start with an empty set of registered contacts
+    ClearContactSet(hashcontactset);
+
+    // Collision query: mesh 1 is placed at the origin, mesh 2 at its offset from mesh 1
+    Matrix4x4 amatrix, bmatrix;
+    dVector3 TLOffsetPosition1 = { REAL(0.0), };
+    dVector3 TLOffsetPosition2;
+    dSubtractVectors3(TLOffsetPosition2, TLPosition2, TLPosition1);
+    MakeMatrix(TLOffsetPosition1, TLRotation1, amatrix);
+    MakeMatrix(TLOffsetPosition2, TLRotation2, bmatrix);
+
+    // Failure during the Collide call
+    if (!Collider.Collide(ColCache, &amatrix, &bmatrix))
+    {
+        return 0;
+    }
+
+    // Contact status false => the objects do not overlap
+    if (!Collider.GetContactStatus())
+    {
+        return 0;
+    }
+
+    // Number of colliding pairs and list of pairs
+    int TriCount = Collider.GetNbPairs();
+    const Pair* CollidingPairs = Collider.GetPairs();
+
+    if (TriCount <= 0)
+    {
+        return 0;
+    }
+
+    int             OutTriCount = 0;
+    int             id1, id2;
+    dVector3        v1[3], v2[3];
+
+    // Step through the pairs, adding contacts
+    for (int pairindex = 0; pairindex < TriCount; pairindex++)
+    {
+        id1 = CollidingPairs[pairindex].id0;
+        id2 = CollidingPairs[pairindex].id1;
+
+        // Grab the colliding triangles
+        TriMesh1->fetchMeshTriangle(v1, id1, TLPosition1, TLRotation1);
+        TriMesh2->fetchMeshTriangle(v2, id2, TLPosition2, TLRotation2);
+
+        // Since we'll be doing matrix transformations, we need to
+        //  make sure that all vertices have four elements
+        for (int vertex = 0; vertex < 3; vertex++)
+        {
+            v1[vertex][3] = 1.0;
+            v2[vertex][3] = 1.0;
+        }
+
+        TriTriContacts(v1, v2, id1, id2,
+            g1, g2, Flags, hashcontactset,
+            Contacts, Stride, OutTriCount);
+
+        // Continue loop even after contacts are full 
+        // as existing contacts' normals/depths might be updated
+        // Break only if contacts are not important
+        if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT)))
+        {
+            break;
+        }
+    }
+
+    // Return the number of contacts
+    return OutTriCount;
+}
+
+
+/* -- not used
+static void
+GetTriangleGeometryCallback(udword triangleindex, VertexPointers& triangle, udword user_data)
+{
+    dVector3 Out[3];
+
+    FetchTriangle((dxTriMesh*) user_data, (int) triangleindex, Out);
+
+    for (int i = 0; i < 3; i++)
+        triangle.Vertex[i] =  (const Point*) ((dReal*) Out[i]);
+}
+*/
+
+//
+//
+//
+// Element accessors for 16-element arrays that are named B and Binv in the
+// scope where the macros are used: Bij expands to B[(i-1)*4 + (j-1)] and
+// Binvij expands to Binv[(i-1)*4 + (j-1)], with i and j running from 1 to 4.
+#define B11   B[0]
+#define B12   B[1]
+#define B13   B[2]
+#define B14   B[3]
+#define B21   B[4]
+#define B22   B[5]
+#define B23   B[6]
+#define B24   B[7]
+#define B31   B[8]
+#define B32   B[9]
+#define B33   B[10]
+#define B34   B[11]
+#define B41   B[12]
+#define B42   B[13]
+#define B43   B[14]
+#define B44   B[15]
+
+#define Binv11   Binv[0]
+#define Binv12   Binv[1]
+#define Binv13   Binv[2]
+#define Binv14   Binv[3]
+#define Binv21   Binv[4]
+#define Binv22   Binv[5]
+#define Binv23   Binv[6]
+#define Binv24   Binv[7]
+#define Binv31   Binv[8]
+#define Binv32   Binv[9]
+#define Binv33   Binv[10]
+#define Binv34   Binv[11]
+#define Binv41   Binv[12]
+#define Binv42   Binv[13]
+#define Binv43   Binv[14]
+#define Binv44   Binv[15]
+
+// Fills the 16-element matrix B from a rotation and a position.
+// For k = 0..2, elements Rotation[4*k + 0..2] are stored at B[k], B[4 + k] and
+// B[8 + k], and Position[k] is stored at B[12 + k]. B[3], B[7] and B[11] are
+// set to 0 and B[15] to 1.
+static inline 
+void dMakeMatrix4(const dVector3 Position, const dMatrix3 Rotation, dMatrix4 &B)
+{
+    for (int k = 0; k < 3; ++k)
+    {
+        B[k]      = Rotation[4 * k];
+        B[4 + k]  = Rotation[4 * k + 1];
+        B[8 + k]  = Rotation[4 * k + 2];
+        B[12 + k] = Position[k];
+    }
+
+    B[3]  = 0.0;
+    B[7]  = 0.0;
+    B[11] = 0.0;
+    B[15] = 1.0;
+}
+
+// Disabled code (compiled out by #if 0), kept for reference.
+// Fills Binv from B through the Bij/Binvij element macros after asserting that
+// the computed determinant is not zero.
+// NOTE(review): Binv14, Binv24 and Binv34 are set to 0 and Binv44 to 1 instead
+// of being computed, which presumably relies on B14, B24, B34 being 0 and B44
+// being 1 - confirm before re-enabling.
+#if 0
+static void
+dInvertMatrix4( dMatrix4& B, dMatrix4& Binv )
+{
+    dReal det =  (B11 * B22 - B12 * B21) * (B33 * B44 - B34 * B43)
+        -(B11 * B23 - B13 * B21) * (B32 * B44 - B34 * B42)
+        +(B11 * B24 - B14 * B21) * (B32 * B43 - B33 * B42)
+        +(B12 * B23 - B13 * B22) * (B31 * B44 - B34 * B41)
+        -(B12 * B24 - B14 * B22) * (B31 * B43 - B33 * B41)
+        +(B13 * B24 - B14 * B23) * (B31 * B42 - B32 * B41);
+
+    dAASSERT (det != 0.0);
+
+    // From here on det holds the reciprocal of the determinant
+    det = 1.0 / det;
+
+    Binv11 = (dReal) (det * ((B22 * B33) - (B23 * B32)));
+    Binv12 = (dReal) (det * ((B32 * B13) - (B33 * B12)));
+    Binv13 = (dReal) (det * ((B12 * B23) - (B13 * B22)));
+    Binv14 = 0.0f;
+    Binv21 = (dReal) (det * ((B23 * B31) - (B21 * B33)));
+    Binv22 = (dReal) (det * ((B33 * B11) - (B31 * B13)));
+    Binv23 = (dReal) (det * ((B13 * B21) - (B11 * B23)));
+    Binv24 = 0.0f;
+    Binv31 = (dReal) (det * ((B21 * B32) - (B22 * B31)));
+    Binv32 = (dReal) (det * ((B31 * B12) - (B32 * B11)));
+    Binv33 = (dReal) (det * ((B11 * B22) - (B12 * B21)));
+    Binv34 = 0.0f;
+    Binv41 = (dReal) (det * (B21*(B33*B42 - B32*B43) + B22*(B31*B43 - B33*B41) + B23*(B32*B41 - B31*B42)));
+    Binv42 = (dReal) (det * (B31*(B13*B42 - B12*B43) + B32*(B11*B43 - B13*B41) + B33*(B12*B41 - B11*B42)));
+    Binv43 = (dReal) (det * (B41*(B13*B22 - B12*B23) + B42*(B11*B23 - B13*B21) + B43*(B12*B21 - B11*B22)));
+    Binv44 = 1.0f;
+}
+#endif
+
+
+// Find the intersection point between a coplanar line segment,
+// defined by X1 and X2, and a ray defined by X3 and direction N.
+//
+// The formula for this calculation is:
+//               (c x b) . (a x b)
+//   Q = x1 + a -------------------
+//                  | a x b | ^2
+//
+// where a = x2 - x1
+//       b = x4 - x3
+//       c = x3 - x1
+// x1 and x2 are the end points of a triangle edge, x3 is CoplanarPt
+//  and x4 is computed by the code below as (x3 + n)
+//
+// Returns 1 when the computed point is not "behind" x3 with respect to n and
+// lies between x1 and x2, otherwise 0. The point is written to out_pt in both cases.
+// This code is disabled (compiled out by #if 0).
+#if 0
+static int
+IntersectLineSegmentRay(dVector3 x1, dVector3 x2, dVector3 x3, dVector3 n,
+                        dVector3 out_pt)
+{
+    dVector3 a, b, c, x4;
+
+    ADD(x4, x3, n);  // x4 = x3 + n
+
+    SUB(a, x2, x1);  // a = x2 - x1
+    SUB(b, x4, x3);
+    SUB(c, x3, x1);
+
+    dVector3 tmp1, tmp2;
+    CROSS(tmp1, c, b);
+    CROSS(tmp2, a, b);
+
+    dReal num, denom;
+    num = dCalcVectorDot3(tmp1, tmp2);
+    denom = LENGTH( tmp2 );
+
+    // NOTE(review): denom is not checked against zero before the division
+    dReal s;
+    s = num /(denom*denom);
+
+    for (int i=0; i<3; i++)
+        out_pt[i] = x1[i] + a[i]*s;
+
+    // Test if this intersection is "behind" x3, w.r.t. n
+    SUB(a, x3, out_pt);
+    if (dCalcVectorDot3(a, n) > 0.0)
+        return 0;
+
+    // Test if this intersection point is outside the edge limits,
+    //  if (dot( (out_pt-x1), (out_pt-x2) ) < 0) it's inside
+    //  else outside
+    SUB(a, out_pt, x1);
+    SUB(b, out_pt, x2);
+    if (dCalcVectorDot3(a,b) < 0.0)
+        return 1;
+    else
+        return 0;
+}
+#endif
+
+
+// Computes the point in which the line through s1 and s2 meets the plane
+// DOT(N, p) = C and stores it in clipped; clipped[3] is set to 1.
+// The direction s2 - s1 is not checked for being parallel to the plane.
+void PlaneClipSegment( const dVector3  s1, const dVector3  s2,
+                      const dVector3  N, dReal C, dVector3  clipped)
+{
+    // Offset of the segment start from the plane
+    const dReal startOffset = DOT(s1,N)-C;
+
+    // clipped temporarily holds the segment direction
+    SUB(clipped,s2,s1);
+    const dReal directionDotN = DOT(clipped,N);
+
+    // Scale the direction so that it reaches the plane, then translate back to s1
+    MULT(clipped,clipped,-startOffset/directionDotN);
+    ADD(clipped,clipped,s1);
+    clipped[3] = 1.0f;
+}
+
+/* ClipConvexPolygonAgainstPlane - clips the convex polygon stored in Contacts
+with the plane DOT(N, p) = C (N is the plane normal, C the distance term).
+Vertices with DOT(N, p) - C > 1.0e-8 are classified as "front" and dropped, all
+other vertices ("back") are kept. Wherever two consecutive vertices are
+classified differently, the point computed by PlaneClipSegment is inserted.
+The result replaces the content of Contacts and holds at most 8 points.
+Note: the original comment states that the input vertices are assumed to be in
+"invcounterclockwise" order (presumably clockwise - not verified here).
+changed by Francisco Leon (http://gimpact.sourceforge.net) */
+static void
+ClipConvexPolygonAgainstPlane( const dVector3 N, dReal C,
+                              LineContactSet& Contacts )
+{
+    if(Contacts.Count==0)
+    {
+        return;
+    }
+
+    const int maxkept = 8;
+    dVector3 kept[maxkept];
+    int keptcount = 0;
+
+    // side: 0 = back, 1 = front; the initial value matches neither
+    int previousside = 32000;
+
+    // One extra step wraps around to vertex 0 so that the closing edge is processed too
+    for (int step = 0; step <= Contacts.Count; step++)
+    {
+        const int current = step % Contacts.Count;
+
+        const dReal offset = DOT(N,Contacts.Points[current]) - C;
+        const int side = (offset>REAL(1.0e-8)) ? 1 : 0;
+
+        // The edge from the previous vertex crosses the plane: add the crossing point
+        if (step > 0 && side != previousside && keptcount < maxkept)
+        {
+            PlaneClipSegment(Contacts.Points[step-1],Contacts.Points[current],
+                N,C,kept[keptcount]);
+            keptcount++;
+        }
+
+        // Back vertices are kept; the wrap-around step must not add vertex 0 twice
+        if (side == 0 && keptcount < maxkept && step < Contacts.Count)
+        {
+            kept[keptcount][0] = Contacts.Points[current][0];
+            kept[keptcount][1] = Contacts.Points[current][1];
+            kept[keptcount][2] = Contacts.Points[current][2];
+            kept[keptcount][3] = 1.0f;
+            keptcount++;
+        }
+
+        previousside = side;
+    }
+
+    Contacts.Count = keptcount;
+    if (keptcount == 0)
+    {
+        return;
+    }
+
+    memcpy( Contacts.Points, kept, keptcount * sizeof(dVector3) );
+}
+
+
+// Builds the plane through the points s0, s1 and s2.
+// Normal receives the normalized cross product of (s1 - s0) and (s2 - s0) and
+// Dist receives DOT(Normal, s0). Returns false, leaving Dist untouched, when
+// dSafeNormalize3 rejects the cross product.
+bool BuildPlane(const dVector3 s0, const dVector3 s1,const dVector3 s2,
+                dVector3 Normal, dReal & Dist)
+{
+    dVector3 firstedge, secondedge;
+    SUB(firstedge,s1,s0);
+    SUB(secondedge,s2,s0);
+
+    CROSS(Normal,firstedge,secondedge);
+
+    if (dSafeNormalize3(Normal))
+    {
+        Dist = DOT(Normal,s0);
+        return true;
+    }
+
+    return false;
+}
+
+// bool BuildEdgesDir(const dVector3 s0, const dVector3 s1,
+//                    const dVector3 t0, const dVector3 t1,
+//                    dVector3 crossdir)
+// {
+//     dVector3 e0,e1;
+// 
+//     SUB(e0,s1,s0);
+//     SUB(e1,t1,t0);
+//     CROSS(crossdir,e0,e1);
+// 
+//     if (!dSafeNormalize3(crossdir))
+//     {
+//         return false;
+//     }
+//     return true;
+// }
+
+
+
+// Builds the plane that contains the edge s0-s1 and the direction normal.
+// plane_normal receives the normalized cross product of (s1 - s0) and normal,
+// plane_dist receives DOT(plane_normal, s0). Returns false, leaving plane_dist
+// untouched, when dSafeNormalize3 rejects the cross product.
+bool BuildEdgePlane(
+                    const dVector3 s0, const dVector3 s1,
+                    const dVector3 normal,
+                    dVector3 plane_normal,
+                    dReal & plane_dist)
+{
+    dVector3 edge;
+    SUB(edge,s1,s0);
+
+    CROSS(plane_normal,edge,normal);
+
+    if (dSafeNormalize3(plane_normal))
+    {
+        plane_dist = DOT(plane_normal,s0);
+        return true;
+    }
+
+    return false;
+}
+
+
+
+
+/*
+Overlap of the intervals [vmin1, vmax1] and [vmin2, vmax2].
+Positive result: penetration
+Negative result (or zero): the intervals are separated (or just touching)
+*/
+dReal IntervalPenetration(dReal &vmin1,dReal &vmax1,
+                          dReal &vmin2,dReal &vmax2)
+{
+    // Interval 1 lies entirely below interval 2
+    if (vmax1<=vmin2)
+    {
+        return -(vmin2-vmax1);
+    }
+
+    // Interval 2 lies entirely below interval 1
+    if (vmax2<=vmin1)
+    {
+        return -(vmin1-vmax2);
+    }
+
+    // The intervals overlap
+    return (vmax1<=vmax2) ? vmax1-vmin2 : vmax2-vmin1;
+}
+
+// Projects the given vertices onto dir and reports the smallest and the
+// largest projection in vmin and vmax. vertices[0] is read unconditionally.
+void FindInterval(
+                  const dVector3 * vertices, int verticecount,
+                  dVector3 dir,dReal &vmin,dReal &vmax)
+{
+    vmin = DOT(vertices[0],dir);
+    vmax = vmin;
+
+    for (int index = 1; index < verticecount; ++index)
+    {
+        const dReal projection = DOT(vertices[index],dir);
+        if (projection < vmin)
+        {
+            vmin = projection;
+        }
+        else if (projection > vmax)
+        {
+            vmax = projection;
+        }
+    }
+}
+
+///Collects into deep_points the points of the input set that lie deepest behind
+///the plane DOT(plane_normal, p) = plane_dist, where depth is the negated plane offset.
+///A point that is deeper than everything seen so far restarts the selection;
+///a point within 0.000001 of the current maximum is appended to it.
+///Returns the maximum depth (-dInfinity for an empty input set).
+dReal MostDeepPoints(
+                     LineContactSet & points,
+                     const dVector3 plane_normal,
+                     dReal plane_dist,
+                     LineContactSet & deep_points)
+{
+    int selected[8];
+    int selectedcount = 0;
+    dReal deepest = -dInfinity;
+
+    deep_points.Count = 0;
+
+    for (int index = 0; index < points.Count; index++)
+    {
+        dReal pointdepth = DOT(plane_normal,points.Points[index]) - plane_dist;
+        pointdepth *= -1.0f;
+
+        if (pointdepth > deepest)
+        {
+            // New maximum: forget the candidates collected so far
+            deepest = pointdepth;
+            selected[0] = index;
+            selectedcount = 1;
+        }
+        else if (pointdepth+REAL(0.000001) >= deepest)
+        {
+            selected[selectedcount] = index;
+            selectedcount++;
+        }
+    }
+
+    deep_points.Count = selectedcount;
+    for (int index = 0; index < selectedcount; index++)
+    {
+        SET(deep_points.Points[index],points.Points[selected[index]]);
+    }
+
+    return deepest;
+}
+
+// Copies the given points into clipped_points and clips that polygon with the
+// three edge planes of the triangle tri (each built by BuildEdgePlane from an
+// edge and triplanenormal; an edge whose plane cannot be built is skipped).
+// When triplane_clips is set the polygon is additionally clipped with the
+// plane of the triangle itself (triplanenormal, triplanedist).
+void ClipPointsByTri(
+                     const dVector3 * points, int pointcount,
+                     const dVector3 tri[3],
+                     const dVector3 triplanenormal,
+                     dReal triplanedist,
+                     LineContactSet & clipped_points,
+                     bool triplane_clips)
+{
+    // Start from an unclipped copy of the input polygon
+    clipped_points.Count = pointcount;
+    memcpy(clipped_points.Points, points, pointcount*sizeof(dVector3));
+
+    // Elements 0..2 hold the edge plane normal, element 3 the plane distance
+    dVector4 edgeplane;
+
+    for (int edge = 0; edge < 3; edge++)
+    {
+        const int nextvertex = (edge+1)%3;
+
+        if (!BuildEdgePlane(tri[edge], tri[nextvertex], triplanenormal,
+            edgeplane, edgeplane[3]))
+        {
+            continue;
+        }
+
+        ClipConvexPolygonAgainstPlane(edgeplane, edgeplane[3], clipped_points);
+    }
+
+    if (triplane_clips)
+    {
+        ClipConvexPolygonAgainstPlane(triplanenormal, triplanedist, clipped_points);
+    }
+}
+
+
+///Evaluates the two triangle face planes as candidate separating directions.
+///For each face whose plane can be built, the other triangle is clipped with
+///the edge planes of that face and the deepest clipped points behind the face
+///plane are collected. The face yielding the smaller depth is selected: its
+///deepest points are copied to deep_points and its plane normal is stored in
+///separating_normal (negated when the face of tri1 is selected).
+///Returns the selected depth; dInfinity if neither plane could be built.
+///
+///The edge-against-edge direction test that the original source carried here
+///as commented-out code is not performed.
+dReal FindTriangleTriangleCollision(
+                                    const dVector3 tri1[3],
+                                    const dVector3 tri2[3],
+                                    dVector3 separating_normal,
+                                    LineContactSet & deep_points)
+{
+    dReal maxdeep=dInfinity;
+    bool secondfaceselected = false;
+    dVector4 tri1plane,tri2plane;
+    separating_normal[3] = 0.0f;
+
+    LineContactSet clipped_points1,clipped_points2;
+    LineContactSet deep_points1,deep_points2;
+    // It is necessary to initialize the count because both conditional statements 
+    // might be skipped leading to uninitialized count being used for the final memcpy
+    deep_points1.Count = 0;
+
+    ////face of tri1: clip tri2 with it
+    clipped_points1.Count = 0;
+    if (BuildPlane(tri1[0],tri1[1],tri1[2],
+        tri1plane,tri1plane[3]))
+    {
+        ClipPointsByTri(
+            tri2, 3,
+            tri1,
+            tri1plane,
+            tri1plane[3],
+            clipped_points1,false);
+
+        maxdeep = MostDeepPoints(
+            clipped_points1,
+            tri1plane,
+            tri1plane[3],
+            deep_points1);
+        SET(separating_normal,tri1plane);
+    }
+
+    ////face of tri2: clip tri1 with it
+    clipped_points2.Count = 0;
+    if (BuildPlane(tri2[0],tri2[1],tri2[2],
+        tri2plane,tri2plane[3]))
+    {
+        ClipPointsByTri(
+            tri1, 3,
+            tri2,
+            tri2plane,
+            tri2plane[3],
+            clipped_points2,false);
+
+        const dReal seconddepth = MostDeepPoints(
+            clipped_points2,
+            tri2plane,
+            tri2plane[3],
+            deep_points2);
+
+        if (seconddepth<maxdeep)
+        {
+            maxdeep = seconddepth;
+            secondfaceselected = true;
+            SET(separating_normal,tri2plane);
+        }
+    }
+
+    ////export the deepest points of the selected face
+    const LineContactSet &selected_points = secondfaceselected ? deep_points2 : deep_points1;
+
+    deep_points.Count = selected_points.Count;
+    memcpy(
+        &deep_points.Points[0],
+        &selected_points.Points[0],
+        selected_points.Count*sizeof(dVector3));
+
+    if (!secondfaceselected)
+    {
+        ///invert normal for point to tri1
+        MULT(separating_normal,separating_normal,-1.0f);
+    }
+
+    return maxdeep;
+}
+
+
+
+///SUPPORT UP TO 8 CONTACTS
+///Generates the contacts of one triangle pair.
+///Returns false when FindTriangleTriangleCollision reports a negative depth
+///(no contacts are pushed), true otherwise. contactcount is updated through
+///PushNewContact.
+bool TriTriContacts(const dVector3 tr1[3],
+                    const dVector3 tr2[3],
+                    int TriIndex1, int TriIndex2,
+                    dxGeom* g1, dxGeom* g2, int Flags, 
+                    CONTACT_KEY_HASH_TABLE &hashcontactset,
+                    dContactGeom* Contacts, int Stride,
+                    int &contactcount)
+{
+    dVector4 normal;
+    LineContactSet contactpoints;
+    contactpoints.Count = 0;
+
+    ///find best direction and the points to report
+    const dReal depth = FindTriangleTriangleCollision(
+        tr1,
+        tr2,
+        normal,
+        contactpoints);
+
+    if (depth<0.0f)
+    {
+        return false;
+    }
+
+    for (int pointindex = 0; pointindex<contactpoints.Count; pointindex++)
+    {
+        PushNewContact( g1,  g2, TriIndex1, TriIndex2,
+            contactpoints.Points[pointindex],
+            normal, depth, Flags, hashcontactset,
+            Contacts,Stride,contactcount);
+
+        // Continue loop even after contacts are full 
+        // as existing contacts' normals/depths might be updated
+        // Break only if contacts are not important
+        if ((contactcount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT)))
+        {
+            break;
+        }
+    }
+
+    return true;
+}
+
+
+#endif // !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+
+#endif // dTRIMESH_OPCODE
+
+
+//////////////////////////////////////////////////////////////////////////
+
+#if dTRIMESH_GIMPACT
+
+#include "gimpact_contact_export_helper.h"
+#include "gimpact_gim_contact_accessor.h"
+
+
+//
+// GIMPACT TRIMESH-TRIMESH COLLIDER
+//
+
+// GIMPACT trimesh-trimesh collider entry point.
+// Recomputes the AABBs of both meshes, lets GIMPACT collide them and exports
+// the resulting contacts to Contacts through
+// dxGImpactContactsExportHelper::ExportMaxDepthGImpactContacts.
+// Returns the number of exported contacts (0 when GIMPACT reports none).
+/*extern */
+int dCollideTTL(dxGeom* g1, dxGeom* g2, int Flags, dContactGeom* Contacts, int Stride)
+{
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (g2->type == dTriMeshClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh *firstMesh = static_cast<dxTriMesh *>(g1);
+    dxTriMesh *secondMesh = static_cast<dxTriMesh *>(g2);
+
+    // Temporary list that receives the GIMPACT contacts
+    GDYNAMIC_ARRAY gimContacts;
+    GIM_CREATE_CONTACT_LIST(gimContacts);
+
+    firstMesh->recomputeAABB();
+    secondMesh->recomputeAABB();
+
+    // Collide trimeshes
+    gim_trimesh_trimesh_collision(&firstMesh->m_collision_trimesh, &secondMesh->m_collision_trimesh, &gimContacts);
+
+    int exportedCount = 0;
+    unsigned gimContactCount = gimContacts.m_size;
+
+    if (gimContactCount != 0)
+    {
+        GIM_CONTACT *gimContactArray = GIM_DYNARRAY_POINTER(GIM_CONTACT, gimContacts);
+
+        dxGIMCContactAccessor contactAccessor(gimContactArray, g1, g2);
+        unsigned culledContactCount = dxGImpactContactsExportHelper::ExportMaxDepthGImpactContacts(contactAccessor, gimContactCount, Flags, Contacts, Stride);
+
+        exportedCount = culledContactCount;
+    }
+
+    // The list is destroyed on every path
+    GIM_DYNARRAY_DESTROY(gimContacts);
+
+    return exportedCount;
+}
+
+
+#endif // dTRIMESH_GIMPACT
+
+#endif // dTRIMESH_ENABLED
+
diff --git a/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh_old.cpp b/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh_old.cpp
new file mode 100644
index 0000000..23d04a1
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_trimesh_trimesh_old.cpp
@@ -0,0 +1,2071 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// OPCODE TriMesh/TriMesh collision code by Jeff Smith (c) 2004
+
+#ifdef _MSC_VER
+#pragma warning(disable:4244 4305)  // for VC++, no precision loss complaints
+#endif
+
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+
+#if dTRIMESH_ENABLED
+
+#include "collision_util.h"
+#include "collision_trimesh_internal.h"
+
+
+#if dTRIMESH_OPCODE
+
+// Classic Implementation
+#if dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+#define SMALL_ELT           REAL(2.5e-4)  // initial contact_elt_length; ELT projections below this count as "small"
+#define EXPANDED_ELT_THRESH REAL(1.0e-3)  // depth bound under which a small-ELT contact takes the ELT length as its depth
+#define DISTANCE_EPSILON    REAL(1.0e-8)  // threshold for total_dp1/total_dp2; also caps the depth substituted for an exact 0
+#define VELOCITY_EPSILON    REAL(1.0e-5)  // NOTE(review): no use seen near dCollideTTL -- confirm where this applies
+#define TINY_PENETRATION    REAL(5.0e-6)  // NOTE(review): no use seen near dCollideTTL -- confirm where this applies
+
+// Small fixed-capacity vertex buffer that receives the polygon produced by the clipping helpers.
+// Consumers iterate over the first Count entries of Points.
+struct LineContactSet
+{
+    // Capacity of the Points array.
+    enum { MAX_POINTS = 8 };
+
+    dVector3 Points[MAX_POINTS];
+    int      Count;
+};
+
+
+// Prototypes of the file-local helpers; GetTriangleGeometryCallback(udword, VertexPointers&, udword) is not used.
+static void GenerateContact(int, dContactGeom*, int, dxTriMesh*,  dxTriMesh*, // call sites pass Flags, Contacts, Stride, TriMesh1, TriMesh2
+                            int TriIndex1, int TriIndex2,
+                            const dVector3, const dVector3, dReal, int&); // call sites pass contact point, normal, depth, running contact count
+static int TriTriIntersectWithIsectLine(dReal V0[3],dReal V1[3],dReal V2[3],
+                                        dReal U0[3],dReal U1[3],dReal U2[3],int *coplanar,
+                                        dReal isectpt1[3],dReal isectpt2[3]); // the caller treats a 0 return as "no intersection"
+inline void dMakeMatrix4(const dVector3 Position, const dMatrix3 Rotation, dMatrix4 &B); // B is the output matrix
+static void dInvertMatrix4( dMatrix4& B, dMatrix4& Binv ); // Binv is the output; called once per mesh before the pair loop
+//static int IntersectLineSegmentRay(dVector3, dVector3, dVector3, dVector3,  dVector3);
+static bool FindTriSolidIntrsection(const dVector3 Tri[3], 
+                                    const dVector4 Planes[6], int numSides,
+                                    LineContactSet& ClippedPolygon ); // fills ClippedPolygon; dCollideTTL passes 3 planes and ignores the bool
+static void ClipConvexPolygonAgainstPlane( const dVector3, dReal, LineContactSet& ); // NOTE(review): presumably (plane normal, plane offset, polygon) -- confirm
+static bool SimpleUnclippedTest(dVector3 in_CoplanarPt, dVector3 in_v, dVector3 in_elt,
+                                dVector3 in_n, dVector3* in_col_v, dReal &out_depth); // on true the caller emits a contact using out_depth
+static int ExamineContactPoint(dVector3* v_col, dVector3 in_n, dVector3 in_point);
+static int RayTriangleIntersect(const dVector3 orig, const dVector3 dir,
+                                const dVector3 vert0, const dVector3 vert1,const dVector3 vert2,
+                                dReal *t,dReal *u,dReal *v);
+
+
+
+
+/* some math macros: short aliases for the ODE vector helpers; IS_ZERO is true only when v[0..2] are all exactly 0 */
+#define IS_ZERO(v) (!(v)[0] && !(v)[1] && !(v)[2])
+// dest receives the cross product of v1 and v2 (used to build face normals from two edges)
+#define CROSS(dest,v1,v2) dCalcVectorCross3(dest, v1, v2)
+// evaluates to the dot product of v1 and v2
+#define DOT(v1,v2) dCalcVectorDot3(v1, v2)
+// dest receives v1 - v2
+#define SUB(dest,v1,v2) dSubtractVectors3(dest, v1, v2)
+// dest receives v1 + v2
+#define ADD(dest,v1,v2) dAddVectors3(dest, v1, v2)
+// dest receives v scaled by factor (same helper as SMULT)
+#define MULT(dest,v,factor) dCopyScaledVector3(dest, v, factor)
+// copies src into dest
+#define SET(dest,src) dCopyVector3(dest, src)
+// p receives q scaled by s (same helper as MULT; callers also use it in place, e.g. SMULT(n2, n2, -1.0))
+#define SMULT(p,q,s) dCopyScaledVector3(p, q, s)
+// evaluates to the length of x
+#define LENGTH(x) dCalcVectorLength3(x)
+// assigns to d; note that p and q are handed to dCalcPointDepth3 in swapped order (q first, then p)
+#define DEPTH(d, p, q, n) d = dCalcPointDepth3(q, p, n)
+
+
+// Flips the penetrating/penetrated roles: pen_v and pen_elt move to the other triangle, col_v takes
+// the triangle pen_v just left, and n is overwritten with that triangle's normal (n1 for v1, n2 for v2).
+inline void SwapNormals(dVector3 *&pen_v, dVector3 *&col_v, dVector3* v1, dVector3* v2,
+                        dVector3 *&pen_elt, dVector3 *elt_f1, dVector3 *elt_f2,
+                        dVector3 n, dVector3 n1, dVector3 n2)
+{
+    if (pen_v != v1) {
+        pen_v = v1;
+        pen_elt = elt_f1;
+        col_v = v2;
+        SET(n, n2);
+        return;
+    }
+    pen_v = v2;
+    pen_elt = elt_f2;
+    col_v = v1;
+    SET(n, n1);
+}
+
+
+
+
+int 
+dCollideTTL(dxGeom* g1, dxGeom* g2, int Flags, dContactGeom* Contacts, int Stride)
+{
+    dIASSERT (Stride >= (int)sizeof(dContactGeom));
+    dIASSERT (g1->type == dTriMeshClass);
+    dIASSERT (g2->type == dTriMeshClass);
+    dIASSERT ((Flags & NUMC_MASK) >= 1);
+
+    dxTriMesh* TriMesh1 = (dxTriMesh*) g1;
+    dxTriMesh* TriMesh2 = (dxTriMesh*) g2;
+
+    const dReal* TriNormals1 = TriMesh1->retrieveMeshNormals();
+    const dReal* TriNormals2 = TriMesh2->retrieveMeshNormals();
+
+    const dVector3& TLPosition1 = *(const dVector3*) dGeomGetPosition(TriMesh1);
+    // TLRotation1 = column-major order
+    const dMatrix3& TLRotation1 = *(const dMatrix3*) dGeomGetRotation(TriMesh1);
+
+    const dVector3& TLPosition2 = *(const dVector3*) dGeomGetPosition(TriMesh2);
+    // TLRotation2 = column-major order
+    const dMatrix3& TLRotation2 = *(const dMatrix3*) dGeomGetRotation(TriMesh2);
+
+    const unsigned uiTLSKind = TriMesh1->getParentSpaceTLSKind();
+    dIASSERT(uiTLSKind == TriMesh2->getParentSpaceTLSKind()); // The colliding spaces must use matching cleanup method
+    TrimeshCollidersCache *pccColliderCache = GetTrimeshCollidersCache(uiTLSKind);
+    AABBTreeCollider& Collider = pccColliderCache->m_AABBTreeCollider;
+    BVTCache &ColCache = pccColliderCache->ColCache;
+
+    ColCache.Model0 = &TriMesh1->retrieveMeshBVTreeRef();
+    ColCache.Model1 = &TriMesh2->retrieveMeshBVTreeRef();
+
+    // Collision query
+    Matrix4x4 amatrix, bmatrix;
+    dVector3 TLOffsetPosition1 = { REAL(0.0), };
+    dVector3 TLOffsetPosition2;
+    dSubtractVectors3(TLOffsetPosition2, TLPosition2, TLPosition1);
+    MakeMatrix(TLOffsetPosition1, TLRotation1, amatrix);
+    MakeMatrix(TLOffsetPosition2, TLRotation2, bmatrix);
+    BOOL IsOk = Collider.Collide(ColCache, &amatrix, &bmatrix);
+
+
+    // Make "double" versions of these matrices, if appropriate
+    dMatrix4 A, B;
+    dMakeMatrix4(TLPosition1, TLRotation1, A);
+    dMakeMatrix4(TLPosition2, TLRotation2, B);
+
+
+    if (IsOk) {
+        // Get collision status => if true, objects overlap
+        if ( Collider.GetContactStatus() ) {
+            // Number of colliding pairs and list of pairs
+            int TriCount = Collider.GetNbPairs();
+            const Pair* CollidingPairs = Collider.GetPairs();
+
+            if (TriCount > 0) {
+                // step through the pairs, adding contacts
+                int             id1, id2;
+                int             OutTriCount = 0;
+                dVector3        v1[3], v2[3], CoplanarPt;
+                dVector3        e1, e2, e3, n1, n2, n, ContactNormal;
+                dReal           depth;
+                dVector3        orig_pos, old_pos1, old_pos2, elt1, elt2, elt_sum;
+                dVector3        elt_f1[3], elt_f2[3];
+                dReal          contact_elt_length = SMALL_ELT;
+                LineContactSet  firstClippedTri, secondClippedTri;
+                dVector3       *firstClippedElt = new dVector3[LineContactSet::MAX_POINTS];
+                dVector3       *secondClippedElt = new dVector3[LineContactSet::MAX_POINTS];
+
+
+                // only do these expensive inversions once
+                dMatrix4 InvMatrix1, InvMatrix2;
+                dInvertMatrix4(A, InvMatrix1);
+                dInvertMatrix4(B, InvMatrix2);
+
+
+                for (int i = 0; i < TriCount; i++) {
+
+                    id1 = CollidingPairs[i].id0;
+                    id2 = CollidingPairs[i].id1;
+
+                    // grab the colliding triangles
+                    static_cast<dxTriMesh *>(g1)->fetchMeshTriangle(v1, id1, TLPosition1, TLRotation1);
+                    static_cast<dxTriMesh *>(g2)->fetchMeshTriangle(v2, id2, TLPosition2, TLRotation2);
+
+                    // Since we'll be doing matrix transformations, we need to
+                    //  make sure that all vertices have four elements
+                    for (int j=0; j<3; j++) {
+                        v1[j][3] = 1.0;
+                        v2[j][3] = 1.0;
+                    }
+
+
+                    int IsCoplanar = 0;
+                    dReal IsectPt1[3], IsectPt2[3];
+
+                    // Sometimes OPCODE makes mistakes, so we look at the return
+                    //  value for TriTriIntersectWithIsectLine.  A retcode of "0"
+                    //  means no intersection took place
+                    if ( TriTriIntersectWithIsectLine( v1[0], v1[1], v1[2], v2[0], v2[1], v2[2],
+                        &IsCoplanar,
+                        IsectPt1, IsectPt2) ) {
+
+                            // Compute the normals of the colliding faces
+                            //
+                            if (TriNormals1 == NULL) {
+                                SUB( e1, v1[1], v1[0] );
+                                SUB( e2, v1[2], v1[0] );
+                                CROSS( n1, e1, e2 );
+                                dNormalize3(n1);
+                            }
+                            else {
+                                // If we were passed normals, we need to adjust them to take into
+                                //  account the objects' current rotations
+                                e1[0] = TriNormals1[id1*3];
+                                e1[1] = TriNormals1[id1*3 + 1];
+                                e1[2] = TriNormals1[id1*3 + 2];
+                                e1[3] = 0.0;
+
+                                //dMultiply1(n1, TLRotation1, e1, 3, 3, 1);
+                                dMultiply0(n1, TLRotation1, e1, 3, 3, 1);
+                                n1[3] = 1.0;
+                            }
+
+                            if (TriNormals2 == NULL)  {
+                                SUB( e1, v2[1], v2[0] );
+                                SUB( e2, v2[2], v2[0] );
+                                CROSS( n2, e1, e2);
+                                dNormalize3(n2);
+                            }
+                            else {
+                                // If we were passed normals, we need to adjust them to take into
+                                //  account the objects' current rotations
+                                e2[0] = TriNormals2[id2*3];
+                                e2[1] = TriNormals2[id2*3 + 1];
+                                e2[2] = TriNormals2[id2*3 + 2];
+                                e2[3] = 0.0;
+
+                                //dMultiply1(n2, TLRotation2, e2, 3, 3, 1);
+                                dMultiply0(n2, TLRotation2, e2, 3, 3, 1);
+                                n2[3] = 1.0;
+                            }
+
+
+                            if (IsCoplanar) {
+                                // We can reach this case if the faces are coplanar, OR
+                                //  if they don't actually intersect.  (OPCODE can make
+                                //  mistakes)
+                                if (dFabs(dCalcVectorDot3(n1, n2)) > REAL(0.999)) {
+                                    // If the faces are coplanar, we declare that the point of
+                                    //  contact is at the average location of the vertices of
+                                    //  both faces
+                                    dVector3 ContactPt;
+                                    for (int j=0; j<3; j++) {
+                                        ContactPt[j] = 0.0;
+                                        for (int k=0; k<3; k++)
+                                            ContactPt[j] += v1[k][j] + v2[k][j];
+                                        ContactPt[j] /= 6.0;
+                                    }
+                                    ContactPt[3] = 1.0;
+
+                                    // and the contact normal is the normal of face 2
+                                    //  (could be face 1, because they are the same)
+                                    SET(n, n2);
+
+                                    // and the penetration depth is the co-normal
+                                    // distance between any two vertices A and B,
+                                    // i.e.  d = DOT(n, (A-B))
+                                    DEPTH(depth, v1[1], v2[1], n);
+                                    if (depth < 0)
+                                        depth *= -1.0;
+
+                                    GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                        ContactPt, n, depth, OutTriCount);
+                                }
+                            }
+                            else {
+                                // Otherwise (in non-co-planar cases), we create a coplanar 
+                                //  point -- the middle of the line of intersection -- that
+                                //   will be used for various computations down the road
+                                for (int j=0; j<3; j++)
+                                    CoplanarPt[j] = ( (IsectPt1[j] + IsectPt2[j]) / REAL(2.0) );
+                                CoplanarPt[3] = 1.0;
+
+                                // Find the ELT of the coplanar point
+                                //
+                                dMultiply1(orig_pos, InvMatrix1, CoplanarPt, 4, 4, 1);
+                                dMultiply1(old_pos1, ((dxTriMesh*)g1)->m_last_trans, orig_pos, 4, 4, 1);
+                                SUB(elt1, CoplanarPt, old_pos1);
+
+                                dMultiply1(orig_pos, InvMatrix2, CoplanarPt, 4, 4, 1);
+                                dMultiply1(old_pos2, ((dxTriMesh*)g2)->m_last_trans, orig_pos, 4, 4, 1);
+                                SUB(elt2, CoplanarPt, old_pos2);
+
+                                SUB(elt_sum, elt1, elt2);  // net motion of the coplanar point
+                                dReal elt_sum_len = LENGTH(elt_sum); // Could be calculated on demand but there is no good place...
+
+
+                                // Calculate how much the vertices of each face moved in the
+                                //  direction of the opposite face's normal
+                                //
+                                dReal    total_dp1, total_dp2;
+                                total_dp1 = 0.0;
+                                total_dp2 = 0.0;
+
+                                for (int ii=0; ii<3; ii++) {
+                                    // find the estimated linear translation (ELT) of the vertices
+                                    //  on face 1, wrt to the center of face 2. 
+
+                                    // un-transform this vertex by the current transform
+                                    dMultiply1(orig_pos, InvMatrix1, v1[ii], 4, 4, 1 );
+
+                                    // re-transform this vertex by last_trans (to get its old
+                                    //  position)
+                                    dMultiply1(old_pos1, ((dxTriMesh*)g1)->m_last_trans, orig_pos, 4, 4, 1);
+
+                                    // Then subtract this position from our current one to find
+                                    //  the elapsed linear translation (ELT)
+                                    for (int k=0; k<3; k++) {
+                                        elt_f1[ii][k] = (v1[ii][k] - old_pos1[k]) - elt2[k];
+                                    }
+
+                                    // Take the dot product of the ELT  for each vertex (wrt the
+                                    //  center of face2)
+                                    total_dp1 += dFabs( dCalcVectorDot3(elt_f1[ii], n2) );
+                                }
+
+                                for (int ii=0; ii<3; ii++) {
+                                    // find the estimated linear translation (ELT) of the vertices
+                                    //  on face 2, wrt to the center of face 1. 
+                                    dMultiply1(orig_pos, InvMatrix2, v2[ii], 4, 4, 1);
+                                    dMultiply1(old_pos2, ((dxTriMesh*)g2)->m_last_trans, orig_pos, 4, 4, 1);
+                                    for (int k=0; k<3; k++) {
+                                        elt_f2[ii][k] = (v2[ii][k] - old_pos2[k]) - elt1[k];
+                                    }
+
+                                    // Take the dot product of the ELT  for each vertex (wrt the
+                                    //  center of face2) and add them
+                                    total_dp2 += dFabs( dCalcVectorDot3(elt_f2[ii], n1) );
+                                }
+
+
+                                ////////
+                                // Estimate the penetration depth.  
+                                //                            
+                                dReal    dp;
+                                BOOL      badPen = true;
+                                dVector3 *pen_v;   // the "penetrating vertices"
+                                dVector3 *pen_elt; // the elt_f of the penetrating face
+                                dVector3 *col_v;   // the "collision vertices" (the penetrated face)
+
+                                SMULT(n2, n2, -1.0); // SF PATCH #1335183
+                                depth = 0.0;
+                                if ((total_dp1 > DISTANCE_EPSILON) || (total_dp2 > DISTANCE_EPSILON)) {
+                                    ////////
+                                    // Find the collision normal, by finding the face
+                                    //  that is pointed "most" in the direction of travel
+                                    //  of the two triangles
+                                    //
+                                    if (total_dp2 > total_dp1) {
+                                        pen_v = v2;
+                                        pen_elt = elt_f2;
+                                        col_v = v1;
+                                        SET(n, n1);
+                                    }
+                                    else {
+                                        pen_v = v1;
+                                        pen_elt = elt_f1;
+                                        col_v = v2;
+                                        SET(n, n2);
+                                    }
+                                }
+                                else {
+                                    // the total_dp is very small, so let's fall back
+                                    //  to a different test
+                                    if (LENGTH(elt2) > LENGTH(elt1)) {
+                                        pen_v = v2;
+                                        pen_elt = elt_f2;
+                                        col_v = v1;
+                                        SET(n, n1);
+                                    }
+                                    else {
+                                        pen_v = v1;
+                                        pen_elt = elt_f1;
+                                        col_v = v2;
+                                        SET(n, n2);
+                                    }
+                                }
+
+
+                                for (int j=0; j<3; j++) {
+                                    if (SimpleUnclippedTest(CoplanarPt, pen_v[j], pen_elt[j], n, col_v, depth)) {
+                                        GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                            pen_v[j], n, depth, OutTriCount);
+                                        badPen = false;
+
+                                        if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                            break;
+                                        }
+                                    }
+                                }
+
+                                if (badPen) {
+                                    // try the other normal
+                                    SwapNormals(pen_v, col_v, v1, v2, pen_elt, elt_f1, elt_f2, n, n1, n2);
+
+                                    for (int j=0; j<3; j++)
+                                        if (SimpleUnclippedTest(CoplanarPt, pen_v[j], pen_elt[j], n, col_v, depth)) {
+                                            GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                pen_v[j], n, depth, OutTriCount);
+                                            badPen = false;
+
+                                            if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                break;
+                                            }
+                                        }
+                                }
+
+
+
+                                ////////////////////////////////////////
+                                //
+                                // If we haven't found a good penetration, then we're probably straddling
+                                //  the edge of one of the objects, or the penetraing face is big
+                                //  enough that all of its vertices are outside the bounds of the
+                                //  penetrated face.
+                                // In these cases, we do a more expensive test. We clip the penetrating
+                                //  triangle with a solid defined by the penetrated triangle, and repeat
+                                //  the tests above on this new polygon
+                                if (badPen) {
+
+                                    // Switch pen_v and n back again
+                                    SwapNormals(pen_v, col_v, v1, v2, pen_elt, elt_f1, elt_f2, n, n1, n2);
+
+
+                                    // Find the three sides (no top or bottom) of the solid defined by 
+                                    //  the edges of the penetrated triangle.
+
+                                    // The dVector4 "plane" structures contain the following information:
+                                    //  [0]-[2]: The normal of the face, pointing INWARDS (i.e.
+                                    //           the inverse normal
+                                    //  [3]: The distance between the face and the center of the
+                                    //       solid, along the normal
+                                    dVector4 SolidPlanes[3];
+                                    dVector3 tmp1;
+                                    dVector3 sn;
+
+                                    for (int j=0; j<3; j++) {
+                                        e1[j] = col_v[1][j] - col_v[0][j];
+                                        e2[j] = col_v[0][j] - col_v[2][j];
+                                        e3[j] = col_v[2][j] - col_v[1][j];
+                                    }
+
+                                    // side 1
+                                    CROSS(sn, e1, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[0], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[0], col_v[1]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[0][3] = dCalcVectorDot3(tmp1, SolidPlanes[0]);
+
+
+                                    // side 2
+                                    CROSS(sn, e2, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[1], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[0], col_v[2]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[1][3] = dCalcVectorDot3(tmp1, SolidPlanes[1]);
+
+
+                                    // side 3
+                                    CROSS(sn, e3, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[2], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[2], col_v[1]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[2][3] = dCalcVectorDot3(tmp1, SolidPlanes[2]);
+
+
+                                    FindTriSolidIntrsection(pen_v, SolidPlanes, 3, firstClippedTri);
+
+                                    for (int j=0; j<firstClippedTri.Count; j++) {
+                                        firstClippedTri.Points[j][3] = 1.0; // because we will be doing matrix mults
+
+                                        DEPTH(dp, CoplanarPt, firstClippedTri.Points[j], n);
+
+                                        // if the penetration depth (calculated above) is more than the contact
+                                        //  point's ELT, then we've chosen the wrong face and should switch faces
+                                        if (pen_v == v1) {
+                                            dMultiply1(orig_pos, InvMatrix1, firstClippedTri.Points[j], 4, 4, 1);
+                                            dMultiply1(old_pos1, ((dxTriMesh*)g1)->m_last_trans, orig_pos, 4, 4, 1);
+                                            for (int k=0; k<3; k++) {
+                                                firstClippedElt[j][k] = (firstClippedTri.Points[j][k] - old_pos1[k]) - elt2[k];
+                                            }
+                                        }
+                                        else {
+                                            dMultiply1(orig_pos, InvMatrix2, firstClippedTri.Points[j], 4, 4, 1);
+                                            dMultiply1(old_pos2, ((dxTriMesh*)g2)->m_last_trans, orig_pos, 4, 4, 1);
+                                            for (int k=0; k<3; k++) {
+                                                firstClippedElt[j][k] = (firstClippedTri.Points[j][k] - old_pos2[k]) - elt1[k];
+                                            }
+                                        }
+
+                                        if (dp >= 0.0) {
+                                            contact_elt_length = dFabs(dCalcVectorDot3(firstClippedElt[j], n));
+
+                                            depth = dp;
+                                            if (depth == 0.0)
+                                                depth = dMin(DISTANCE_EPSILON, contact_elt_length);
+
+                                            if ((contact_elt_length < SMALL_ELT) && (depth < EXPANDED_ELT_THRESH))
+                                                depth = contact_elt_length;
+
+                                            if (depth <= contact_elt_length) {
+                                                // Add a contact
+                                                GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                    firstClippedTri.Points[j], n, depth, OutTriCount);
+                                                badPen = false;
+
+                                                if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                    break;
+                                                }
+                                            }
+                                        }
+
+                                    }
+                                }
+
+                                if (badPen) {
+                                    // Switch pen_v and n (again!)
+                                    SwapNormals(pen_v, col_v, v1, v2, pen_elt, elt_f1, elt_f2, n, n1, n2);
+
+
+                                    // Find the three sides (no top or bottom) of the solid created by 
+                                    //  the penetrated triangle.
+                                    // The dVector4 "plane" structures contain the following information:
+                                    //  [0]-[2]: The normal of the face, pointing INWARDS (i.e.
+                                    //           the inverse normal
+                                    //  [3]: The distance between the face and the center of the
+                                    //       solid, along the normal
+                                    dVector4 SolidPlanes[3];
+                                    dVector3 tmp1;
+
+                                    dVector3 sn;
+                                    for (int j=0; j<3; j++) {
+                                        e1[j] = col_v[1][j] - col_v[0][j];
+                                        e2[j] = col_v[0][j] - col_v[2][j];
+                                        e3[j] = col_v[2][j] - col_v[1][j];
+                                    }
+
+                                    // side 1
+                                    CROSS(sn, e1, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[0], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[0], col_v[1]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[0][3] = dCalcVectorDot3(tmp1, SolidPlanes[0]);
+
+
+                                    // side 2
+                                    CROSS(sn, e2, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[1], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[0], col_v[2]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[1][3] = dCalcVectorDot3(tmp1, SolidPlanes[1]);
+
+
+                                    // side 3
+                                    CROSS(sn, e3, n);
+                                    dNormalize3(sn);
+                                    SMULT( SolidPlanes[2], sn, -1.0 );
+
+                                    ADD(tmp1, col_v[2], col_v[1]); 
+                                    SMULT(tmp1, tmp1, 0.5); // center of edge
+                                    // distance from center to edge along normal
+                                    SolidPlanes[2][3] = dCalcVectorDot3(tmp1, SolidPlanes[2]);
+
+                                    FindTriSolidIntrsection(pen_v, SolidPlanes, 3, secondClippedTri);
+
+                                    for (int j=0; j<secondClippedTri.Count; j++) {
+                                        secondClippedTri.Points[j][3] = 1.0; // because we will be doing matrix mults
+
+                                        DEPTH(dp, CoplanarPt, secondClippedTri.Points[j], n);
+
+                                        if (pen_v == v1) {
+                                            dMultiply1(orig_pos, InvMatrix1, secondClippedTri.Points[j], 4, 4, 1);
+                                            dMultiply1(old_pos1, ((dxTriMesh*)g1)->m_last_trans, orig_pos, 4, 4, 1);
+                                            for (int k=0; k<3; k++) {
+                                                secondClippedElt[j][k] = (secondClippedTri.Points[j][k] - old_pos1[k]) - elt2[k];
+                                            }
+                                        }
+                                        else {
+                                            dMultiply1(orig_pos, InvMatrix2, secondClippedTri.Points[j], 4, 4, 1);
+                                            dMultiply1(old_pos2, ((dxTriMesh*)g2)->m_last_trans, orig_pos, 4, 4, 1);
+                                            for (int k=0; k<3; k++) {
+                                                secondClippedElt[j][k] = (secondClippedTri.Points[j][k] - old_pos2[k]) - elt1[k];
+                                            }
+                                        }
+
+
+                                        if (dp >= 0.0) {
+                                            contact_elt_length = dFabs(dCalcVectorDot3(secondClippedElt[j],n));
+
+                                            depth = dp;
+                                            if (depth == 0.0)
+                                                depth = dMin(DISTANCE_EPSILON, contact_elt_length);
+
+                                            if ((contact_elt_length < SMALL_ELT) && (depth < EXPANDED_ELT_THRESH))
+                                                depth = contact_elt_length;
+
+                                            if (depth <= contact_elt_length) {
+                                                // Add a contact
+                                                GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                    secondClippedTri.Points[j], n, depth, OutTriCount);
+                                                badPen = false;
+
+                                                if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                    break;
+                                                }
+                                            }
+                                        }
+
+
+                                    }
+                                }
+
+
+
+                                /////////////////
+                                // All conventional tests have failed at this point, so now we deal with
+                                //  cases on a more "heuristic" basis
+                                //
+
+                                if (badPen) {
+                                    // Switch pen_v and n (for the fourth time, so they're
+                                    //  what my original guess said they were)
+                                    SwapNormals(pen_v, col_v, v1, v2, pen_elt, elt_f1, elt_f2, n, n1, n2);
+
+                                    if (dFabs(dCalcVectorDot3(n1, n2)) < REAL(0.01)) {
+                                        // If we reach this point, we have (close to) perpendicular
+                                        //  faces, either resting on each other or sliding in a
+                                        //  direction orthogonal to both surface normals.
+                                        if (elt_sum_len < DISTANCE_EPSILON) {
+                                            depth = dFabs(dCalcVectorDot3(n, elt_sum));
+
+                                            if (depth > REAL(1e-12)) {
+                                                dNormalize3(n);
+                                                GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                    CoplanarPt, n, depth, OutTriCount);
+                                                badPen = false;
+                                            }
+                                            else {
+                                                // If the two faces are (nearly) perfectly at rest with
+                                                //  respect to each other, then we ignore the contact,
+                                                //  allowing the objects to slip a little in the hopes
+                                                //  that next frame, they'll give us something to work
+                                                //  with.
+                                                badPen = false;
+                                            }
+                                        }
+                                        else {
+                                            // The faces are perpendicular, but moving significantly.
+                                            //  This can be sliding, or an unusual edge-straddling
+                                            //  penetration.
+                                            dVector3 cn;
+
+                                            CROSS(cn, n1, n2);
+                                            dNormalize3(cn);
+                                            SET(n, cn);
+
+                                            // The shallowest interpenetration of the faces
+                                            //  is the depth
+                                            dVector3 ContactPt;
+                                            dVector3 dvTmp;
+                                            dReal    rTmp;
+                                            depth = dInfinity;
+                                            for (int j=0; j<3; j++) {
+                                                for (int k=0; k<3; k++) {
+                                                    SUB(dvTmp, col_v[k], pen_v[j]);
+
+                                                    rTmp = dCalcVectorDot3(dvTmp, n);
+                                                    if ( dFabs(rTmp) < dFabs(depth) ) {
+                                                        depth = rTmp;
+                                                        SET( ContactPt, pen_v[j] );
+                                                        contact_elt_length = dFabs(dCalcVectorDot3(pen_elt[j], n));
+                                                    }
+                                                }
+                                            }
+                                            if (depth < 0.0) {
+                                                SMULT(n, n, -1.0);
+                                                depth *= -1.0;
+                                            }
+
+                                            if ((depth > 0.0) && (depth <= contact_elt_length)) {
+                                                GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                    ContactPt, n, depth, OutTriCount);
+                                                badPen = false;
+                                            }
+
+                                        }
+                                    }
+                                }
+
+
+                                if (badPen && elt_sum_len != 0.0) {
+                                    // Use as the normal the direction of travel, rather than any particular
+                                    //  face normal
+                                    //
+                                    dVector3 esn;
+
+                                    if (pen_v == v1) {
+                                        SMULT(esn, elt_sum, -1.0);
+                                    }
+                                    else {
+                                        SET(esn, elt_sum);
+                                    }
+                                    dNormalize3(esn);
+
+
+                                    // The shallowest interpenetration of the faces
+                                    //  is the depth
+                                    dVector3 ContactPt;
+                                    depth = dInfinity;
+                                    for (int j=0; j<3; j++) {
+                                        for (int k=0; k<3; k++) {
+                                            DEPTH(dp, col_v[k], pen_v[j], esn);
+                                            if ( (ExamineContactPoint(col_v, esn, pen_v[j])) &&
+                                                ( dFabs(dp) < dFabs(depth)) ) {
+                                                    depth = dp;
+                                                    SET( ContactPt, pen_v[j] );
+                                                    contact_elt_length = dFabs(dCalcVectorDot3(pen_elt[j], esn));
+                                            }
+                                        }
+                                    }
+
+                                    if ((depth > 0.0) && (depth <= contact_elt_length)) {
+                                        GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                            ContactPt, esn, depth, OutTriCount);
+                                        badPen = false;
+                                    }
+                                }
+
+
+                                if (badPen && elt_sum_len != 0.0) {
+                                    // If the direction of motion is perpendicular to both normals
+                                    if ( (dFabs(dCalcVectorDot3(n1, elt_sum)) < REAL(0.01)) && (dFabs(dCalcVectorDot3(n2, elt_sum)) < REAL(0.01)) ) {
+                                        dVector3 esn;
+                                        if (pen_v == v1) {
+                                            SMULT(esn, elt_sum, -1.0);
+                                        }
+                                        else {
+                                            SET(esn, elt_sum);
+                                        }
+
+                                        dNormalize3(esn);
+
+
+                                        // Look at the clipped points again, checking them against this
+                                        //  new normal
+                                        for (int j=0; j<firstClippedTri.Count; j++) {
+                                            DEPTH(dp, CoplanarPt, firstClippedTri.Points[j], esn);
+
+                                            if (dp >= 0.0) {
+                                                contact_elt_length = dFabs(dCalcVectorDot3(firstClippedElt[j], esn));
+
+                                                depth = dp;
+                                                //if (depth == 0.0)
+                                                //depth = dMin(DISTANCE_EPSILON, contact_elt_length);
+
+                                                if ((contact_elt_length < SMALL_ELT) && (depth < EXPANDED_ELT_THRESH))
+                                                    depth = contact_elt_length;
+
+                                                if (depth <= contact_elt_length) {
+                                                    // Add a contact
+                                                    GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                        firstClippedTri.Points[j], esn, depth, OutTriCount);
+                                                    badPen = false;
+
+                                                    if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                        break;
+                                                    }
+                                                }
+                                            }
+                                        }
+
+                                        if (badPen) {
+                                            // If this test failed, try it with the second set of clipped faces
+                                            for (int j=0; j<secondClippedTri.Count; j++) {
+                                                DEPTH(dp, CoplanarPt, secondClippedTri.Points[j], esn);
+
+                                                if (dp >= 0.0) {
+                                                    contact_elt_length = dFabs(dCalcVectorDot3(secondClippedElt[j], esn));
+
+                                                    depth = dp;
+                                                    //if (depth == 0.0)
+                                                    //depth = dMin(DISTANCE_EPSILON, contact_elt_length);
+
+                                                    if ((contact_elt_length < SMALL_ELT) && (depth < EXPANDED_ELT_THRESH))
+                                                        depth = contact_elt_length;
+
+                                                    if (depth <= contact_elt_length) {
+                                                        // Add a contact
+                                                        GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                            secondClippedTri.Points[j], esn, depth, OutTriCount);
+                                                        badPen = false;
+
+                                                        if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                            break;
+                                                        }
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+
+
+
+                                if (badPen) {
+                                    // if we have very little motion, we're dealing with resting contact
+                                    //  and shouldn't reference the ELTs at all
+                                    //
+                                    if (elt_sum_len < VELOCITY_EPSILON) {
+
+                                        // instead of a "contact_elt_length" threshold, we'll use an
+                                        //  arbitrary, small one
+                                        for (int j=0; j<3; j++) {
+                                            DEPTH(dp, CoplanarPt, pen_v[j], n);
+
+                                            if (dp == 0.0)
+                                                dp = TINY_PENETRATION;
+
+                                            if ( (dp > 0.0) && (dp <= SMALL_ELT)) {
+                                                // Add a contact
+                                                GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                    pen_v[j], n, dp, OutTriCount);
+                                                badPen = false;
+
+                                                if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                    break;
+                                                }
+                                            }
+                                        }
+
+
+                                        if (badPen) {
+                                            // try the other normal
+                                            SwapNormals(pen_v, col_v, v1, v2, pen_elt, elt_f1, elt_f2, n, n1, n2);
+
+                                            for (int j=0; j<3; j++) {
+                                                DEPTH(dp, CoplanarPt, pen_v[j], n);
+
+                                                if (dp == 0.0)
+                                                    dp = TINY_PENETRATION;
+
+                                                if ( (dp > 0.0) && (dp <= SMALL_ELT)) {
+                                                    GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                                        pen_v[j], n, dp, OutTriCount);
+                                                    badPen = false;
+
+                                                    if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                                                        break;
+                                                    }
+                                                }
+                                            }
+                                        }
+
+
+
+                                    }
+                                }
+
+                                if (badPen) {
+                                    // find the nearest existing contact, and replicate its
+                                    //  normal and depth
+                                    //
+                                    dContactGeom*  Contact;
+                                    dVector3       pos_diff;
+                                    dReal          min_dist, dist;
+
+                                    min_dist = dInfinity;
+                                    depth = 0.0;
+                                    for (int j=0; j<OutTriCount; j++) {
+                                        Contact = SAFECONTACT(Flags, Contacts, j, Stride);
+
+                                        SUB(pos_diff,  Contact->pos, CoplanarPt);
+
+                                        dist = dCalcVectorDot3(pos_diff, pos_diff);
+                                        if (dist < min_dist) {
+                                            min_dist = dist;
+                                            depth = Contact->depth;
+                                            SMULT(ContactNormal, Contact->normal, -1.0);
+                                        }
+                                    }
+
+                                    if (depth > 0.0) {
+                                        // Add a tiny contact at the coplanar point
+                                        GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                            CoplanarPt, ContactNormal, depth, OutTriCount);
+                                        badPen = false;
+                                    }
+                                }
+
+
+                                if (badPen) {
+                                    // Add a tiny contact at the coplanar point                                    
+                                    if (-dCalcVectorDot3(elt_sum, n1) > -dCalcVectorDot3(elt_sum, n2)) {
+                                        SET(ContactNormal, n1);
+                                    }
+                                    else {
+                                        SET(ContactNormal, n2);
+                                    }
+
+                                    GenerateContact(Flags, Contacts, Stride,  TriMesh1,  TriMesh2, id1, id2,
+                                        CoplanarPt, ContactNormal, TINY_PENETRATION, OutTriCount);
+                                    badPen = false;
+                                }
+
+
+                            } // not coplanar (main loop)
+                    } // TriTriIntersectWithIsectLine
+
+                    if ((OutTriCount | CONTACTS_UNIMPORTANT) == (Flags & (NUMC_MASK | CONTACTS_UNIMPORTANT))) {
+                        break;
+                    }
+                }
+
+                // Free memory
+                delete[] firstClippedElt;
+                delete[] secondClippedElt;	
+
+                // Return the number of contacts
+                return OutTriCount; 
+            }
+        }
+    }
+
+
+    // There was some kind of failure during the Collide call or
+    // there are no faces overlapping
+    return 0;    
+}
+
+
+/* -- not used
+static void
+GetTriangleGeometryCallback(udword triangleindex, VertexPointers& triangle, udword user_data)
+{
+dVector3 Out[3];
+
+FetchTriangle((dxTriMesh*) user_data, (int) triangleindex, Out);
+
+for (int i = 0; i < 3; i++)
+triangle.Vertex[i] =  (const Point*) ((dReal*) Out[i]);
+}
+*/
+
+// Element shorthands for 4x4 matrices held as 16 consecutive values:
+// Bij expands to B[4*(i-1) + (j-1)], Binvij to Binv[4*(i-1) + (j-1)].
+// They bind to whatever identifiers B / Binv are in scope at the point of use.
+#define B11   B[0]
+#define B12   B[1]
+#define B13   B[2]
+#define B14   B[3]
+#define B21   B[4]
+#define B22   B[5]
+#define B23   B[6]
+#define B24   B[7]
+#define B31   B[8]
+#define B32   B[9]
+#define B33   B[10]
+#define B34   B[11]
+#define B41   B[12]
+#define B42   B[13]
+#define B43   B[14]
+#define B44   B[15]
+
+#define Binv11   Binv[0]
+#define Binv12   Binv[1]
+#define Binv13   Binv[2]
+#define Binv14   Binv[3]
+#define Binv21   Binv[4]
+#define Binv22   Binv[5]
+#define Binv23   Binv[6]
+#define Binv24   Binv[7]
+#define Binv31   Binv[8]
+#define Binv32   Binv[9]
+#define Binv33   Binv[10]
+#define Binv34   Binv[11]
+#define Binv41   Binv[12]
+#define Binv42   Binv[13]
+#define Binv43   Binv[14]
+#define Binv44   Binv[15]
+
+// Fill B from Rotation (3x3 part, indices swapped) and Position; B14,B24,B34 = 0 and B44 = 1.
+inline void dMakeMatrix4(const dVector3 Position, const dMatrix3 Rotation, dMatrix4 &B)
+{
+    for (int r = 0; r < 3; ++r) {
+        for (int c = 0; c < 3; ++c) B[4 * c + r] = Rotation[4 * r + c];  // Rotation is read with a stride of 4
+        B[12 + r] = Position[r];                                         // B41, B42, B43
+    }
+    B[3] = 0.0; B[7] = 0.0; B[11] = 0.0; B[15] = 1.0;                     // B14, B24, B34, B44
+}
+
+// Writes the inverse of B into Binv, assuming B14=B24=B34=0 and B44=1 (as set by dMakeMatrix4).
+static void
+dInvertMatrix4( dMatrix4& B, dMatrix4& Binv )
+{
+    dReal det =  (B11 * B22 - B12 * B21) * (B33 * B44 - B34 * B43)
+        -(B11 * B23 - B13 * B21) * (B32 * B44 - B34 * B42)
+        +(B11 * B24 - B14 * B21) * (B32 * B43 - B33 * B42)
+        +(B12 * B23 - B13 * B22) * (B31 * B44 - B34 * B41)
+        -(B12 * B24 - B14 * B22) * (B31 * B43 - B33 * B41)
+        +(B13 * B24 - B14 * B23) * (B31 * B42 - B32 * B41);
+    // 4x4 determinant; zero means no inverse. NOTE(review): confirm dAASSERT is active in release builds.
+    dAASSERT (det != 0.0);    
+    // 'det' is reused for the reciprocal, so the entries below multiply instead of divide.
+    det = 1.0 / det;
+    // Binv14/24/34/44 are written as the constants 0,0,0,1 rather than computed from B.
+    Binv11 = (dReal) (det * ((B22 * B33) - (B23 * B32)));
+    Binv12 = (dReal) (det * ((B32 * B13) - (B33 * B12)));
+    Binv13 = (dReal) (det * ((B12 * B23) - (B13 * B22)));
+    Binv14 = 0.0f;
+    Binv21 = (dReal) (det * ((B23 * B31) - (B21 * B33)));
+    Binv22 = (dReal) (det * ((B33 * B11) - (B31 * B13)));
+    Binv23 = (dReal) (det * ((B13 * B21) - (B11 * B23)));
+    Binv24 = 0.0f;
+    Binv31 = (dReal) (det * ((B21 * B32) - (B22 * B31)));
+    Binv32 = (dReal) (det * ((B31 * B12) - (B32 * B11)));
+    Binv33 = (dReal) (det * ((B11 * B22) - (B12 * B21)));
+    Binv34 = 0.0f;
+    Binv41 = (dReal) (det * (B21*(B33*B42 - B32*B43) + B22*(B31*B43 - B33*B41) + B23*(B32*B41 - B31*B42)));
+    Binv42 = (dReal) (det * (B31*(B13*B42 - B12*B43) + B32*(B11*B43 - B13*B41) + B33*(B12*B41 - B11*B42)));
+    Binv43 = (dReal) (det * (B41*(B13*B22 - B12*B23) + B42*(B11*B23 - B13*B21) + B43*(B12*B21 - B11*B22)));
+    Binv44 = 1.0f;
+}
+
+
+
+/////////////////////////////////////////////////
+//
+// Triangle/Triangle intersection utilities
+//
+// From the article "A Fast Triangle-Triangle Intersection Test",
+// Journal of Graphics Tools, 2(2), 1997
+//
+// Some of this functionality is duplicated in OPCODE (see
+//  OPC_TriTriOverlap.h) but we have replicated it here so we don't
+//  have to mess with the internals of OPCODE, as well as so we can
+//  further optimize some of the functions.
+// 
+//  This version computes the line of intersection as well (if they
+//  are not coplanar):
+//  int TriTriIntersectWithIsectLine(dReal V0[3],dReal V1[3],dReal V2[3], 
+//                                   dReal U0[3],dReal U1[3],dReal U2[3],
+//                                   int *coplanar,
+//                                   dReal isectpt1[3],dReal isectpt2[3]);
+//
+//  parameters: vertices of triangle 1: V0,V1,V2
+//              vertices of triangle 2: U0,U1,U2
+//
+//  result    : returns 1 if the triangles intersect, otherwise 0
+//              "coplanar" returns whether the tris are coplanar
+//              isectpt1, isectpt2 are the endpoints of the line of
+//              intersection
+// 
+
+
+
+/* if USE_EPSILON_TEST is true then we do a check: 
+         if |dv|<EPSILON then dv=0.0;
+   else no check is done (which is less robust)
+*/
+#define USE_EPSILON_TEST TRUE  
+#define EPSILON REAL(0.000001)
+
+
+/* sort so that a<=b: swaps a and b when a>b; expands to a bare if-block and names a, b more than once */
+#define SORT(a,b)       \
+    if(a>b)    \
+    {          \
+        dReal c; \
+        c=a;     \
+        a=b;     \
+        b=c;     \
+    }
+/* isect0 / isect1 = VV0 moved toward VV1 / VV2 by D0/(D0-D1) and D0/(D0-D2); expands to two statements */
+#define ISECT(VV0,VV1,VV2,D0,D1,D2,isect0,isect1) \
+    isect0=VV0+(VV1-VV0)*D0/(D0-D1);    \
+    isect1=VV0+(VV2-VV0)*D0/(D0-D2);
+/* Chooses the ISECT base vertex from the signs of D0D1, D0D2, D1*D2 and from which of D0,D1,D2 are zero.
+   If all are zero it returns coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2) from the enclosing function. */
+#define COMPUTE_INTERVALS(VV0,VV1,VV2,D0,D1,D2,D0D1,D0D2,isect0,isect1) \
+    if(D0D1>0.0f)                                         \
+    {                                                     \
+        /* here we know that D0D2<=0.0 */                   \
+        /* that is D0, D1 are on the same side, D2 on the other or on the plane */ \
+        ISECT(VV2,VV0,VV1,D2,D0,D1,isect0,isect1);          \
+    }                                                     \
+    else if(D0D2>0.0f)                                    \
+    {                                                     \
+        /* here we know that d0d1<=0.0 */                   \
+        ISECT(VV1,VV0,VV2,D1,D0,D2,isect0,isect1);          \
+    }                                                     \
+    else if(D1*D2>0.0f || D0!=0.0f)                       \
+    {                                                     \
+        /* here we know that d0d1<=0.0 or that D0!=0.0 */   \
+        ISECT(VV0,VV1,VV2,D0,D1,D2,isect0,isect1);          \
+    }                                                     \
+    else if(D1!=0.0f)                                     \
+    {                                                     \
+        ISECT(VV1,VV0,VV2,D1,D0,D2,isect0,isect1);          \
+    }                                                     \
+    else if(D2!=0.0f)                                     \
+    {                                                     \
+        ISECT(VV2,VV0,VV1,D2,D0,D1,isect0,isect1);          \
+    }                                                     \
+    else                                                  \
+    {                                                     \
+        /* triangles are coplanar */                        \
+        return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);      \
+    }
+
+
+
+/* Edge/edge test based on Franklin Antonio's gem "Faster Line Segment
+Intersection", Graphics Gems III, pp. 199-202. Reads Ax,Ay,i0,i1 and writes
+Bx,By,Cx,Cy,d,e,f at the expansion site; on a hit the enclosing function returns 1. */
+#define EDGE_EDGE_TEST(V0,U0,U1)                      \
+    Bx=U0[i0]-U1[i0];                                   \
+    By=U0[i1]-U1[i1];                                   \
+    Cx=V0[i0]-U0[i0];                                   \
+    Cy=V0[i1]-U0[i1];                                   \
+    f=Ay*Bx-Ax*By;                                      \
+    d=By*Cx-Bx*Cy;                                      \
+    if((f>0 && d>=0 && d<=f) || (f<0 && d<=0 && d>=f))  \
+    {                                                   \
+        e=Ax*Cy-Ay*Cx;                                    \
+        if(f>0)                                           \
+    {                                                 \
+        if(e>=0 && e<=f) return 1;                      \
+    }                                                 \
+    else                                              \
+    {                                                 \
+        if(e<=0 && e>=f) return 1;                      \
+    }                                                 \
+}                                
+/* test edge V0,V1 against the three edges of (U0,U1,U2); i0,i1 are taken from the expansion site */
+#define EDGE_AGAINST_TRI_EDGES(V0,V1,U0,U1,U2) \
+{                                              \
+    dReal Ax,Ay,Bx,By,Cx,Cy,e,d,f;               \
+    Ax=V1[i0]-V0[i0];                            \
+    Ay=V1[i1]-V0[i1];                            \
+    /* test edge U0,U1 against V0,V1 */          \
+    EDGE_EDGE_TEST(V0,U0,U1);                    \
+    /* test edge U1,U2 against V0,V1 */          \
+    EDGE_EDGE_TEST(V0,U1,U2);                    \
+    /* test edge U2,U0 against V0,V1 */          \
+    EDGE_EDGE_TEST(V0,U2,U0);                    \
+}
+/* returns 1 from the enclosing function if V0 is strictly on the same side of all three edges of (U0,U1,U2) */
+#define POINT_IN_TRI(V0,U0,U1,U2)           \
+{                                           \
+    dReal a,b,c,d0,d1,d2;                     \
+    /* is T1 completely inside T2? */         \
+    /* check if V0 is inside tri(U0,U1,U2) */ \
+    a=U1[i1]-U0[i1];                          \
+    b=-(U1[i0]-U0[i0]);                       \
+    c=-a*U0[i0]-b*U0[i1];                     \
+    d0=a*V0[i0]+b*V0[i1]+c;                   \
+    \
+    a=U2[i1]-U1[i1];                          \
+    b=-(U2[i0]-U1[i0]);                       \
+    c=-a*U1[i0]-b*U1[i1];                     \
+    d1=a*V0[i0]+b*V0[i1]+c;                   \
+    \
+    a=U0[i1]-U2[i1];                          \
+    b=-(U0[i0]-U2[i0]);                       \
+    c=-a*U2[i0]-b*U2[i1];                     \
+    d2=a*V0[i0]+b*V0[i1]+c;                   \
+    if(d0*d1>0.0)                             \
+    {                                         \
+        if(d0*d2>0.0) return 1;                 \
+    }                                         \
+}
+
+/*
+ * Overlap test for two triangles that lie in one plane with normal N.
+ *
+ * Both triangles (V0,V1,V2) and (U0,U1,U2) are projected onto the coordinate
+ * plane obtained by dropping the axis where N has its largest absolute
+ * component; the 2D test is then carried out by the EDGE_AGAINST_TRI_EDGES and
+ * POINT_IN_TRI macros, which read i0/i1 and return 1 from this function as
+ * soon as they find an overlap.
+ *
+ * Returns 1 when an edge pair intersects or V0 / U0 lies strictly inside the
+ * other triangle, 0 otherwise.
+ */
+int coplanar_tri_tri(dReal N[3],dReal V0[3],dReal V1[3],dReal V2[3],
+                     dReal U0[3],dReal U1[3],dReal U2[3])
+{
+    const dReal absX = dFabs(N[0]);
+    const dReal absY = dFabs(N[1]);
+    const dReal absZ = dFabs(N[2]);
+    /* i0,i1: the two coordinate indices kept by the projection. */
+    /* Start from "drop z" (keep x,y) and adjust when x or y dominates. */
+    short i0 = 0;
+    short i1 = 1;
+
+    if (absX > absY)
+    {
+        if (absX > absZ)
+        {
+            /* x dominates: keep y,z */
+            i0 = 1;
+            i1 = 2;
+        }
+    }
+    else if (!(absZ > absY))
+    {
+        /* y dominates (ties included): keep x,z */
+        i1 = 2;
+    }
+
+    /* every edge of triangle V against the edges of triangle U */
+    EDGE_AGAINST_TRI_EDGES(V0,V1,U0,U1,U2);
+    EDGE_AGAINST_TRI_EDGES(V1,V2,U0,U1,U2);
+    EDGE_AGAINST_TRI_EDGES(V2,V0,U0,U1,U2);
+
+    /* no edges cross: one triangle may still contain the other */
+    POINT_IN_TRI(V0,U0,U1,U2);
+    POINT_IN_TRI(U0,V0,V1,V2);
+    return 0;
+}
+
+/* Same branch structure as COMPUTE_INTERVALS, without divisions: stores A, B, C, X0, X1 for the chosen vertex.
+   If D0, D1 and D2 are all zero it returns coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2) from the enclosing function. */
+#define NEWCOMPUTE_INTERVALS(VV0,VV1,VV2,D0,D1,D2,D0D1,D0D2,A,B,C,X0,X1) \
+{ \
+    if(D0D1>0.0f) \
+    { \
+        /* here we know that D0D2<=0.0 */ \
+        /* that is D0, D1 are on the same side, D2 on the other or on the plane */ \
+        A=VV2; B=(VV0-VV2)*D2; C=(VV1-VV2)*D2; X0=D2-D0; X1=D2-D1; \
+    } \
+    else if(D0D2>0.0f)\
+    { \
+        /* here we know that d0d1<=0.0 */ \
+        A=VV1; B=(VV0-VV1)*D1; C=(VV2-VV1)*D1; X0=D1-D0; X1=D1-D2; \
+    } \
+    else if(D1*D2>0.0f || D0!=0.0f) \
+    { \
+        /* here we know that d0d1<=0.0 or that D0!=0.0 */ \
+        A=VV0; B=(VV1-VV0)*D0; C=(VV2-VV0)*D0; X0=D0-D1; X1=D0-D2; \
+    } \
+    else if(D1!=0.0f) \
+    { \
+        A=VV1; B=(VV0-VV1)*D1; C=(VV2-VV1)*D1; X0=D1-D0; X1=D1-D2; \
+    } \
+    else if(D2!=0.0f) \
+    { \
+        A=VV2; B=(VV0-VV2)*D2; C=(VV1-VV2)*D2; X0=D2-D0; X1=D2-D1; \
+    } \
+    else \
+    { \
+        /* triangles are coplanar */ \
+        return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2); \
+    } \
+}
+
+
+
+
+/* sort so that a<=b; smallest is set to 1 if a and b were swapped, else to 0 (expands to a bare if/else) */
+#define SORT2(a,b,smallest)       \
+    if(a>b)       \
+    {             \
+        dReal c;    \
+        c=a;        \
+        a=b;        \
+        b=c;        \
+        smallest=1; \
+    }             \
+    else smallest=0;
+
+
+inline void isect2(dReal VTX0[3],dReal VTX1[3],dReal VTX2[3],dReal VV0,dReal VV1,dReal VV2,
+                   dReal D0,dReal D1,dReal D2,dReal *isect0,dReal *isect1,dReal isectpoint0[3],dReal isectpoint1[3]) 
+{
+    /* For each of the edges VTX0->VTX1 and VTX0->VTX2, finds the fraction t at
+     * which the signed distance (D0 at VTX0) interpolates to zero, and stores
+     * the scalar VV interpolated at t in *isect0 / *isect1 and the matching
+     * point in isectpoint0 / isectpoint1 (via the SUB/MULT/ADD vector macros). */
+    dReal edge[3];
+    dReal t=D0/(D0-D1);                  /* fraction of the way from VTX0 to VTX1 */
+    *isect0=VV0+(VV1-VV0)*t;
+    SUB(edge,VTX1,VTX0); MULT(edge,edge,t); ADD(isectpoint0,edge,VTX0);
+    t=D0/(D0-D2);                        /* fraction of the way from VTX0 to VTX2 */
+    *isect1=VV0+(VV2-VV0)*t;
+    SUB(edge,VTX2,VTX0); MULT(edge,edge,t); ADD(isectpoint1,VTX0,edge);
+}
+
+
+#if 0 /* disabled macro form of the isect2() function above */
+#define ISECT2(VTX0,VTX1,VTX2,VV0,VV1,VV2,D0,D1,D2,isect0,isect1,isectpoint0,isectpoint1) \
+    tmp=D0/(D0-D1);                \
+    isect0=VV0+(VV1-VV0)*tmp;      \
+    SUB(diff,VTX1,VTX0);               \
+    MULT(diff,diff,tmp);               \
+    ADD(isectpoint0,diff,VTX0);    \
+    tmp=D0/(D0-D2); /* no line continuation here: the macro text ends on this line */
+    /*isect1=VV0+(VV2-VV0)*tmp;          \ */
+    /*SUB(diff,VTX2,VTX0);               \ */
+    /*MULT(diff,diff,tmp);               \ */
+    /*ADD(isectpoint1,VTX0,diff);          */
+#endif /* 0 */
+
+inline int compute_intervals_isectline(dReal VERT0[3],dReal VERT1[3],dReal VERT2[3],
+                                       dReal VV0,dReal VV1,dReal VV2,dReal D0,dReal D1,dReal D2,
+                                       dReal D0D1,dReal D0D2,dReal *isect0,dReal *isect1,
+                                       dReal isectpoint0[3],dReal isectpoint1[3])
+{
+    /* Picks the vertex that sits alone on one side of the other triangle's
+     * plane (D0..D2 are the signed distances, D0D1 and D0D2 their products)
+     * and hands it to isect2() as the first vertex, which fills the outputs.
+     * Returns 0 after doing so, or 1 without touching the outputs when all
+     * three distances are zero, i.e. the triangles are coplanar. */
+
+    /* D0 and D1 on the same side: D2 is on the other side or on the plane */
+    if(D0D1>0.0f) {
+        isect2(VERT2,VERT0,VERT1,VV2,VV0,VV1,D2,D0,D1,isect0,isect1,isectpoint0,isectpoint1);
+        return 0;
+    }
+    /* D0 and D2 on the same side, and D0D1<=0.0 */
+    if(D0D2>0.0f) {
+        isect2(VERT1,VERT0,VERT2,VV1,VV0,VV2,D1,D0,D2,isect0,isect1,isectpoint0,isectpoint1);
+        return 0;
+    }
+    /* D1 and D2 on the same side, or D0 is simply non-zero */
+    if(D1*D2>0.0f || D0!=0.0f) {
+        isect2(VERT0,VERT1,VERT2,VV0,VV1,VV2,D0,D1,D2,isect0,isect1,isectpoint0,isectpoint1);
+        return 0;
+    }
+    if(D1!=0.0f) {          /* D0 is zero here */
+        isect2(VERT1,VERT0,VERT2,VV1,VV0,VV2,D1,D0,D2,isect0,isect1,isectpoint0,isectpoint1);
+        return 0;
+    }
+    if(D2!=0.0f) {          /* D0 and D1 are zero here */
+        isect2(VERT2,VERT0,VERT1,VV2,VV0,VV1,D2,D0,D1,isect0,isect1,isectpoint0,isectpoint1);
+        return 0;
+    }
+    return 1;               /* triangles are coplanar */
+}
+
+#define COMPUTE_INTERVALS_ISECTLINE(VERT0,VERT1,VERT2,VV0,VV1,VV2,D0,D1,D2,D0D1,D0D2,isect0,isect1,isectpoint0,isectpoint1) \
+    if(D0D1>0.0f)                                         \
+    {                                                     \
+        /* here we know that D0D2<=0.0 */                   \
+        /* that is D0, D1 are on the same side, D2 on the other or on the plane */ \
+        isect2(VERT2,VERT0,VERT1,VV2,VV0,VV1,D2,D0,D1,&isect0,&isect1,isectpoint0,isectpoint1);          \
+    }   /* the macro text ends here: only the D0D1>0 case belongs to the macro */
+#if 0 /* remaining cases, disabled; compute_intervals_isectline() has the same chain as a function */
+    else if(D0D2>0.0f)                                    \
+    {                                                     \
+    /* here we know that d0d1<=0.0 */                   \
+    isect2(VERT1,VERT0,VERT2,VV1,VV0,VV2,D1,D0,D2,&isect0,&isect1,isectpoint0,isectpoint1);          \
+    }                                                     \
+    else if(D1*D2>0.0f || D0!=0.0f)                       \
+    {                                                     \
+    /* here we know that d0d1<=0.0 or that D0!=0.0 */   \
+    isect2(VERT0,VERT1,VERT2,VV0,VV1,VV2,D0,D1,D2,&isect0,&isect1,isectpoint0,isectpoint1);          \
+    }                                                     \
+    else if(D1!=0.0f)                                     \
+    {                                                     \
+    isect2(VERT1,VERT0,VERT2,VV1,VV0,VV2,D1,D0,D2,&isect0,&isect1,isectpoint0,isectpoint1);          \
+    }                                                     \
+    else if(D2!=0.0f)                                     \
+    {                                                     \
+    isect2(VERT2,VERT0,VERT1,VV2,VV0,VV1,D2,D0,D1,&isect0,&isect1,isectpoint0,isectpoint1);          \
+    }                                                     \
+    else                                                  \
+    {                                                     \
+    /* triangles are coplanar */                        \
+    coplanar=1;                                         \
+    return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);      \
+    }
+#endif /* 0 */
+
+
+
+/*
+ * Tests triangle (V0,V1,V2) against triangle (U0,U1,U2); returns 1 if they
+ * intersect and 0 otherwise.  For a non-coplanar hit, isectpt1 and isectpt2
+ * receive the end points of the segment the triangles share.  *coplanar is
+ * set to 1 when either triangle lies (within tolerance) in the other one's
+ * plane; the result then comes from coplanar_tri_tri() and isectpt1/isectpt2
+ * are left untouched.  Early "no intersection" returns do not write *coplanar.
+ */
+static int TriTriIntersectWithIsectLine(dReal V0[3],dReal V1[3],dReal V2[3],
+    dReal U0[3],dReal U1[3],dReal U2[3],int *coplanar,
+    dReal isectpt1[3],dReal isectpt2[3])
+{
+    dReal E1[3],E2[3];
+    dReal N1[3],N2[3],d1,d2;
+    dReal du0,du1,du2,dv0,dv1,dv2;
+    dReal D[3];
+    dReal isect1[2]={0,0}, isect2[2]={0,0};
+    dReal isectpointA1[3],isectpointA2[3];
+    dReal isectpointB1[3]={0,0,0},isectpointB2[3]={0,0,0};
+    dReal du0du1,du0du2,dv0dv1,dv0dv2;
+    short index;
+    dReal vp0,vp1,vp2;
+    dReal up0,up1,up2;
+    dReal b,c,max;
+    int smallest1,smallest2;
+
+    /* compute plane equation of triangle(V0,V1,V2) */
+    SUB(E1,V1,V0);
+    SUB(E2,V2,V0);
+    CROSS(N1,E1,E2);
+
+    // Even though all triangles might be initially valid,
+    // a triangle may degenerate into a segment after applying
+    // space transformation.
+    //
+    // Oleh_Derevenko:
+    // I'm not quite sure if this routine will fail/assert for zero normal
+    // (it's too large and complex to be fully analyzed).
+    // However in such a large code block three extra float comparisons
+    // will not have any noticeable influence on performance.
+    if (IS_ZERO(N1))
+        return 0;
+
+    d1=-DOT(N1,V0);
+    /* plane equation 1: N1.X+d1=0 */
+
+    /* put U0,U1,U2 into plane equation 1 to compute signed distances to the plane*/
+    du0=DOT(N1,U0)+d1;
+    du1=DOT(N1,U1)+d1;
+    du2=DOT(N1,U2)+d1;
+
+    /* coplanarity robustness check */
+#if USE_EPSILON_TEST==TRUE
+    if(dFabs(du0)<EPSILON) du0=0.0;
+    if(dFabs(du1)<EPSILON) du1=0.0;
+    if(dFabs(du2)<EPSILON) du2=0.0;
+#endif
+    du0du1=du0*du1;
+    du0du2=du0*du2;
+
+    if(du0du1>0.0f && du0du2>0.0f) /* same sign on all of them + not equal 0 ? */
+        return 0;                    /* no intersection occurs */
+
+    /* compute plane of triangle (U0,U1,U2) */
+    SUB(E1,U1,U0);
+    SUB(E2,U2,U0);
+    CROSS(N2,E1,E2);
+
+    // Same guard against a degenerate (zero-normal) triangle as for N1 above.
+    if (IS_ZERO(N2))
+        return 0;
+
+    d2=-DOT(N2,U0);
+    /* plane equation 2: N2.X+d2=0 */
+
+    /* put V0,V1,V2 into plane equation 2 */
+    dv0=DOT(N2,V0)+d2;
+    dv1=DOT(N2,V1)+d2;
+    dv2=DOT(N2,V2)+d2;
+
+#if USE_EPSILON_TEST==TRUE
+    if(dFabs(dv0)<EPSILON) dv0=0.0;
+    if(dFabs(dv1)<EPSILON) dv1=0.0;
+    if(dFabs(dv2)<EPSILON) dv2=0.0;
+#endif
+
+    dv0dv1=dv0*dv1;
+    dv0dv2=dv0*dv2;
+
+    if(dv0dv1>0.0f && dv0dv2>0.0f) /* same sign on all of them + not equal 0 ? */
+        return 0;                    /* no intersection occurs */
+
+    /* compute direction of intersection line */
+    CROSS(D,N1,N2);
+
+    /* compute an index to the largest component of D */
+    max= dFabs(D[0]);
+    index=0;
+    b= dFabs(D[1]);
+    c= dFabs(D[2]);
+    if(b>max) max=b,index=1;
+    if(c>max) max=c,index=2;
+
+    /* this is the simplified projection onto L*/
+    vp0=V0[index];
+    vp1=V1[index];
+    vp2=V2[index];
+
+    up0=U0[index];
+    up1=U1[index];
+    up2=U2[index];
+
+    /* compute interval for triangle 1 */
+    *coplanar=compute_intervals_isectline(V0,V1,V2,vp0,vp1,vp2,dv0,dv1,dv2,
+        dv0dv1,dv0dv2,&isect1[0],&isect1[1],isectpointA1,isectpointA2);
+    if(*coplanar) return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);
+
+    /* compute interval for triangle 2; du0..du2 can all be zero here even
+       though triangle 1 was not flagged, so this result must be checked too */
+    *coplanar=compute_intervals_isectline(U0,U1,U2,up0,up1,up2,du0,du1,du2,
+        du0du1,du0du2,&isect2[0],&isect2[1],isectpointB1,isectpointB2);
+    if(*coplanar) return coplanar_tri_tri(N1,V0,V1,V2,U0,U1,U2);
+    SORT2(isect1[0],isect1[1],smallest1);
+    SORT2(isect2[0],isect2[1],smallest2);
+
+    if(isect1[1]<isect2[0] || isect2[1]<isect1[0]) return 0;
+
+    /* at this point, we know that the triangles intersect */
+
+    if(isect2[0]<isect1[0])
+    {
+        if(smallest1==0) { SET(isectpt1,isectpointA1); }
+        else { SET(isectpt1,isectpointA2); }
+
+        if(isect2[1]<isect1[1])
+        {
+            if(smallest2==0) { SET(isectpt2,isectpointB2); }
+            else { SET(isectpt2,isectpointB1); }
+        }
+        else
+        {
+            if(smallest1==0) { SET(isectpt2,isectpointA2); }
+            else { SET(isectpt2,isectpointA1); }
+        }
+    }
+    else
+    {
+        if(smallest2==0) { SET(isectpt1,isectpointB1); }
+        else { SET(isectpt1,isectpointB2); }
+
+        if(isect2[1]>isect1[1])
+        {
+            if(smallest1==0) { SET(isectpt2,isectpointA2); }
+            else { SET(isectpt2,isectpointA1); }
+        }
+        else
+        {
+            if(smallest2==0) { SET(isectpt2,isectpointB2); }
+            else { SET(isectpt2,isectpointB1); }
+        }
+    }
+    return 1;
+}
+
+
+
+
+
+// Find the intersection point between a coplanar line segment,
+// defined by X1 and X2, and a ray defined by X3 and direction N.
+//
+// The formula for this calculation is:
+//               (c x b) . (a x b)
+//   Q = x1 + a -------------------
+//                  | a x b | ^2
+//
+// where a = x2 - x1
+//       b = x4 - x3
+//       c = x3 - x1
+// x1 and x2 are the end points of a triangle edge, and x3 is CoplanarPt
+//  and x4 is (CoplanarPt + n), as computed by the ADD below
+#if 0 // not used anywhere
+static int
+    IntersectLineSegmentRay(dVector3 x1, dVector3 x2, dVector3 x3, dVector3 n, 
+    dVector3 out_pt)
+{
+    dVector3 a, b, c, x4;
+
+    ADD(x4, x3, n);  // x4 = x3 + n
+
+    SUB(a, x2, x1);  // a = x2 - x1
+    SUB(b, x4, x3);
+    SUB(c, x3, x1);
+
+    dVector3 tmp1, tmp2;
+    CROSS(tmp1, c, b);
+    CROSS(tmp2, a, b);
+
+    dReal num, denom;
+    num = dCalcVectorDot3(tmp1, tmp2);
+    denom = LENGTH( tmp2 ); 
+
+    dReal s;
+    s = num /(denom*denom);
+
+    for (int i=0; i<3; i++)
+        out_pt[i] = x1[i] + a[i]*s;
+
+    // Test if this intersection is "behind" x3, w.r.t. n
+    SUB(a, x3, out_pt);
+    if (dCalcVectorDot3(a, n) > 0.0)
+        return 0;
+
+    // Test if this intersection point is outside the edge limits,
+    //  if (dot( (out_pt-x1), (out_pt-x2) ) < 0) it's inside
+    //  else outside
+    SUB(a, out_pt, x1);
+    SUB(b, out_pt, x2);
+    if (dCalcVectorDot3(a,b) < 0.0)
+        return 1;
+    else
+        return 0;
+}
+#endif
+
+// FindTriSolidIntrsection - Seeds ClippedPolygon with the three vertices of
+//  Tri and then clips it, in place, against the first numSides entries of
+//  Planes (each entry is passed as the plane normal, its component 3 as the
+//  plane constant).  Returns true if the clipped polygon has any vertex left.
+static bool
+    FindTriSolidIntrsection(const dVector3 Tri[3], 
+    const dVector4 Planes[6], int numSides,
+    LineContactSet& ClippedPolygon )
+{ 
+    // Start from the unclipped triangle
+    SET(ClippedPolygon.Points[0], Tri[0]);
+    SET(ClippedPolygon.Points[1], Tri[1]);
+    SET(ClippedPolygon.Points[2], Tri[2]);
+    ClippedPolygon.Count = 3;
+    // Clip against each side in turn
+    for ( int side = 0; side < numSides; ++side ) {
+        const dReal *plane = Planes[side];
+        ClipConvexPolygonAgainstPlane( plane, plane[3], ClippedPolygon );
+    }
+    return ClippedPolygon.Count > 0;
+}
+
+
+
+
+// ClipConvexPolygonAgainstPlane - Clip a convex polygon, described by
+//  CONTACTS, with a plane (described by N and C), in place.  Vertices p with
+//  dot(N,p) - C (plus a small tolerance) >= 0 are kept.  Note:  the input
+//  vertices are assumed to be in counterclockwise order.
+//
+// This code is taken from The Nebula Device:
+//  http://nebuladevice.sourceforge.net/cgi-bin/twiki/view/Nebula/WebHome
+// and is licensed under the following license:
+//  http://nebuladevice.sourceforge.net/doc/source/license.txt
+static void ClipConvexPolygonAgainstPlane( const dVector3 N, dReal C, LineContactSet& Contacts )
+{
+    // classify every vertex against the plane; PIndex is the first vertex on the positive side
+    int Positive = 0, Negative = 0, PIndex = -1;
+    int Quantity = Contacts.Count;
+
+    dReal Test[8]; // NOTE(review): fixed capacity of 8; Contacts.Count is not checked against it here
+    for ( int i = 0; i < Contacts.Count; i++ ) {
+        // An epsilon is used here because it is possible for the dot product
+        // and C to be exactly equal to each other (in theory), but differ
+        // slightly because of floating point problems.  Thus, add a little
+        // to the test number to push actually equal numbers over the edge
+        // towards the positive.  This should probably be somehow a relative
+        // tolerance, and I don't think multiplying by the constant is the best
+        // way to do this.
+        Test[i] = dCalcVectorDot3(N, Contacts.Points[i]) - C + dFabs(C)*REAL(1e-08);
+
+        if (Test[i] >= REAL(0.0)) {
+            Positive++;
+            if (PIndex < 0) {
+                PIndex = i;
+            }
+        }
+        else Negative++;
+    }
+
+    if (Positive > 0) {
+        if (Negative > 0) {
+            // plane transversely intersects polygon
+            dVector3 CV[8]; // clipped vertices; NOTE(review): CQuantity is not checked against this capacity
+            int CQuantity = 0, Cur, Prv;
+            dReal T;
+
+            if (PIndex > 0) {
+                // first clip vertex on line
+                Cur = PIndex;
+                Prv = Cur - 1;
+                T = Test[Cur] / (Test[Cur] - Test[Prv]);
+                CV[CQuantity][0] = Contacts.Points[Cur][0] 
+                    + T * (Contacts.Points[Prv][0] - Contacts.Points[Cur][0]);
+                CV[CQuantity][1] = Contacts.Points[Cur][1] 
+                    + T * (Contacts.Points[Prv][1] - Contacts.Points[Cur][1]);
+                CV[CQuantity][2] = Contacts.Points[Cur][2] 
+                    + T * (Contacts.Points[Prv][2] - Contacts.Points[Cur][2]);
+                CV[CQuantity][3] = Contacts.Points[Cur][3] 
+                    + T * (Contacts.Points[Prv][3] - Contacts.Points[Cur][3]);
+                CQuantity++;
+
+                // vertices on positive side of line
+                while (Cur < Quantity && Test[Cur] >= REAL(0.0)) {
+                    CV[CQuantity][0] = Contacts.Points[Cur][0];
+                    CV[CQuantity][1] = Contacts.Points[Cur][1];
+                    CV[CQuantity][2] = Contacts.Points[Cur][2];
+                    CV[CQuantity][3] = Contacts.Points[Cur][3];
+                    CQuantity++;
+                    Cur++;
+                }
+
+                // last clip vertex on line
+                if (Cur < Quantity) {
+                    Prv = Cur - 1;
+                }
+                else {
+                    Cur = 0;
+                    Prv = Quantity - 1;
+                }
+
+                T = Test[Cur] / (Test[Cur] - Test[Prv]);
+                CV[CQuantity][0] = Contacts.Points[Cur][0] 
+                    + T * (Contacts.Points[Prv][0] - Contacts.Points[Cur][0]);
+                CV[CQuantity][1] = Contacts.Points[Cur][1] 
+                    + T * (Contacts.Points[Prv][1] - Contacts.Points[Cur][1]);
+                CV[CQuantity][2] = Contacts.Points[Cur][2] 
+                    + T * (Contacts.Points[Prv][2] - Contacts.Points[Cur][2]);
+                CV[CQuantity][3] = Contacts.Points[Cur][3] 
+                    + T * (Contacts.Points[Prv][3] - Contacts.Points[Cur][3]);
+                CQuantity++;
+            }
+            else {
+                // PIndex is 0
+                // vertices on positive side of line
+                Cur = 0;
+                while (Cur < Quantity && Test[Cur] >= REAL(0.0)) {
+                    CV[CQuantity][0] = Contacts.Points[Cur][0];
+                    CV[CQuantity][1] = Contacts.Points[Cur][1];
+                    CV[CQuantity][2] = Contacts.Points[Cur][2];
+                    CV[CQuantity][3] = Contacts.Points[Cur][3];
+                    CQuantity++;
+                    Cur++;
+                }
+
+                // last clip vertex on line
+                Prv = Cur - 1;
+                T = Test[Cur] / (Test[Cur] - Test[Prv]);
+                CV[CQuantity][0] = Contacts.Points[Cur][0] 
+                    + T * (Contacts.Points[Prv][0] - Contacts.Points[Cur][0]);
+                CV[CQuantity][1] = Contacts.Points[Cur][1] 
+                    + T * (Contacts.Points[Prv][1] - Contacts.Points[Cur][1]);
+                CV[CQuantity][2] = Contacts.Points[Cur][2] 
+                    + T * (Contacts.Points[Prv][2] - Contacts.Points[Cur][2]);
+                CV[CQuantity][3] = Contacts.Points[Cur][3] 
+                    + T * (Contacts.Points[Prv][3] - Contacts.Points[Cur][3]);
+                CQuantity++;
+
+                // skip vertices on negative side
+                while (Cur < Quantity && Test[Cur] < REAL(0.0)) {
+                    Cur++;
+                }
+
+                // first clip vertex on line
+                if (Cur < Quantity) {
+                    Prv = Cur - 1;
+                    T = Test[Cur] / (Test[Cur] - Test[Prv]);
+                    CV[CQuantity][0] = Contacts.Points[Cur][0] 
+                        + T * (Contacts.Points[Prv][0] - Contacts.Points[Cur][0]);
+                    CV[CQuantity][1] = Contacts.Points[Cur][1] 
+                              + T * (Contacts.Points[Prv][1] - Contacts.Points[Cur][1]);
+                    CV[CQuantity][2] = Contacts.Points[Cur][2] 
+                        + T * (Contacts.Points[Prv][2] - Contacts.Points[Cur][2]);
+                    CV[CQuantity][3] = Contacts.Points[Cur][3] 
+                        + T * (Contacts.Points[Prv][3] - Contacts.Points[Cur][3]);
+                    CQuantity++;
+
+                    // vertices on positive side of line
+                    while (Cur < Quantity && Test[Cur] >= REAL(0.0)) {
+                        CV[CQuantity][0] = Contacts.Points[Cur][0];
+                        CV[CQuantity][1] = Contacts.Points[Cur][1];
+                        CV[CQuantity][2] = Contacts.Points[Cur][2];
+                        CV[CQuantity][3] = Contacts.Points[Cur][3];
+                        CQuantity++;
+                        Cur++;
+                    }
+                }
+                else {
+                    // no positive vertex follows: clip on the edge from the last vertex back to vertex 0
+                    Prv = Quantity - 1;
+                    T = Test[0] / (Test[0] - Test[Prv]);
+                    CV[CQuantity][0] = Contacts.Points[0][0] 
+                        + T * (Contacts.Points[Prv][0] - Contacts.Points[0][0]);
+                    CV[CQuantity][1] = Contacts.Points[0][1] 
+                              + T * (Contacts.Points[Prv][1] - Contacts.Points[0][1]);
+                    CV[CQuantity][2] = Contacts.Points[0][2] 
+                        + T * (Contacts.Points[Prv][2] - Contacts.Points[0][2]);
+                    CV[CQuantity][3] = Contacts.Points[0][3] 
+                        + T * (Contacts.Points[Prv][3] - Contacts.Points[0][3]);
+                    CQuantity++;
+                }
+            }
+            Quantity = CQuantity;
+            memcpy( Contacts.Points, CV, CQuantity * sizeof(dVector3) );
+        }
+        // else polygon fully on positive side of plane, nothing to do    
+        Contacts.Count = Quantity;
+    }
+    else {
+        Contacts.Count = 0; // no vertex on the positive side (original note: this should not happen, but for safety)
+    }
+
+}
+
+
+
+// ExamineContactPoint - Reports whether the ray that starts at in_point and
+//   runs along in_n hits the triangle v_col[0], v_col[1], v_col[2].
+//   Returns 1 on a hit and 0 otherwise.
+//
+static int
+ExamineContactPoint(dVector3* v_col, dVector3 in_n, dVector3 in_point)
+{
+    // RayTriangleIntersect takes output slots for the ray distance and the
+    //  triangle u/v coordinates; none of them is used here.
+    dReal rayDist, triU, triV;
+
+    const int hit = RayTriangleIntersect(in_point, in_n,
+        v_col[0], v_col[1], v_col[2],
+        &rayDist, &triU, &triV);
+    return hit ? 1 : 0;
+}
+
+
+
+// RayTriangleIntersect - Intersects the ray (orig, dir) with the triangle
+//   (vert0, vert1, vert2).  If an intersection is found, *t holds the
+//   distance along the ray (dir) and *u / *v the u/v coordinates into the
+//   triangle, and 1 is returned.  Returns 0 if no hit is found; *u and *v
+//   may already have been written by then.
+//   From "Real-Time Rendering," page 305
+static int
+RayTriangleIntersect(const dVector3 orig, const dVector3 dir,
+                     const dVector3 vert0, const dVector3 vert1,const dVector3 vert2,
+                     dReal *t,dReal *u,dReal *v)
+
+{
+    // the two triangle edges that leave vert0
+    dReal edgeA[3], edgeB[3];
+    SUB(edgeA, vert1, vert0);
+    SUB(edgeB, vert2, vert0);
+
+    // dir x edgeB feeds both the determinant and the U parameter
+    dReal dirCrossB[3];
+    CROSS(dirCrossB, dir, edgeB);
+
+    // a determinant near zero means the ray lies in the plane of the
+    //  triangle; that is reported as a miss
+    const dReal det = DOT(edgeA, dirCrossB);
+    if ((det > REAL(-0.001)) && (det < REAL(0.001)))
+        return 0;
+    const dReal invDet = 1.0 / det;
+
+    // vector from vert0 to the ray origin
+    dReal fromVert0[3];
+    SUB(fromVert0, orig, vert0);
+
+    // U parameter, rejected outside [0,1]
+    *u = DOT(fromVert0, dirCrossB) * invDet;
+    if ((*u < 0.0) || (*u > 1.0))
+        return 0;
+
+    // V parameter, rejected when negative or when u+v exceeds 1
+    dReal originCrossA[3];
+    CROSS(originCrossA, fromVert0, edgeA);
+    *v = DOT(dir, originCrossA) * invDet;
+    if ((*v < 0.0) || ((*u + *v) > 1.0))
+        return 0;
+
+    // the ray intersects the triangle: calculate t
+    *t = DOT(edgeB, originCrossA) * invDet;
+
+    return 1;
+}
+
+
+
+static bool
+SimpleUnclippedTest(dVector3 in_CoplanarPt, dVector3 in_v, dVector3 in_elt,
+                    dVector3 in_n, dVector3* in_col_v, dReal &out_depth)
+{
+    // Decides whether in_v gives a contact against the face in_col_v with
+    //  normal in_n.  Returns true and stores the (possibly adjusted) depth in
+    //  out_depth on success; otherwise returns false, out_depth untouched.
+    dReal dp = 0.0;
+    DEPTH(dp, in_CoplanarPt, in_v, in_n);
+
+    // a negative depth is never a contact
+    if (!(dp >= 0.0))
+        return false;
+
+    // if the penetration depth is more than the contact point's ELT along
+    //  the normal, then we've chosen the wrong face
+    const dReal eltLength = dFabs(dCalcVectorDot3(in_elt, in_n));
+
+    // zero depth: replace it by dMin of DISTANCE_EPSILON and the ELT length
+    if (dp == 0.0)
+        dp = dMin(DISTANCE_EPSILON, eltLength);
+
+    // short ELT and shallow depth: use the ELT length itself as the depth
+    if ((eltLength < SMALL_ELT) && (dp < EXPANDED_ELT_THRESH))
+        dp = eltLength;
+
+    if (!((dp > 0.0) && (dp <= eltLength)))
+        return false;
+    if (!ExamineContactPoint(in_col_v, in_n, in_v))
+        return false;
+    out_depth = dp;
+    return true;
+}
+
+
+
+
+// GenerateContact - Records a "unique" contact.  A contact is unique when
+//   its position or its normal differs from every contact stored so far.
+//   A candidate that matches a stored contact in both is not added; if its
+//   penetration depth is larger, the stored depth and normal are
+//   overwritten instead.
+static void
+GenerateContact(int in_Flags, dContactGeom* in_Contacts, int in_Stride,  
+                dxTriMesh* in_TriMesh1,  dxTriMesh* in_TriMesh2,
+                int TriIndex1, int TriIndex2,
+                const dVector3 in_ContactPos, const dVector3 in_Normal, dReal in_Depth,
+                int& OutTriCount)
+{
+    /*
+    NOTE by Oleh_Derevenko:
+    This function is called after maximal number of contacts has already been 
+    collected because it has a side effect of replacing penetration depth of
+    existing contact with larger penetration depth of another matching normal contact.
+    If this logic is not necessary any more, you can bail out on reach of contact
+    number maximum immediately in dCollideTTL(). You will also need to correct 
+    conditional statements after invocations of GenerateContact() in dCollideTTL().
+    */
+    dIASSERT(in_Depth >= 0.0);
+    //if (in_Depth < 0.0) -- the function is always called with depth >= 0
+    //    return;
+
+    // With CONTACTS_UNIMPORTANT set, no duplicate search is done and the
+    //  candidate is always appended; the assertion states that the caller
+    //  must have left room for it.
+    if (in_Flags & CONTACTS_UNIMPORTANT)
+    {
+        dIASSERT(OutTriCount < (in_Flags & NUMC_MASK));
+    }
+    else
+    {
+        // Look for stored contacts with the same position and normal
+        bool matchedExisting = false;
+
+        for (int idx = 0; idx < OutTriCount; idx++)
+        {
+            dContactGeom* existing = SAFECONTACT(in_Flags, in_Contacts, idx, in_Stride);
+
+            // same position?
+            dVector3 offset;
+            SUB(offset, in_ContactPos, existing->pos);
+            if (!(dCalcVectorDot3(offset, offset) < dEpsilon))
+                continue;
+
+            // same normal?
+            if (!(REAL(1.0) - dFabs(dCalcVectorDot3(in_Normal, existing->normal)) < dEpsilon))
+                continue;
+
+            // duplicate: keep the deeper of the two penetrations
+            if (in_Depth > existing->depth) {
+                existing->depth = in_Depth;
+                SMULT( existing->normal, in_Normal, -1.0);
+                existing->normal[3] = 0.0;
+            }
+            matchedExisting = true;
+            /*
+            NOTE by Oleh_Derevenko:
+            There may be a case when two normals are close to each other but no duplicate
+            while third normal is detected to be duplicate for both of them.
+            This is the only reason I can think of, there is no "break" statement.
+            Perhaps author considered it to be logical that the third normal would 
+            replace the depth in both of initial contacts. 
+            However, I consider it a questionable practice which should not
+            be applied without deep understanding of underlaying physics.
+            Even more, is this situation with close normal triplet acceptable at all?
+            Should not be two initial contacts reduced to one (replaced with the latter)?
+            If you know the answers for these questions, you may want to change this code.
+            See the same statement in GenerateContact() of collision_trimesh_box.cpp
+            */
+        }
+
+        // Nothing to add for a duplicate, or when the contact array is full
+        if (matchedExisting || OutTriCount == (in_Flags & NUMC_MASK))
+            return;
+    }
+
+    // Add a new contact
+    dContactGeom* added = SAFECONTACT(in_Flags, in_Contacts, OutTriCount, in_Stride);
+
+    SET( added->pos, in_ContactPos );
+    added->pos[3] = 0.0;
+
+    // the stored normal is in_Normal scaled by -1
+    SMULT( added->normal, in_Normal, -1.0);
+    added->normal[3] = 0.0;
+
+    added->depth = in_Depth;
+
+    added->g1 = in_TriMesh1;
+    added->g2 = in_TriMesh2;
+
+    added->side1 = TriIndex1;
+    added->side2 = TriIndex2;
+
+    // count the new entry
+    OutTriCount++;
+}
+
+
+#endif // dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+
+#endif // dTRIMESH_OPCODE
+
+
+#endif // dTRIMESH_ENABLED
diff --git a/libs/ode-0.16.1/ode/src/collision_util.cpp b/libs/ode-0.16.1/ode/src/collision_util.cpp
new file mode 100644
index 0000000..39ac87e
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_util.cpp
@@ -0,0 +1,613 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+some useful collision utility stuff. this includes some API utility
+functions that are defined in the public header files.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include "config.h"
+#include "odemath.h"
+#include "collision_util.h"
+
+//****************************************************************************
+
+int dCollideSpheres (dVector3 p1, dReal r1,
+                     dVector3 p2, dReal r2, dContactGeom *c)
+{
+    // printf ("d=%.2f  (%.2f %.2f %.2f) (%.2f %.2f %.2f) r1=%.2f r2=%.2f\n",
+    //	  d,p1[0],p1[1],p1[2],p2[0],p2[1],p2[2],r1,r2);
+    // Fills c->pos, c->normal and c->depth and returns 1 when d <= r1 + r2; otherwise returns 0 and leaves *c alone.
+    dReal d = dCalcPointsDistance3(p1,p2);    // presumably the distance |p1 - p2| (helper is defined elsewhere)
+    if (d > (r1 + r2)) return 0;    // centres further apart than the summed radii: no contact
+    if (d <= 0) {    // coincident centres: no direction can be derived from p1 - p2
+        c->pos[0] = p1[0];
+        c->pos[1] = p1[1];
+        c->pos[2] = p1[2];
+        c->normal[0] = 1;    // fixed fallback normal (1,0,0)
+        c->normal[1] = 0;
+        c->normal[2] = 0;
+        c->depth = r1 + r2;
+    }
+    else {
+        dReal d1 = dRecip (d);    // presumably 1/d (dRecip is defined elsewhere)
+        c->normal[0] = (p1[0]-p2[0])*d1;    // normal = (p1 - p2) * d1, i.e. directed from p2 towards p1
+        c->normal[1] = (p1[1]-p2[1])*d1;
+        c->normal[2] = (p1[2]-p2[2])*d1;
+        dReal k = REAL(0.5) * (r2 - r1 - d);    // offset from p1 along the normal; with a unit normal this is midway between the two facing surfaces
+        c->pos[0] = p1[0] + c->normal[0]*k;
+        c->pos[1] = p1[1] + c->normal[1]*k;
+        c->pos[2] = p1[2] + c->normal[2]*k;
+        c->depth = r1 + r2 - d;    // overlap measured along the line between the centres
+    }
+    return 1;
+}
+
+
+void dLineClosestApproach (const dVector3 pa, const dVector3 ua,
+                           const dVector3 pb, const dVector3 ub,
+                           dReal *alpha, dReal *beta)
+{
+    dVector3 p;    // p = pb - pa
+    p[0] = pb[0] - pa[0];
+    p[1] = pb[1] - pa[1];
+    p[2] = pb[2] - pa[2];
+    dReal uaub = dCalcVectorDot3(ua,ub);
+    dReal q1 =  dCalcVectorDot3(ua,p);
+    dReal q2 = -dCalcVectorDot3(ub,p);
+    dReal d = 1-uaub*uaub;    // determinant of the 2x2 system solved below; relies on ua and ub being unit length
+    if (d <= REAL(0.0001)) {    // lines (nearly) parallel: no unique answer, so alpha = beta = 0 is reported
+        // @@@ this needs to be made more robust
+        *alpha = 0;
+        *beta  = 0;
+    }
+    else {    // solve  alpha - uaub*beta = q1  and  -uaub*alpha + beta = q2
+        d = dRecip(d);    // d is reused to hold what is presumably the reciprocal of the determinant
+        *alpha = (q1 + uaub*q2)*d;
+        *beta  = (uaub*q1 + q2)*d;
+    }
+}
+
+
+// given two line segments A and B with endpoints a1-a2 and b1-b2, return the
+// points on A and B that are closest to each other (in cp1 and cp2).
+// in the case of parallel lines where there are multiple solutions, a
+// solution involving the endpoint of at least one line will be returned.
+// this will work correctly for zero length lines, e.g. if a1==a2 and/or
+// b1==b2.
+//
+// the algorithm works by applying the voronoi clipping rule to the features
+// of the line segments. the three features of each line segment are the two
+// endpoints and the line between them. the voronoi clipping rule states that,
+// for feature X on line A and feature Y on line B, the closest points PA and
+// PB between X and Y are globally the closest points if PA is in V(Y) and
+// PB is in V(X), where V(X) is the voronoi region of X.
+
+void dClosestLineSegmentPoints (const dVector3 a1, const dVector3 a2,
+                                const dVector3 b1, const dVector3 b2,
+                                dVector3 cp1, dVector3 cp2)
+{
+    dVector3 a1a2,b1b2,a1b1,a1b2,a2b1,a2b2,n;    // xy holds the vector y - x; n is scratch
+    dReal la,lb,k,da1,da2,da3,da4,db1,db2,db3,db4,det;    // daN / dbN: dot of A's / B's direction with the N-th endpoint difference
+
+#define SET2(a,b) a[0]=b[0]; a[1]=b[1]; a[2]=b[2];    // a = b, three components
+#define SET3(a,b,op,c) a[0]=b[0] op c[0]; a[1]=b[1] op c[1]; a[2]=b[2] op c[2];    // a = b op c per component; b and c are pasted textually, so "-x" or "k*x" work
+
+    // check vertex-vertex features
+
+    SET3 (a1a2,a2,-,a1);
+    SET3 (b1b2,b2,-,b1);
+    SET3 (a1b1,b1,-,a1);
+    da1 = dCalcVectorDot3(a1a2,a1b1);
+    db1 = dCalcVectorDot3(b1b2,a1b1);
+    if (da1 <= 0 && db1 >= 0) {    // endpoint pair (a1,b1)
+        SET2 (cp1,a1);
+        SET2 (cp2,b1);
+        return;
+    }
+
+    SET3 (a1b2,b2,-,a1);
+    da2 = dCalcVectorDot3(a1a2,a1b2);
+    db2 = dCalcVectorDot3(b1b2,a1b2);
+    if (da2 <= 0 && db2 <= 0) {    // endpoint pair (a1,b2)
+        SET2 (cp1,a1);
+        SET2 (cp2,b2);
+        return;
+    }
+
+    SET3 (a2b1,b1,-,a2);
+    da3 = dCalcVectorDot3(a1a2,a2b1);
+    db3 = dCalcVectorDot3(b1b2,a2b1);
+    if (da3 >= 0 && db3 >= 0) {    // endpoint pair (a2,b1)
+        SET2 (cp1,a2);
+        SET2 (cp2,b1);
+        return;
+    }
+
+    SET3 (a2b2,b2,-,a2);
+    da4 = dCalcVectorDot3(a1a2,a2b2);
+    db4 = dCalcVectorDot3(b1b2,a2b2);
+    if (da4 >= 0 && db4 <= 0) {    // endpoint pair (a2,b2)
+        SET2 (cp1,a2);
+        SET2 (cp2,b2);
+        return;
+    }
+
+    // check edge-vertex features.
+    // if one or both of the lines has zero length, we will never get to here,
+    // so we do not have to worry about the following divisions by zero.
+
+    la = dCalcVectorDot3(a1a2,a1a2);    // squared length of segment A
+    if (da1 >= 0 && da3 <= 0) {    // b1 projects onto the interior of A
+        k = da1 / la;    // parameter of that projection along A
+        SET3 (n,a1b1,-,k*a1a2);    // n = b1 minus its projection on A
+        if (dCalcVectorDot3(b1b2,n) >= 0) {
+            SET3 (cp1,a1,+,k*a1a2);
+            SET2 (cp2,b1);
+            return;
+        }
+    }
+
+    if (da2 >= 0 && da4 <= 0) {    // b2 projects onto the interior of A
+        k = da2 / la;
+        SET3 (n,a1b2,-,k*a1a2);    // n = b2 minus its projection on A
+        if (dCalcVectorDot3(b1b2,n) <= 0) {
+            SET3 (cp1,a1,+,k*a1a2);
+            SET2 (cp2,b2);
+            return;
+        }
+    }
+
+    lb = dCalcVectorDot3(b1b2,b1b2);    // squared length of segment B
+    if (db1 <= 0 && db2 >= 0) {    // a1 projects onto the interior of B
+        k = -db1 / lb;    // parameter of that projection along B
+        SET3 (n,-a1b1,-,k*b1b2);    // n = a1 minus its projection on B
+        if (dCalcVectorDot3(a1a2,n) >= 0) {
+            SET2 (cp1,a1);
+            SET3 (cp2,b1,+,k*b1b2);
+            return;
+        }
+    }
+
+    if (db3 <= 0 && db4 >= 0) {    // a2 projects onto the interior of B
+        k = -db3 / lb;
+        SET3 (n,-a2b1,-,k*b1b2);    // n = a2 minus its projection on B
+        if (dCalcVectorDot3(a1a2,n) <= 0) {
+            SET2 (cp1,a2);
+            SET3 (cp2,b1,+,k*b1b2);
+            return;
+        }
+    }
+
+    // it must be edge-edge
+
+    k = dCalcVectorDot3(a1a2,b1b2);    // k is reused here for the dot product of the two directions
+    det = la*lb - k*k;    // zero when the two directions are parallel
+    if (det <= 0) {
+        // this should never happen, but just in case...
+        SET2(cp1,a1);
+        SET2(cp2,b1);
+        return;
+    }
+    det = dRecip (det);
+    dReal alpha = (lb*da1 -  k*db1) * det;    // parameter along A of the closest point
+    dReal beta  = ( k*da1 - la*db1) * det;    // parameter along B of the closest point
+    SET3 (cp1,a1,+,alpha*a1a2);
+    SET3 (cp2,b1,+,beta*b1b2);
+
+# undef SET2    // preprocessor directives: the early returns above do not prevent these from taking effect
+# undef SET3
+}
+
+
+// a simple root finding algorithm is used to find the value of 't' that
+// satisfies:
+//		d|D(t)|^2/dt = 0
+// where:
+//		|D(t)| = |p(t)-b(t)|
+// where p(t) is a point on the line parameterized by t:
+//		p(t) = p1 + t*(p2-p1)
+// and b(t) is that same point clipped to the boundary of the box. in box-
+// relative coordinates d|D(t)|^2/dt is the sum of three x,y,z components
+// each of which looks like this:
+//
+//	    t_lo     /
+//	      ______/    -->t
+//	     /     t_hi
+//	    /
+//
+// t_lo and t_hi are the t values where the line passes through the planes
+// corresponding to the sides of the box. the algorithm computes d|D(t)|^2/dt
+// in a piecewise fashion from t=0 to t=1, stopping at the point where
+// d|D(t)|^2/dt crosses from negative to positive.
+
+void dClosestLineBoxPoints (const dVector3 p1, const dVector3 p2,
+                            const dVector3 c, const dMatrix3 R,
+                            const dVector3 side,
+                            dVector3 lret, dVector3 bret)
+{
+    int i;
+
+    // compute the start and delta of the line p1-p2 relative to the box.
+    // we will do all subsequent computations in this box-relative coordinate
+    // system. we have to do a translation and rotation for each point.
+    dVector3 tmp,s,v;
+    tmp[0] = p1[0] - c[0];
+    tmp[1] = p1[1] - c[1];
+    tmp[2] = p1[2] - c[2];
+    dMultiply1_331 (s,R,tmp);    // s = line start (p1 - c) in box coordinates
+    tmp[0] = p2[0] - p1[0];
+    tmp[1] = p2[1] - p1[1];
+    tmp[2] = p2[2] - p1[2];
+    dMultiply1_331 (v,R,tmp);    // v = line delta (p2 - p1) in box coordinates; tmp keeps the untransformed delta for later
+
+    // mirror the line so that v has all components >= 0
+    dVector3 sign;    // per-axis +1/-1 record of the mirroring, undone when the box point is built
+    for (i=0; i<3; i++) {
+        if (v[i] < 0) {
+            s[i] = -s[i];
+            v[i] = -v[i];
+            sign[i] = -1;
+        }
+        else sign[i] = 1;
+    }
+
+    // compute v^2
+    dVector3 v2;
+    v2[0] = v[0]*v[0];
+    v2[1] = v[1]*v[1];
+    v2[2] = v[2]*v[2];
+
+    // compute the half-sides of the box
+    dReal h[3];
+    h[0] = REAL(0.5) * side[0];
+    h[1] = REAL(0.5) * side[1];
+    h[2] = REAL(0.5) * side[2];
+
+    // region is -1,0,+1 depending on which side of the box planes each
+    // coordinate is on. tanchor is the next t value at which there is a
+    // transition, or the last one if there are no more.
+    int region[3];
+    dReal tanchor[3];
+
+    // Denormals are a problem, because we divide by v[i], and then 
+    // multiply that by 0. Alas, infinity times 0 is infinity (!)
+    // We also use v2[i], which is v[i] squared. Here's how the epsilons 
+    // are chosen:
+    // float epsilon = 1.175494e-038 (smallest non-denormal number)
+    // double epsilon = 2.225074e-308 (smallest non-denormal number)
+    // For single precision, choose an epsilon such that v[i] squared is 
+    // not a denormal; this is for performance.
+    // For double precision, choose an epsilon such that v[i] is not a 
+    // denormal; this is for correctness. (Jon Watte on mailinglist)
+
+#if defined( dSINGLE )
+    const dReal tanchor_eps = REAL(1e-19);
+#else
+    const dReal tanchor_eps = REAL(1e-307);
+#endif
+
+    // find the region and tanchor values for p1
+    for (i=0; i<3; i++) {
+        if (v[i] > tanchor_eps) {
+            if (s[i] < -h[i]) {
+                region[i] = -1;
+                tanchor[i] = (-h[i]-s[i])/v[i];    // t at which the -h plane is reached
+            }
+            else {
+                region[i] = (s[i] > h[i]);    // 0 between the planes, 1 beyond +h
+                tanchor[i] = (h[i]-s[i])/v[i];    // t at which the +h plane is crossed
+            }
+        }
+        else {
+            region[i] = 0;    // axis with (almost) no motion: it contributes nothing to dd2dt
+            tanchor[i] = 2;		// this will never be a valid tanchor
+        }
+    }
+
+    // compute d|d|^2/dt for t=0. if it's >= 0 then p1 is the closest point
+    dReal t=0;
+    dReal dd2dt = 0;
+    for (i=0; i<3; i++) dd2dt -= (region[i] ? v2[i] : 0) * tanchor[i];    // only axes with a non-zero region contribute
+    if (dd2dt >= 0) goto got_answer;
+
+    do {
+        // find the point on the line that is at the next clip plane boundary
+        dReal next_t = 1;
+        for (i=0; i<3; i++) {
+            if (tanchor[i] > t && tanchor[i] < 1 && tanchor[i] < next_t)
+                next_t = tanchor[i];    // smallest anchor strictly inside (t, 1)
+        }
+
+        // compute d|d|^2/dt for the next t
+        dReal next_dd2dt = 0;
+        for (i=0; i<3; i++) {
+            next_dd2dt += (region[i] ? v2[i] : 0) * (next_t - tanchor[i]);
+        }
+
+        // if the sign of d|d|^2/dt has changed, solution = the crossover point
+        if (next_dd2dt >= 0) {
+            dReal m = (next_dd2dt-dd2dt)/(next_t - t);    // slope of dd2dt across [t, next_t]
+            t -= dd2dt/m;    // linear interpolation to the zero crossing
+            goto got_answer;
+        }
+
+        // advance to the next anchor point / region
+        for (i=0; i<3; i++) {
+            if (tanchor[i] == next_t) {    // never true for the low-motion axes: their tanchor is 2 and next_t <= 1
+                tanchor[i] = (h[i]-s[i])/v[i];
+                region[i]++;
+            }
+        }
+        t = next_t;
+        dd2dt = next_dd2dt;
+    }
+    while (t < 1);
+    t = 1;    // no sign change found up to t = 1: the p2 end of the segment is used
+
+got_answer:
+
+    // compute closest point on the line
+    for (i=0; i<3; i++) lret[i] = p1[i] + t*tmp[i];	// note: tmp=p2-p1
+
+    // compute closest point on the box
+    for (i=0; i<3; i++) {
+        tmp[i] = sign[i] * (s[i] + t*v[i]);    // line point in box coordinates with the mirroring undone
+        if (tmp[i] < -h[i]) tmp[i] = -h[i];    // clamp to the box extents
+        else if (tmp[i] > h[i]) tmp[i] = h[i];
+    }
+    dMultiply0_331 (s,R,tmp);    // s is reused: presumably the clamped point rotated back out of the box frame
+    for (i=0; i<3; i++) bret[i] = s[i] + c[i];
+}
+
+
+// given boxes (p1,R1,side1) and (p2,R2,side2), return 1 if they intersect
+// or 0 if not.
+
+int dBoxTouchesBox (const dVector3 p1, const dMatrix3 R1,
+                    const dVector3 side1, const dVector3 p2,
+                    const dMatrix3 R2, const dVector3 side2)
+{
+    // two boxes are disjoint if (and only if) there is a separating axis
+    // perpendicular to a face from one box or perpendicular to an edge from
+    // either box. the following tests are derived from:
+    //    "OBB Tree: A Hierarchical Structure for Rapid Interference Detection",
+    //    S.Gottschalk, M.C.Lin, D.Manocha., Proc of ACM Siggraph 1996.
+
+    // Rij is R1'*R2, i.e. the relative rotation between R1 and R2.
+    // Qij is abs(Rij)
+    dVector3 p,pp;
+    dReal A1,A2,A3,B1,B2,B3,R11,R12,R13,R21,R22,R23,R31,R32,R33,
+        Q11,Q12,Q13,Q21,Q22,Q23,Q31,Q32,Q33;
+
+    // get vector from centers of box 1 to box 2, relative to box 1
+    p[0] = p2[0] - p1[0];
+    p[1] = p2[1] - p1[1];
+    p[2] = p2[2] - p1[2];
+    dMultiply1_331 (pp,R1,p);		// get pp = p relative to body 1
+
+    // get side lengths / 2
+    A1 = side1[0]*REAL(0.5); A2 = side1[1]*REAL(0.5); A3 = side1[2]*REAL(0.5);
+    B1 = side2[0]*REAL(0.5); B2 = side2[1]*REAL(0.5); B3 = side2[2]*REAL(0.5);
+
+    // for the following tests, excluding computation of Rij, in the worst case,
+    // 15 compares, 60 adds, 81 multiplies, and 24 absolutes.
+    // notation: R1=[u1 u2 u3], R2=[v1 v2 v3]
+
+    // separating axis = u1,u2,u3 (each row of Rij is computed just before the test that first needs it)
+    R11 = dCalcVectorDot3_44(R1+0,R2+0); R12 = dCalcVectorDot3_44(R1+0,R2+1); R13 = dCalcVectorDot3_44(R1+0,R2+2);
+    Q11 = dFabs(R11); Q12 = dFabs(R12); Q13 = dFabs(R13);
+    if (dFabs(pp[0]) > (A1 + B1*Q11 + B2*Q12 + B3*Q13)) return 0;
+    R21 = dCalcVectorDot3_44(R1+1,R2+0); R22 = dCalcVectorDot3_44(R1+1,R2+1); R23 = dCalcVectorDot3_44(R1+1,R2+2);
+    Q21 = dFabs(R21); Q22 = dFabs(R22); Q23 = dFabs(R23);
+    if (dFabs(pp[1]) > (A2 + B1*Q21 + B2*Q22 + B3*Q23)) return 0;
+    R31 = dCalcVectorDot3_44(R1+2,R2+0); R32 = dCalcVectorDot3_44(R1+2,R2+1); R33 = dCalcVectorDot3_44(R1+2,R2+2);
+    Q31 = dFabs(R31); Q32 = dFabs(R32); Q33 = dFabs(R33);
+    if (dFabs(pp[2]) > (A3 + B1*Q31 + B2*Q32 + B3*Q33)) return 0;
+
+    // separating axis = v1,v2,v3 (uses the world-space offset p, not pp)
+    if (dFabs(dCalcVectorDot3_41(R2+0,p)) > (A1*Q11 + A2*Q21 + A3*Q31 + B1)) return 0;
+    if (dFabs(dCalcVectorDot3_41(R2+1,p)) > (A1*Q12 + A2*Q22 + A3*Q32 + B2)) return 0;
+    if (dFabs(dCalcVectorDot3_41(R2+2,p)) > (A1*Q13 + A2*Q23 + A3*Q33 + B3)) return 0;
+
+    // separating axis = u1 x (v1,v2,v3)
+    if (dFabs(pp[2]*R21-pp[1]*R31) > A2*Q31 + A3*Q21 + B2*Q13 + B3*Q12) return 0;
+    if (dFabs(pp[2]*R22-pp[1]*R32) > A2*Q32 + A3*Q22 + B1*Q13 + B3*Q11) return 0;
+    if (dFabs(pp[2]*R23-pp[1]*R33) > A2*Q33 + A3*Q23 + B1*Q12 + B2*Q11) return 0;
+
+    // separating axis = u2 x (v1,v2,v3)
+    if (dFabs(pp[0]*R31-pp[2]*R11) > A1*Q31 + A3*Q11 + B2*Q23 + B3*Q22) return 0;
+    if (dFabs(pp[0]*R32-pp[2]*R12) > A1*Q32 + A3*Q12 + B1*Q23 + B3*Q21) return 0;
+    if (dFabs(pp[0]*R33-pp[2]*R13) > A1*Q33 + A3*Q13 + B1*Q22 + B2*Q21) return 0;
+
+    // separating axis = u3 x (v1,v2,v3)
+    if (dFabs(pp[1]*R11-pp[0]*R21) > A1*Q21 + A2*Q11 + B2*Q33 + B3*Q32) return 0;
+    if (dFabs(pp[1]*R12-pp[0]*R22) > A1*Q22 + A2*Q12 + B1*Q33 + B3*Q31) return 0;
+    if (dFabs(pp[1]*R13-pp[0]*R23) > A1*Q23 + A2*Q13 + B1*Q32 + B2*Q31) return 0;
+
+    return 1;    // none of the 15 candidate axes separates the boxes
+}
+
+//****************************************************************************
+// other utility functions
+
+/*ODE_API */void dInfiniteAABB (dxGeom *geom, dReal aabb[6])    // geom is not referenced in the body
+{
+    aabb[0] = -dInfinity;    // even slots 0,2,4 receive -dInfinity, odd slots 1,3,5 receive +dInfinity
+    aabb[1] = dInfinity;
+    aabb[2] = -dInfinity;
+    aabb[3] = dInfinity;
+    aabb[4] = -dInfinity;
+    aabb[5] = dInfinity;
+}
+
+
+//****************************************************************************
+// Helpers for Croteam's collider - by Nguyen Binh
+
+int dClipEdgeToPlane( dVector3 &vEpnt0, dVector3 &vEpnt1, const dVector4& plPlane)
+{
+    // calculate distance of edge points to plane
+    dReal fDistance0 = dPointPlaneDistance(  vEpnt0 ,plPlane );
+    dReal fDistance1 = dPointPlaneDistance(  vEpnt1 ,plPlane );
+
+    // if both points are behind the plane
+    if ( fDistance0 < 0 && fDistance1 < 0 ) 
+    {
+        // return 0 without touching either end point
+        return 0;
+        // (next branch) if both points are in front of the plane
+    } 
+    else if ( fDistance0 > 0 && fDistance1 > 0 ) 
+    {
+        // accept them unchanged
+        return 1;
+        // (next branch) if the end points are strictly on opposite sides
+    } else if ((fDistance0 > 0 && fDistance1 < 0) || ( fDistance0 < 0 && fDistance1 > 0)) 
+    {
+        // linear interpolation along the edge to the point where the plane distance is zero
+        // find intersection point of edge and plane
+        dVector3 vIntersectionPoint;
+        vIntersectionPoint[0]= vEpnt0[0]-(vEpnt0[0]-vEpnt1[0])*fDistance0/(fDistance0-fDistance1);
+        vIntersectionPoint[1]= vEpnt0[1]-(vEpnt0[1]-vEpnt1[1])*fDistance0/(fDistance0-fDistance1);
+        vIntersectionPoint[2]= vEpnt0[2]-(vEpnt0[2]-vEpnt1[2])*fDistance0/(fDistance0-fDistance1);
+
+        // replace the end point that lies behind the plane with the intersection point
+        if ( fDistance0 < 0 ) 
+        {
+            dVector3Copy(vIntersectionPoint,vEpnt0);    // dVector3Copy takes the source first, the destination last
+        } else 
+        {
+            dVector3Copy(vIntersectionPoint,vEpnt1);
+        }
+        return 1;
+    }
+    return 1;    // only reached when a distance is exactly 0 (or NaN): the edge is accepted unchanged, even if its other end is behind the plane
+}
+
+// clip polygon with plane and generate new polygon points (ctOut receives the number of points written to avArrayOut)
+void		 dClipPolyToPlane( const dVector3 avArrayIn[], const int ctIn, 
+                              dVector3 avArrayOut[], int &ctOut, 
+                              const dVector4 &plPlane )
+{
+    // start with no output points
+    ctOut = 0;
+
+    int i0 = ctIn-1;    // i0 trails i1 by one vertex, starting at the last one so the closing edge is visited first
+
+    // for each edge in input polygon
+    for (int i1=0; i1<ctIn; i0=i1, i1++) {
+        // NOTE: nothing here bounds ctOut; avArrayOut must be sized by the caller for every emitted point
+
+        // calculate distance of edge points to plane
+        dReal fDistance0 = dPointPlaneDistance(  avArrayIn[i0],plPlane );
+        dReal fDistance1 = dPointPlaneDistance(  avArrayIn[i1],plPlane );
+
+        // if first point is in front of plane (a point exactly on the plane counts as in front)
+        if( fDistance0 >= 0 ) {
+            // emit point
+            avArrayOut[ctOut][0] = avArrayIn[i0][0];
+            avArrayOut[ctOut][1] = avArrayIn[i0][1];
+            avArrayOut[ctOut][2] = avArrayIn[i0][2];
+            ctOut++;
+        }
+
+        // if points are on different sides (strict tests: a distance of exactly 0 never produces an intersection point)
+        if( (fDistance0 > 0 && fDistance1 < 0) || ( fDistance0 < 0 && fDistance1 > 0) ) {
+
+            // find intersection point of edge and plane
+            dVector3 vIntersectionPoint;
+            vIntersectionPoint[0]= avArrayIn[i0][0] - 
+                (avArrayIn[i0][0]-avArrayIn[i1][0])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[1]= avArrayIn[i0][1] - 
+                (avArrayIn[i0][1]-avArrayIn[i1][1])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[2]= avArrayIn[i0][2] - 
+                (avArrayIn[i0][2]-avArrayIn[i1][2])*fDistance0/(fDistance0-fDistance1);
+
+            // emit intersection point
+            avArrayOut[ctOut][0] = vIntersectionPoint[0];
+            avArrayOut[ctOut][1] = vIntersectionPoint[1];
+            avArrayOut[ctOut][2] = vIntersectionPoint[2];
+            ctOut++;
+        }
+    }
+
+}
+
+void		 dClipPolyToCircle(const dVector3 avArrayIn[], const int ctIn, 
+                               dVector3 avArrayOut[], int &ctOut, 
+                               const dVector4 &plPlane ,dReal fRadius)
+{
+    // start with no output points
+    ctOut = 0;
+    // Plane clipping as in dClipPolyToPlane, but a point is only emitted when its squared length is <= fRadius*fRadius.
+    int i0 = ctIn-1;    // i0 trails i1 by one vertex, starting at the last one so the closing edge is visited first
+
+    // for each edge in input polygon
+    for (int i1=0; i1<ctIn; i0=i1, i1++) 
+    {
+        // calculate distance of edge points to plane
+        dReal fDistance0 = dPointPlaneDistance(  avArrayIn[i0],plPlane );
+        dReal fDistance1 = dPointPlaneDistance(  avArrayIn[i1],plPlane );
+
+        // if first point is in front of plane (a point exactly on the plane counts as in front)
+        if( fDistance0 >= 0 ) 
+        {
+            // emit point, provided it passes the radius test
+            if (dVector3LengthSquare(avArrayIn[i0]) <= fRadius*fRadius)
+            {
+                avArrayOut[ctOut][0] = avArrayIn[i0][0];
+                avArrayOut[ctOut][1] = avArrayIn[i0][1];
+                avArrayOut[ctOut][2] = avArrayIn[i0][2];
+                ctOut++;
+            }
+        }
+
+        // if points are on different sides (strict tests: a distance of exactly 0 never produces an intersection point)
+        if( (fDistance0 > 0 && fDistance1 < 0) || ( fDistance0 < 0 && fDistance1 > 0) ) 
+        {
+            // linear interpolation along the edge to the point where the plane distance is zero
+            // find intersection point of edge and plane
+            dVector3 vIntersectionPoint;
+            vIntersectionPoint[0]= avArrayIn[i0][0] - 
+                (avArrayIn[i0][0]-avArrayIn[i1][0])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[1]= avArrayIn[i0][1] - 
+                (avArrayIn[i0][1]-avArrayIn[i1][1])*fDistance0/(fDistance0-fDistance1);
+            vIntersectionPoint[2]= avArrayIn[i0][2] - 
+                (avArrayIn[i0][2]-avArrayIn[i1][2])*fDistance0/(fDistance0-fDistance1);
+
+            // emit intersection point; the radius test is applied to the point being emitted, not to the edge's start vertex
+            if (dVector3LengthSquare(vIntersectionPoint) <= fRadius*fRadius)
+            {
+                avArrayOut[ctOut][0] = vIntersectionPoint[0];
+                avArrayOut[ctOut][1] = vIntersectionPoint[1];
+                avArrayOut[ctOut][2] = vIntersectionPoint[2];
+                ctOut++;
+            }
+        }
+    }	
+}
+
diff --git a/libs/ode-0.16.1/ode/src/collision_util.h b/libs/ode-0.16.1/ode/src/collision_util.h
new file mode 100644
index 0000000..57e116a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/collision_util.h
@@ -0,0 +1,358 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+some useful collision utility stuff.
+
+*/
+
+#ifndef _ODE_COLLISION_UTIL_H_
+#define _ODE_COLLISION_UTIL_H_
+
+#include <ode/common.h>
+#include <ode/contact.h>
+#include <ode/rotation.h>
+#include "odemath.h"
+
+
+// given a pointer `p' to a dContactGeom, return the dContactGeom at
+// p + skip bytes. `p' is parenthesized so that expression arguments are cast as a whole.
+#define CONTACT(p,skip) ((dContactGeom*) (((char*)(p)) + (skip)))
+
+#if 1
+#include "collision_kernel.h"
+// Fetches contact number Index from an array whose entries are Stride bytes apart
+static inline 
+dContactGeom* SAFECONTACT(int Flags, dContactGeom* Contacts, int Index, int Stride){
+    dIASSERT(Index >= 0 && Index < (Flags & NUMC_MASK));    // NUMC_MASK is defined elsewhere; presumably it extracts the contact limit from Flags
+    return ((dContactGeom*)(((char*)Contacts) + (Index * Stride)));    // byte-wise offset from Contacts
+}
+#endif
+
+
+// if the spheres (p1,r1) and (p2,r2) collide, set the contact `c' and
+// return 1, else return 0.
+
+int dCollideSpheres (dVector3 p1, dReal r1,
+                     dVector3 p2, dReal r2, dContactGeom *c);
+
+
+// given two lines
+//    qa = pa + alpha* ua
+//    qb = pb + beta * ub
+// where pa,pb are two points, ua,ub are two unit length vectors, and alpha,
+// beta go from [-inf,inf], return alpha and beta such that qa and qb are
+// as close as possible
+
+void dLineClosestApproach (const dVector3 pa, const dVector3 ua,
+                           const dVector3 pb, const dVector3 ub,
+                           dReal *alpha, dReal *beta);
+
+
+// given a line segment p1-p2 and a box (center 'c', rotation 'R', side length
+// vector 'side'), compute the points of closest approach between the box
+// and the line. return these points in 'lret' (the point on the line) and
+// 'bret' (the point on the box). if the line actually penetrates the box
+// then the solution is not unique, but only one solution will be returned.
+// in this case the solution points will coincide.
+
+void dClosestLineBoxPoints (const dVector3 p1, const dVector3 p2,
+                            const dVector3 c, const dMatrix3 R,
+                            const dVector3 side,
+                            dVector3 lret, dVector3 bret);
+
+// 20 Apr 2004
+// Start code by Nguyen Binh
+int dClipEdgeToPlane(dVector3 &vEpnt0, dVector3 &vEpnt1, const dVector4& plPlane);
+// clip polygon with plane and generate new polygon points
+void dClipPolyToPlane(const dVector3 avArrayIn[], const int ctIn, dVector3 avArrayOut[], int &ctOut, const dVector4 &plPlane );
+
+void dClipPolyToCircle(const dVector3 avArrayIn[], const int ctIn, dVector3 avArrayOut[], int &ctOut, const dVector4 &plPlane ,dReal fRadius);
+
+// Some vector math
+static inline 
+void dVector3Subtract(const dVector3& a,const dVector3& b,dVector3& c)
+{
+    dSubtractVectors3(c, a, b);    // presumably c = a - b; the output is the last parameter here but the first argument there
+}
+
+static inline 
+void dVector3Scale(dVector3& a,dReal nScale)
+{
+    dScaleVector3(a, nScale);    // presumably scales a in place by nScale (a is the only vector argument)
+}
+
+static inline 
+void dVector3Add(const dVector3& a,const dVector3& b,dVector3& c)
+{
+    dAddVectors3(c, a, b);    // presumably c = a + b; the output is the last parameter here but the first argument there
+}
+
+static inline 
+void dVector3Copy(const dVector3& a,dVector3& c)
+{
+    dCopyVector3(c, a);    // a is the (const) source and c the destination: argument order is reversed relative to dCopyVector3
+}
+
+static inline 
+void dVector4Copy(const dVector4& a,dVector4& c)
+{
+    dCopyVector4(c, a);    // a is the (const) source and c the destination: argument order is reversed relative to dCopyVector4
+}
+
+static inline 
+void dVector3Cross(const dVector3& a,const dVector3& b,dVector3& c)
+{
+    dCalcVectorCross3(c, a, b);    // presumably c = a x b; the output is the last parameter here but the first argument there
+}
+
+static inline 
+dReal dVector3Length(const dVector3& a)
+{
+    return dCalcVectorLength3(a);    // plain forwarder; presumably the length of the first three components
+}
+
+static inline 
+dReal dVector3LengthSquare(const dVector3& a)
+{
+    return dCalcVectorLengthSquare3(a);    // plain forwarder; presumably the squared length of the first three components
+}
+
+static inline 
+dReal dVector3Dot(const dVector3& a,const dVector3& b)
+{
+    return dCalcVectorDot3(a, b);    // plain forwarder; presumably the 3-component dot product a . b
+}
+
+static inline 
+void dVector3Inv(dVector3& a)
+{
+    dNegateVector3(a);    // despite the "Inv" name this forwards to a negation helper; a is modified in place
+}
+
+static inline 
+void dMat3GetCol(const dMatrix3& m,const int col, dVector3& v)
+{
+    dGetMatrixColumn3(v, m, col);    // presumably copies column `col` of m into v; the output moves from last parameter to first argument
+}
+
+static inline 
+void dVector3CrossMat3Col(const dMatrix3& m,const int col,const dVector3& v,dVector3& r)
+{
+    dCalcVectorCross3_114(r, v, m + col);    // presumably r = v x (column `col` of m); m + col points at the first element of that column - confirm the _114 stride convention
+}
+
+static inline 
+void dMat3ColCrossVector3(const dMatrix3& m,const int col,const dVector3& v,dVector3& r)
+{
+    dCalcVectorCross3_141(r, m + col, v);    // presumably r = (column `col` of m) x v; m + col points at the first element of that column - confirm the _141 stride convention
+}
+
+static inline 
+void dMultiplyMat3Vec3(const dMatrix3& m,const dVector3& v, dVector3& r)
+{
+    dMultiply0_331(r, m, v);    // presumably r = m * v; the output is the last parameter here but the first argument there
+}
+
+static inline 
+dReal dPointPlaneDistance(const dVector3& point,const dVector4& plane)
+{
+    return (plane[0]*point[0] + plane[1]*point[1] + plane[2]*point[2] + plane[3]);    // signed plane-equation value; a true distance only if plane[0..2] has unit length
+}
+
+static inline 
+void dConstructPlane(const dVector3& normal,const dReal& distance, dVector4& plane)
+{
+    plane[0] = normal[0];    // plane = (normal[0..2], distance); the normal is copied as given, without normalization
+    plane[1] = normal[1];
+    plane[2] = normal[2];
+    plane[3] = distance;    // stored as the constant term that dPointPlaneDistance adds to the dot product
+}
+
+static inline 
+void dMatrix3Copy(const dReal* source,dMatrix3& dest)
+{
+    dCopyMatrix4x3(dest, source);    // source is a raw dReal pointer; presumably 12 values (4x3) are read from it - confirm against dCopyMatrix4x3
+}
+
+static inline 
+dReal dMatrix3Det( const dMatrix3& mat )
+{
+    dReal det;
+    // cofactor expansion along the first row of the 3x3 part; rows start at indices 0, 4 and 8, elements 3, 7 and 11 are ignored
+    det = mat[0] * ( mat[5]*mat[10] - mat[9]*mat[6] )
+        - mat[1] * ( mat[4]*mat[10] - mat[8]*mat[6] )
+        + mat[2] * ( mat[4]*mat[9]  - mat[8]*mat[5] );
+
+    return( det );
+}
+
+
+static inline 
+void dMatrix3Inv( const dMatrix3& ma, dMatrix3& dst )
+{
+    dReal det = dMatrix3Det( ma );
+    // dst = adjugate(ma) / det for the 3x3 part; dst must not alias ma, since ma is still read after dst entries are written
+    if ( dFabs( det ) < REAL(0.0005) )
+    {
+        dRSetIdentity( dst );    // near-singular input: no inverse is computed, dst is handed to dRSetIdentity instead
+        return;
+    }
+
+    double detRecip = REAL(1.0) / det;    // held as double regardless of dReal; results are cast back to dReal below
+
+    dst[0] =  (dReal)(( ma[5]*ma[10] - ma[6]*ma[9]  ) * detRecip);
+    dst[1] =  (dReal)(( ma[9]*ma[2]  - ma[1]*ma[10] ) * detRecip);
+    dst[2] =  (dReal)(( ma[1]*ma[6]  - ma[5]*ma[2]  ) * detRecip);
+
+    dst[4] =  (dReal)(( ma[6]*ma[8]  - ma[4]*ma[10] ) * detRecip);
+    dst[5] =  (dReal)(( ma[0]*ma[10] - ma[8]*ma[2]  ) * detRecip);
+    dst[6] =  (dReal)(( ma[4]*ma[2]  - ma[0]*ma[6]  ) * detRecip);
+
+    dst[8] =  (dReal)(( ma[4]*ma[9]  - ma[8]*ma[5]  ) * detRecip);
+    dst[9] =  (dReal)(( ma[8]*ma[1]  - ma[0]*ma[9]  ) * detRecip);
+    dst[10] = (dReal)(( ma[0]*ma[5]  - ma[1]*ma[4]  ) * detRecip);    // dst[3], dst[7] and dst[11] are not written on this path
+}
+
+static inline 
+void dQuatTransform(const dQuaternion& quat,const dVector3& source,dVector3& dest)
+{
+    // dest = quat * (0,source) * conj(quat), with quat[0] as the scalar part; this rotates source when quat has unit length
+    // Nguyen Binh : this code seem to be the fastest.
+    dReal x0 = 	source[0] * quat[0] + source[2] * quat[2] - source[1] * quat[3];    // x0..x2: vector part of quat * (0,source)
+    dReal x1 = 	source[1] * quat[0] + source[0] * quat[3] - source[2] * quat[1];
+    dReal x2 = 	source[2] * quat[0] + source[1] * quat[1] - source[0] * quat[2];
+    dReal x3 = 	source[0] * quat[1] + source[1] * quat[2] + source[2] * quat[3];    // x3: negated scalar part of quat * (0,source)
+
+    dest[0]  = 	quat[0] * x0 + quat[1] * x3 + quat[2] * x2 - quat[3] * x1;    // source is no longer read from here on
+    dest[1]  = 	quat[0] * x1 + quat[2] * x3 + quat[3] * x0 - quat[1] * x2;
+    dest[2]  = 	quat[0] * x2 + quat[3] * x3 + quat[1] * x1 - quat[2] * x0;
+
+    /*
+    // nVidia SDK implementation
+    dVector3 uv, uuv; 
+    dVector3 qvec;
+    qvec[0] = quat[1];
+    qvec[1] = quat[2];
+    qvec[2] = quat[3];
+
+    dVector3Cross(qvec,source,uv);
+    dVector3Cross(qvec,uv,uuv);
+
+    dVector3Scale(uv,REAL(2.0)*quat[0]);
+    dVector3Scale(uuv,REAL(2.0));
+
+    dest[0] = source[0] + uv[0] + uuv[0];
+    dest[1] = source[1] + uv[1] + uuv[1];
+    dest[2] = source[2] + uv[2] + uuv[2];   
+    */
+}
+
+static inline 
+void dQuatInvTransform(const dQuaternion& quat,const dVector3& source,dVector3& dest)
+{
+    // Applies dQuatTransform with the inverse of quat (conjugate divided by the squared norm).
+    dReal norm = quat[0]*quat[0] + quat[1]*quat[1] + quat[2]*quat[2] + quat[3]*quat[3];    // squared norm, no square root is taken
+
+    if (norm > REAL(0.0))
+    {
+        dQuaternion invQuat;
+        invQuat[0] =  quat[0] / norm;
+        invQuat[1] = -quat[1] / norm;
+        invQuat[2] = -quat[2] / norm;
+        invQuat[3] = -quat[3] / norm;	
+
+        dQuatTransform(invQuat,source,dest);
+
+    }
+    else
+    {
+        // Singular (all-zero quaternion) -> identity transform: source is copied to dest unchanged
+        dVector3Copy(source,dest);
+    }
+}
+
+static inline 
+void dGetEulerAngleFromRot(const dMatrix3& mRot,dReal& rX,dReal& rY,dReal& rZ)
+{
+    rY = asin(mRot[0 * 4 + 2]);    // element (row 0, column 2) with a row stride of 4; asin gives NaN if rounding pushed it outside [-1, 1]
+    if (rY < M_PI /2)
+    {
+        if (rY > -M_PI /2)
+        {
+            rX = atan2(-mRot[1*4 + 2], mRot[2*4 + 2]);    // regular case: rX and rZ are recovered independently
+            rZ = atan2(-mRot[0*4 + 1], mRot[0*4 + 0]);
+        }
+        else
+        {
+            // not unique (rY at -pi/2): a single combined angle goes into rX and rZ is set to zero
+            rX = -atan2(mRot[1*4 + 0], mRot[1*4 + 1]);
+            rZ = REAL(0.0);
+        }
+    }
+    else
+    {
+        // not unique (rY at +pi/2): a single combined angle goes into rX and rZ is set to zero
+        rX = atan2(mRot[1*4 + 0], mRot[1*4 + 1]);
+        rZ = REAL(0.0);
+    }
+}
+
+static inline
+void dQuatInv(const dQuaternion& source, dQuaternion& dest)
+{
+    dReal norm = source[0]*source[0] + source[1]*source[1] + source[2]*source[2] + source[3]*source[3];    // squared norm, no square root is taken
+
+    if (norm > 0.0f)
+    {
+        dReal neg_norm_recip = -REAL(1.0) / norm;
+        dest[0] = -source[0] * neg_norm_recip;    // the two negations cancel: dest[0] = source[0] / norm
+        dest[1] = source[1] * neg_norm_recip;    // dest[1..3] = -source[1..3] / norm, i.e. conjugate over squared norm
+        dest[2] = source[2] * neg_norm_recip;
+        dest[3] = source[3] * neg_norm_recip;	
+    }
+    else
+    {
+        // Singular -> return identity quaternion (1,0,0,0)
+        dest[0] = REAL(1.0);
+        dest[1] = REAL(0.0);
+        dest[2] = REAL(0.0);
+        dest[3] = REAL(0.0);
+    }
+}
+
+// Interpolates a point from barycentric weights: Out = w*dv[0] + u*dv[1] + v*dv[2] with w = 1 - u - v
+static inline 
+void GetPointFromBarycentric(const dVector3 dv[3], dReal u, dReal v, dVector3 Out){
+    dReal w = REAL(1.0) - u - v;    // weight of dv[0]; u weights dv[1] and v weights dv[2]
+
+    Out[0] = (dv[0][0] * w) + (dv[1][0] * u) + (dv[2][0] * v);
+    Out[1] = (dv[0][1] * w) + (dv[1][1] * u) + (dv[2][1] * v);
+    Out[2] = (dv[0][2] * w) + (dv[1][2] * u) + (dv[2][2] * v);
+    Out[3] = (dv[0][3] * w) + (dv[1][3] * u) + (dv[2][3] * v);    // the fourth component is blended too, so element [3] of every dv entry is read
+}
+
+
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/common.h b/libs/ode-0.16.1/ode/src/common.h
new file mode 100644
index 0000000..0d67c2e
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/common.h
@@ -0,0 +1,351 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_PRIVATE_COMMON_H_
+#define _ODE_PRIVATE_COMMON_H_
+
+
+#include "typedefs.h"
+#include "error.h"
+#include <ode/memory.h>
+#include <algorithm>
+
+
+#ifndef SIZE_MAX
+#define SIZE_MAX  ((sizeint)(-1))
+#endif
+// Macro forms of max/min: the selected argument is expanded (and so evaluated) twice.
+#define dMACRO_MAX(a, b) ((a) > (b) ? (a) : (b))
+#define dMACRO_MIN(a, b) ((a) < (b) ? (a) : (b))
+
+
+#ifdef dSINGLE
+#define dEpsilon  FLT_EPSILON
+#else
+#define dEpsilon  DBL_EPSILON
+#endif
+
+// dMaxExact / dMinExact: (2^MANT_DIG - 1) and its negation for the active precision.
+#ifdef dSINGLE
+
+#if !defined(FLT_MANT_DIG)
+#define FLT_MANT_DIG 24
+#endif
+
+#define dMaxExact   ((float)((1UL << FLT_MANT_DIG) - 1))
+#define dMinExact   ((float)(-dMaxExact))
+
+
+#else // #ifndef dSINGLE
+
+#if !defined(DBL_MANT_DIG)
+#define DBL_MANT_DIG 53
+#endif
+
+// Fully parenthesized like the single-precision variant so it is safe in any expression.
+#define dMaxExact   ((double)((1ULL << DBL_MANT_DIG) - 1))
+#define dMinExact   ((double)(-dMaxExact))
+
+
+#endif // #ifndef dSINGLE
+
+// The same limits, additionally restricted to the range of int.
+#define dMaxIntExact dMACRO_MIN(dMaxExact, (dReal)INT_MAX)
+#define dMinIntExact dMACRO_MAX(dMinExact, (dReal)INT_MIN)
+
+// Fallback layout helpers: member offset/size are taken through a dummy object address of 8.
+#ifndef offsetof
+#define offsetof(s, m) ((sizeint)&(((s *)8)->m) - (sizeint)8)
+#endif
+#ifndef membersize
+#define membersize(s, m) (sizeof(((s *)8)->m))
+#endif
+#ifndef endoffsetof
+#define endoffsetof(s, m)   ((sizeint)((sizeint)&(((s *)8)->m) - (sizeint)8) + sizeof(((s *)8)->m))
+#endif
+
+
+/* the efficient alignment. most platforms align data structures to some
+ * number of bytes, but this is not always the most efficient alignment.
+ * for example, many x86 compilers align to 4 bytes, but on a pentium it
+ * is important to align doubles to 8 byte boundaries (for speed), and
+ * the 4 floats in a SIMD register to 16 byte boundaries. many other
+ * platforms have similar behavior. setting a larger alignment can waste
+ * a (very) small amount of memory. NOTE: this number must be a power of
+ * two. this is set to 16 by default.
+ */
+#ifndef EFFICIENT_ALIGNMENT
+#define EFFICIENT_ALIGNMENT 16
+#endif
+// Round a size / pointer up to a multiple of `alignment` (power of two); every use of the argument is parenthesized.
+#define dALIGN_SIZE(buf_size, alignment) (((buf_size) + ((alignment) - 1)) & (int)(~((alignment) - 1))) // Casting the mask to int ensures sign-extension to larger integer sizes
+#define dALIGN_PTR(buf_ptr, alignment) ((void *)(((uintptr)(buf_ptr) + ((alignment) - 1)) & (int)(~((alignment) - 1)))) // Casting the mask to int ensures sign-extension to larger integer sizes
+
+/* round something up to be a multiple of the EFFICIENT_ALIGNMENT */
+#define dEFFICIENT_SIZE(x) dALIGN_SIZE(x, EFFICIENT_ALIGNMENT)
+#define dEFFICIENT_PTR(p) dALIGN_PTR(p, EFFICIENT_ALIGNMENT)
+#define dOFFSET_EFFICIENTLY(p, b) ((void *)((uintptr)(p) + dEFFICIENT_SIZE(b)))
+// Over-aligned variants: the size reserves (alignment - EFFICIENT_ALIGNMENT) extra bytes before rounding.
+#define dOVERALIGNED_SIZE(size, alignment) dEFFICIENT_SIZE((size) + ((alignment) - EFFICIENT_ALIGNMENT))
+#define dOVERALIGNED_PTR(buf_ptr, alignment) dALIGN_PTR(buf_ptr, alignment)
+#define dOFFSET_OVERALIGNEDLY(buf_ptr, size, alignment) ((void *)((uintptr)(buf_ptr) + dOVERALIGNED_SIZE(size, alignment)))
+
+
+// Element counts for padding arrays: enough elements to cover a size, and the tail needed to reach an alignment.
+#define dDERIVE_SIZE_UNION_PADDING_ELEMENTS(DataSize, ElementType) (((DataSize) + sizeof(ElementType) - 1) / sizeof(ElementType))
+#define dDERIVE_TYPE_UNION_PADDING_ELEMENTS(DataType, ElementType) dDERIVE_SIZE_UNION_PADDING_ELEMENTS(sizeof(DataType), ElementType)
+#define dDERIVE_SIZE_EXTRA_PADDING_ELEMENTS(DataSize, AlignmentSize, ElementType) ((dALIGN_SIZE(DataSize, dMACRO_MAX(AlignmentSize, sizeof(ElementType))) - (DataSize)) / sizeof(ElementType))
+
+
+
+/* alloca aligned to the EFFICIENT_ALIGNMENT. the request is enlarged by
+ * EFFICIENT_ALIGNMENT bytes so the returned pointer can be rounded up; how much
+ * of that slack is skipped depends on what alloca() returns. */
+#define dALLOCA16(n) \
+    dEFFICIENT_PTR(alloca((n)+(EFFICIENT_ALIGNMENT)))
+
+
+// Owns one dAlloc() buffer, over-allocated by the requested alignment so that an aligned
+// "user area" pointer can be handed out; the destructor releases the buffer.
+// NOTE(review): copying is not disabled - a copied object would free the same buffer twice.
+class dxAlignedAllocation
+{
+public:
+    dxAlignedAllocation(): m_userAreaPointer(NULL), m_bufferAllocated(NULL), m_sizeUsed(0) {}
+    ~dxAlignedAllocation() { freeAllocation(); }
+
+    // Allocates sizeRequired + alignmentRequired bytes; returns the buffer start rounded up to
+    // alignmentRequired (power of two, 0 = no rounding), or NULL (object stays empty) on failure.
+    void *allocAligned(sizeint sizeRequired, unsigned alignmentRequired)
+    {
+        dIASSERT((alignmentRequired & (alignmentRequired - 1)) == 0);
+        dIASSERT(alignmentRequired <= SIZE_MAX - sizeRequired);
+        const sizeint sizeToUse = sizeRequired + alignmentRequired;
+        void *bufferPointer = dAlloc(sizeToUse);
+        void *userAreaPointer = bufferPointer != NULL && alignmentRequired != 0 ? dALIGN_PTR(bufferPointer, alignmentRequired) : bufferPointer;
+        // A failed allocation records size 0: extractData() only resets the size with a non-NULL buffer.
+        assignData(userAreaPointer, bufferPointer, bufferPointer != NULL ? sizeToUse : 0);
+        return userAreaPointer;
+    }
+
+    void *getUserAreaPointer() const { return m_userAreaPointer; }
+    sizeint getUserAreaSize() const { return m_sizeUsed - ((uint8 *)m_userAreaPointer - (uint8 *)m_bufferAllocated); }
+
+    // Releases the buffer, if any, and returns the object to its empty state.
+    void freeAllocation()
+    {
+        sizeint sizeUsed;
+        void *bufferPointer = extractData(sizeUsed);
+        if (bufferPointer != NULL)
+        {
+            dFree(bufferPointer, sizeUsed);
+        }
+    }
+
+private:
+    void assignData(void *userAreaPointer, void *bufferAllocated, sizeint sizeUsed)
+    {
+        dIASSERT(m_userAreaPointer == NULL);
+        dIASSERT(m_bufferAllocated == NULL);
+        dIASSERT(m_sizeUsed == 0);
+        m_userAreaPointer = userAreaPointer;
+        m_bufferAllocated = bufferAllocated;
+        m_sizeUsed = sizeUsed;
+    }
+
+    // Clears the members and returns the buffer; out_sizeUsed is written only if one exists.
+    void *extractData(sizeint &out_sizeUsed)
+    {
+        void *bufferPointer = m_bufferAllocated;
+        if (bufferPointer != NULL)
+        {
+            out_sizeUsed = m_sizeUsed;
+            m_userAreaPointer = NULL;
+            m_bufferAllocated = NULL;
+            m_sizeUsed = 0;
+        }
+        return bufferPointer;
+    }
+
+    void *m_userAreaPointer;
+    void *m_bufferAllocated;
+    sizeint m_sizeUsed;
+};
+
+
+// Narrows stArgument into dtOutResult; true when converting back to SrcType is lossless.
+template<typename DstType, typename SrcType>
+inline bool _cast_to_smaller(DstType &dtOutResult, const SrcType &stArgument)
+{
+    return dtOutResult = (DstType)stArgument, (SrcType)dtOutResult == stArgument;
+}
+
+#if defined(__GNUC__)
+// GNU-compatible compilers: a statement expression runs _cast_to_smaller() under dIVERIFY and yields the narrowed value.
+#define dCAST_TO_SMALLER(TargetType, SourceValue) ({ TargetType ttCastSmallerValue; dIVERIFY(_cast_to_smaller(ttCastSmallerValue, SourceValue)); ttCastSmallerValue; })
+
+
+#else // #if !defined(__GNUC__)
+// Other compilers: the same checked cast is routed through the helper template below.
+#define dCAST_TO_SMALLER(TargetType, SourceValue) templateCAST_TO_SMALLER<TargetType>(SourceValue)
+
+template <typename TTargetType, typename TSourceType>
+inline TTargetType templateCAST_TO_SMALLER(const TSourceType &stSourceValue)
+{
+    TTargetType ttCastSmallerValue;
+    dIVERIFY(_cast_to_smaller(ttCastSmallerValue, stSourceValue));
+    return ttCastSmallerValue;
+}
+
+
+#endif // #if !defined(__GNUC__)
+
+
+// Exchanges the values of `one` and `another` by delegating to std::swap.
+template<typename value_type>
+inline void dxSwap(value_type &one, value_type &another)
+{
+    std::swap(one, another);
+}
+// Limits `value` to [lo, hi]; `lo` is tested first and a bound that is hit is converted to value_type.
+template<typename value_type, typename lo_type, typename hi_type>
+inline value_type dxClamp(const value_type &value, const lo_type &lo, const hi_type &hi)
+{
+    if (value < lo) return (value_type)lo;
+    return value > hi ? (value_type)hi : value;
+}
+
+// Union of an untyped and a typed const pointer, used in place of an explicit pointer cast.
+template <typename Type>
+union _const_type_cast_union
+{
+    explicit _const_type_cast_union(const void *psvCharBuffer): m_psvCharBuffer(psvCharBuffer) {}
+    // The constructor stores the void pointer; every accessor reads it back through the typed member.
+    operator const Type *() const { return m_pstTypedPointer; }
+    const Type &operator *() const { return *m_pstTypedPointer; }
+    const Type *operator ->() const { return m_pstTypedPointer; }
+    const Type &operator [](diffint diElementIndex) const { return m_pstTypedPointer[diElementIndex]; }
+    const Type &operator [](sizeint siElementIndex) const { return m_pstTypedPointer[siElementIndex]; }
+    // NOTE(review): this relies on reading a union member other than the one last written.
+    const void 		*m_psvCharBuffer;
+    const Type		*m_pstTypedPointer;
+};
+// Mutable counterpart: union of an untyped and a typed pointer, used in place of an explicit pointer cast.
+template <typename Type>
+union _type_cast_union
+{
+    explicit _type_cast_union(void *psvCharBuffer): m_psvCharBuffer(psvCharBuffer) {}
+    // The constructor stores the void pointer; every accessor reads it back through the typed member.
+    operator Type *() const { return m_pstTypedPointer; }
+    Type &operator *() const { return *m_pstTypedPointer; }
+    Type *operator ->() const { return m_pstTypedPointer; }
+    Type &operator [](diffint diElementIndex) const { return m_pstTypedPointer[diElementIndex]; }
+    Type &operator [](sizeint siElementIndex) const { return m_pstTypedPointer[siElementIndex]; }
+    // NOTE(review): this relies on reading a union member other than the one last written.
+    void			*m_psvCharBuffer;
+    Type			*m_pstTypedPointer;
+};
+
+// Maps a byte size to a signed integer type; only the sizes of uint8/uint16/uint32/uint64 are specialized.
+template<sizeint tsiTypeSize>
+struct _sized_signed;
+
+template<>
+struct _sized_signed<sizeof(uint8)>
+{
+    typedef int8 type;
+};
+
+template<>
+struct _sized_signed<sizeof(uint16)>
+{
+    typedef int16 type;
+};
+
+template<>
+struct _sized_signed<sizeof(uint32)>
+{
+    typedef int32 type;
+};
+
+template<>
+struct _sized_signed<sizeof(uint64)>
+{
+    typedef int64 type;
+};
+// _make_signed<T>::type is the _sized_signed entry selected by sizeof(T).
+template<typename tintergraltype>
+struct _make_signed
+{
+    typedef typename _sized_signed<sizeof(tintergraltype)>::type type;
+};
+
+// Maps a byte size to an unsigned integer type; only the sizes of int8/int16/int32/int64 are specialized.
+template<sizeint tsiTypeSize>
+struct _sized_unsigned;
+
+template<>
+struct _sized_unsigned<sizeof(int8)>
+{
+    typedef uint8 type;
+};
+
+template<>
+struct _sized_unsigned<sizeof(int16)>
+{
+    typedef uint16 type;
+};
+
+template<>
+struct _sized_unsigned<sizeof(int32)>
+{
+    typedef uint32 type;
+};
+
+template<>
+struct _sized_unsigned<sizeof(int64)>
+{
+    typedef uint64 type;
+};
+// _make_unsigned<T>::type is the _sized_unsigned entry selected by sizeof(T).
+template<typename tintergraltype>
+struct _make_unsigned
+{
+    typedef typename _sized_unsigned<sizeof(tintergraltype)>::type type;
+};
+
+// Range, clamp and array-size helpers. The function template below is kept disabled; dIN_RANGE spells the same test out as a macro.
+// template<typename tvalueint, typename tminint, typename tmaxint>
+// inline 
+// bool dxInRange(tvalueint viValue, tminint miMin, tmaxint miMax)
+// {
+//     return (typename _sized_unsigned<dMACRO_MAX(sizeof(tvalueint), sizeof(tminint))>::type)(viValue - miMin) < (typename _sized_unsigned<dMACRO_MAX(sizeof(tmaxint), sizeof(tminint))>::type)(miMax - miMin);
+// }
+// #define dIN_RANGE(aval, amin, amax) dxInRange(aval, amin, amax)
+// dIN_RANGE: unsigned (aval - amin) < unsigned (amax - amin), a single-compare test for amin <= aval < amax when amin <= amax; dTMPL_IN_RANGE adds `typename` for dependent types.
+#define dIN_RANGE(aval, amin, amax) ((_sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)((_sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)(aval) - (_sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)(amin)) < (_sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)((_sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)(amax) - (_sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)(amin)))
+#define dTMPL_IN_RANGE(aval, amin, amax) ((typename _sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)((typename _sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)(aval) - (typename _sized_unsigned<dMACRO_MAX(sizeof(aval), sizeof(amin))>::type)(amin)) < (typename _sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)((typename _sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)(amax) - (typename _sized_unsigned<dMACRO_MAX(sizeof(amax), sizeof(amin))>::type)(amin)))
+#define dCLAMP(aval, alo, ahi) dxClamp(aval, alo, ahi)
+#define dARRAY_SIZE(aarr) (sizeof(aarr) / sizeof((aarr)[0]))
+#define dSTATIC_ARRAY_SIZE(aclass, aarr) dARRAY_SIZE(((aclass *)sizeof(void *))->aarr)
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/config.h.in b/libs/ode-0.16.1/ode/src/config.h.in
new file mode 100644
index 0000000..e6c0256
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/config.h.in
@@ -0,0 +1,329 @@
+/* ode/src/config.h.in.  Generated from configure.ac by autoheader.  */
+
+
+#ifndef ODE_CONFIG_H
+#define ODE_CONFIG_H
+
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+   systems. This function is required for `alloca.c' support on those systems.
+   */
+#undef CRAY_STACKSEG_END
+
+/* Define to 1 if using `alloca.c'. */
+#undef C_ALLOCA
+
+/* Define to 1 if you have `alloca', as a function or macro. */
+#undef HAVE_ALLOCA
+
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
+#undef HAVE_ALLOCA_H
+
+/* Use the Apple OpenGL framework. */
+#undef HAVE_APPLE_OPENGL_FRAMEWORK
+
+/* Define to 1 if you have the `atan2f' function. */
+#undef HAVE_ATAN2F
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
+/* Define to 1 if you have the `copysign' function. */
+#undef HAVE_COPYSIGN
+
+/* Define to 1 if you have the `copysignf' function. */
+#undef HAVE_COPYSIGNF
+
+/* Define to 1 if you have the `cosf' function. */
+#undef HAVE_COSF
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the `fabsf' function. */
+#undef HAVE_FABSF
+
+/* Define to 1 if you have the <float.h> header file. */
+#undef HAVE_FLOAT_H
+
+/* Define to 1 if you have the `floor' function. */
+#undef HAVE_FLOOR
+
+/* Define to 1 if you have the `fmodf' function. */
+#undef HAVE_FMODF
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define to 1 if you have the <GL/glext.h> header file. */
+#undef HAVE_GL_GLEXT_H
+
+/* Define to 1 if you have the <GL/glu.h> header file. */
+#undef HAVE_GL_GLU_H
+
+/* Define to 1 if you have the <GL/gl.h> header file. */
+#undef HAVE_GL_GL_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `isnan' function. */
+#undef HAVE_ISNAN
+
+/* Define to 1 if you have the `isnanf' function. */
+#undef HAVE_ISNANF
+
+/* Define to 1 if you have the `m' library (-lm). */
+#undef HAVE_LIBM
+
+/* Define to 1 if you have the `rt' library (-lrt). */
+#undef HAVE_LIBRT
+
+/* Define to 1 if you have the `sunmath' library (-lsunmath). */
+#undef HAVE_LIBSUNMATH
+
+/* Define to 1 if you have the <limits.h> header file. */
+#undef HAVE_LIMITS_H
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#undef HAVE_MALLOC_H
+
+/* Define to 1 if you have the <math.h> header file. */
+#undef HAVE_MATH_H
+
+/* Define to 1 if you have the `memmove' function. */
+#undef HAVE_MEMMOVE
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `memset' function. */
+#undef HAVE_MEMSET
+
+/* Define to 1 if you have the `no_pthread_condattr_setclock' function. */
+#undef HAVE_NO_PTHREAD_CONDATTR_SETCLOCK
+
+/* Define to 1 if libc includes obstacks. */
+#undef HAVE_OBSTACK
+
+/* Define to 1 if you have the `pthread_attr_setinheritsched' function. */
+#undef HAVE_PTHREAD_ATTR_SETINHERITSCHED
+
+/* Define to 1 if you have the `pthread_attr_setstacklazy' function. */
+#undef HAVE_PTHREAD_ATTR_SETSTACKLAZY
+
+/* Define to 1 if you have the `pthread_condattr_setclock' function. */
+#undef HAVE_PTHREAD_CONDATTR_SETCLOCK
+
+/* Define to 1 if you have the `sinf' function. */
+#undef HAVE_SINF
+
+/* Define to 1 if you have the `snprintf' function. */
+#undef HAVE_SNPRINTF
+
+/* Define to 1 if you have the `sqrt' function. */
+#undef HAVE_SQRT
+
+/* Define to 1 if you have the `sqrtf' function. */
+#undef HAVE_SQRTF
+
+/* Define to 1 if you have the <stdarg.h> header file. */
+#undef HAVE_STDARG_H
+
+/* Define to 1 if stdbool.h conforms to C99. */
+#undef HAVE_STDBOOL_H
+
+/* Define to 1 if you have the <stddef.h> header file. */
+#undef HAVE_STDDEF_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#undef HAVE_STDIO_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `strchr' function. */
+#undef HAVE_STRCHR
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strstr' function. */
+#undef HAVE_STRSTR
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <time.h> header file. */
+#undef HAVE_TIME_H
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#undef HAVE_VSNPRINTF
+
+/* Define to 1 if the system has the type `_Bool'. */
+#undef HAVE__BOOL
+
+/* Define to 1 if you have the `_isnan' function. */
+#undef HAVE__ISNAN
+
+/* Define to 1 if you have the `_isnanf' function. */
+#undef HAVE__ISNANF
+
+/* Define to 1 if you have the `__isnan' function. */
+#undef HAVE___ISNAN
+
+/* Define to 1 if you have the `__isnanf' function. */
+#undef HAVE___ISNANF
+
+/* Define to the sub-directory where libtool stores uninstalled libraries. */
+#undef LT_OBJDIR
+
+/* Mac OS X version setting for OU Library */
+#undef MAC_OS_X_VERSION
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* compiling for a pentium on a gcc-based platform? */
+#undef PENTIUM
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at runtime.
+	STACK_DIRECTION > 0 => grows toward higher addresses
+	STACK_DIRECTION < 0 => grows toward lower addresses
+	STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Version number of package */
+#undef VERSION
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+
+/* compiling for a X86_64 system on a gcc-based platform? */
+#undef X86_64_SYSTEM
+
+/* OU features enabled */
+#undef _OU_FEATURE_SET
+
+/* libou namespace for ODE */
+#undef _OU_NAMESPACE
+
+/* Target OS setting for OU Library */
+#undef _OU_TARGET_OS
+
+/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
+   <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
+   #define below would cause a syntax error. */
+#undef _UINT32_T
+
+/* Atomic API of OU is enabled */
+#undef dATOMICS_ENABLED
+
+/* Built-in multithreaded threading implementation is included */
+#undef dBUILTIN_THREADING_IMPL_ENABLED
+
+/* Generic OU features are enabled */
+#undef dOU_ENABLED
+
+/* Threading interface is disabled */
+#undef dTHREADING_INTF_DISABLED
+
+/* Thread Local Storage API of OU is enabled */
+#undef dTLS_ENABLED
+
+/* Use the old trimesh-trimesh collider */
+#undef dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+#undef inline
+#endif
+
+/* Define to the type of a signed integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+#undef int32_t
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
+
+/* Define to the type of an unsigned integer type of width exactly 32 bits if
+   such a type exists and the standard includes do not define it. */
+#undef uint32_t
+
+/* Define to empty if the keyword `volatile' does not work. Warning: valid
+   code using `volatile' can become incorrect without. Disable with care. */
+#undef volatile
+
+
+
+#ifdef HAVE_ALLOCA_H
+#include <alloca.h>
+#endif
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+
+#include "typedefs.h"
+
+
+#endif /* #define ODE_CONFIG_H */
+
diff --git a/libs/ode-0.16.1/ode/src/convex.cpp b/libs/ode-0.16.1/ode/src/convex.cpp
new file mode 100644
index 0000000..7a17941
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/convex.cpp
@@ -0,0 +1,1621 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+/*
+Code for Convex Collision Detection
+By Rodrigo Hernandez
+*/
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+// File-local min/max. The macro form is the active one; the std::min/std::max form is compiled out.
+#if 1
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A))
+#define dMAX(A,B)  ((A)>(B) ? (A) : (B))
+#else
+#define dMIN(A,B)  std::min(A,B)
+#define dMAX(A,B)  std::max(A,B)
+#endif
+
+//****************************************************************************
+// Convex public API
+dxConvex::dxConvex (dSpaceID space,
+                    const dReal *_planes,
+                    unsigned int _planecount,
+                    const dReal *_points,
+                    unsigned int _pointcount,
+                    const unsigned int *_polygons) :
+dxGeom (space,1)
+{
+    // Only the pointers are stored; the caller's arrays are not copied here.
+    dAASSERT (_planes != NULL);
+    dAASSERT (_points != NULL);
+    dAASSERT (_polygons != NULL);
+    type = dConvexClass;
+    planes = _planes;
+    planecount = _planecount;
+    points = _points;
+    pointcount = _pointcount;
+    polygons=_polygons;
+    edges = NULL; // FillEdges() deletes a non-NULL array, so start from NULL
+    FillEdges();
+#ifndef dNODEBUG
+    // Debug-only sanity pass. Each polygon record is a vertex count followed by that
+    // many point indices (3 reals per point). The winding test is the sign of the
+    // determinant of the 3x3 matrix made of the polygon's first three points.
+    // The counter is unsigned, so the warnings print it with %u.
+    const unsigned int *points_in_poly=polygons;
+    for(unsigned int i=0;i<planecount;++i)
+    {
+        const unsigned int *index=points_in_poly+1;
+        dAASSERT (*points_in_poly > 2 );
+        const dReal *p0=points+(index[0]*3);
+        const dReal *p1=points+(index[1]*3);
+        const dReal *p2=points+(index[2]*3);
+        if((
+            p0[0]*p1[1]*p2[2] +
+            p0[1]*p1[2]*p2[0] +
+            p0[2]*p1[0]*p2[1] -
+            p0[2]*p1[1]*p2[0] -
+            p0[1]*p1[0]*p2[2] -
+            p0[0]*p1[2]*p2[1])<0)
+        {
+            fprintf(stdout,"WARNING: Polygon %u is not defined counterclockwise\n",i);
+        }
+        points_in_poly+=(*points_in_poly+1);
+        if(planes[(i*4)+3]<0) fprintf(stdout,"WARNING: Plane %u does not contain the origin\n",i);
+    }
+#endif
+}
+
+
+// Rebuilds aabb (min/max pairs for x, y, z): every vertex is rotated by final_posr->R,
+// shifted by final_posr->pos, and the box is grown around the result.
+void dxConvex::computeAABB()
+{
+    // this can, and should be optimized
+    dVector3 point;
+    dMultiply0_331 (point,final_posr->R,points);
+    for(unsigned int axis=0;axis<3;++axis)
+    {
+        aabb[axis*2+0] = aabb[axis*2+1] = point[axis]+final_posr->pos[axis];
+    }
+    for(unsigned int i=3;i<(pointcount*3);i+=3)
+    {
+        dMultiply0_331 (point,final_posr->R,&points[i]);
+        for(unsigned int axis=0;axis<3;++axis)
+        {
+            const dReal coord = point[axis]+final_posr->pos[axis];
+            aabb[axis*2+0] = dMIN(aabb[axis*2+0],coord);
+            aabb[axis*2+1] = dMAX(aabb[axis*2+1],coord);
+        }
+    }
+}
+
+/*! \brief Rebuilds the set of unique undirected edges from the polygon list.
+    Each polygon is walked as a closed loop; an edge is kept as (smaller index, larger
+    index) so that an edge reached from two faces is stored only once. */
+void dxConvex::FillEdges()
+{
+    // Drop any previous array and clear the pointer: if no edge is added below nothing
+    // is allocated, and a stale pointer would be deleted again on the next call.
+    delete[] edges;
+    edges = NULL;
+    edgecount = 0;
+    const unsigned int *points_in_poly=polygons;
+    for(unsigned int i=0;i<planecount;++i)
+    {
+        const unsigned int *index=points_in_poly+1;
+        const unsigned int count=*points_in_poly;
+        for(unsigned int j=0;j<count;++j)
+        {
+            edge e;
+            e.first = dMIN(index[j],index[(j+1)%count]);
+            e.second = dMAX(index[j],index[(j+1)%count]);
+            bool isinset=false;
+            for(unsigned int k=0;k<edgecount && !isinset;++k)
+            {
+                isinset=(edges[k].first==e.first)&&(edges[k].second==e.second);
+            }
+            if(!isinset)
+            {
+                edge* tmp = new edge[edgecount+1];
+                if(edgecount!=0)
+                {
+                    memcpy(tmp,edges,(edgecount)*sizeof(edge));
+                    delete[] edges;
+                }
+                tmp[edgecount].first=e.first;
+                tmp[edgecount].second=e.second;
+                edges = tmp;
+                ++edgecount;
+            }
+        }
+        points_in_poly+=(count+1);
+    }
+}
+#if 0
+dxConvex::BSPNode* dxConvex::CreateNode(std::vector<Arc> Arcs,std::vector<Polygon> Polygons)
+{
+#if 0
+    dVector3 ea,eb,e;
+    dVector3Copy(points+((edges.begin()+Arcs[0].edge)first*3),ea);
+    dMultiply0_331(e1b,cvx1.final_posr->R,cvx1.points+(i->second*3));
+
+    dVector3Copy(points[edges[Arcs[0].edge]
+#endif
+    return NULL;
+}
+
+void dxConvex::CreateTree()
+{
+    std::vector<Arc> A;
+    A.reserve(edgecount);
+    for(unsigned int i=0;i<edgecount;++i)
+    {
+        this->GetFacesSharedByEdge(i,A[i].normals);
+        A[i].edge = i;
+    }
+    std::vector<Polygon> S;
+    S.reserve(pointcount);
+    for(unsigned int i=0;i<pointcount;++i)
+    {
+        this->GetFacesSharedByVertex(i,S[i].normals);
+        S[i].vertex=i;
+    }
+    this->tree = CreateNode(A,S);
+}
+
+void dxConvex::GetFacesSharedByVertex(int i, std::vector<int> f)
+{
+}
+void dxConvex::GetFacesSharedByEdge(int i, int* f)
+{
+}
+void dxConvex::GetFaceNormal(int i, dVector3 normal)
+{
+}
+#endif
+
+// Public factory: builds a dxConvex in `space` from the given plane, point and polygon
+// arrays and returns it as a dGeomID. All arguments are forwarded unchanged to the
+// dxConvex constructor.
+dGeomID dCreateConvex (dSpaceID space,const dReal *_planes,unsigned int _planecount,
+                       const dReal *_points,
+                       unsigned int _pointcount,
+                       const unsigned int *_polygons)
+{
+    dxConvex *convex = new dxConvex(space,_planes,_planecount,_points,_pointcount,_polygons);
+    return convex;
+}
+
+// Re-points an existing convex geom at new plane/point/polygon arrays (stored by pointer).
+// NOTE(review): the edge list built by FillEdges() is not refreshed here - confirm that
+// callers keep the polygon topology unchanged, or rebuild the edges as well.
+void dGeomSetConvex (dGeomID g,const dReal *_planes,unsigned int _planecount,
+                     const dReal *_points,unsigned int _pointcount,
+                     const unsigned int *_polygons)
+{
+    dUASSERT (g && g->type == dConvexClass,"argument not a convex shape");
+    dxConvex *s = (dxConvex*) g;
+    s->planes = _planes; s->planecount = _planecount;
+    s->points = _points; s->pointcount = _pointcount;
+    s->polygons=_polygons;
+    dGeomMoved (g); // the shape changed: flag the geom so its bounding box is refreshed
+}
+
+//****************************************************************************
+// Helper Inlines
+//
+
+/*! \brief Tests whether segment ab crosses plane p.
+  The plane is used as dot(p[0..2], x) = p[3].
+  \param a segment start
+  \param b segment end
+  \param p plane coefficients
+  \param t receives the parameter along ab at which the line through a and b meets
+  the plane; it is written even when the function returns false
+  \param q receives the crossing point; written only when the result is true
+  \return true when t lies in [0..1], otherwise false.
+*/
+bool IntersectSegmentPlane(dVector3 a,
+                           dVector3 b,
+                           dVector4 p,
+                           dReal &t,
+                           dVector3 q)
+{
+    // Direction of the segment
+    const dVector3 ab = { b[0] - a[0], b[1] - a[1], b[2] - a[2] };
+
+    // Parameter of the directed line ab at the plane
+    t = (p[3] - dCalcVectorDot3(p,a)) / dCalcVectorDot3(p,ab);
+
+    // Outside [0..1] (or not a number): no intersection within the segment
+    if (!(t >= 0.0 && t <= 1.0))
+    {
+        return false;
+    }
+
+    for (int i = 0; i < 3; ++i)
+    {
+        q[i] = a[i] + t * ab[i];
+    }
+    return true;
+}
+
+/*! \brief Solves the closest-approach equation for two rays.
+  \param Origin1 The origin of Ray 1
+  \param Direction1 The direction of Ray 1
+  \param Origin2 The origin of Ray 2
+  \param Direction2 The direction of Ray 2
+  \param t receives ((a*e)-(b*d))/((a*c)-(b*b)) built from the dot products below.
+  NOTE(review): the original note called this the time on Ray 1; in the usual
+  derivation this expression is the parameter along Ray 2 - confirm against callers.
+  \return false if the rays are parallel (zero denominator, t untouched), else true.
+*/
+inline bool ClosestPointInRay(const dVector3 Origin1,
+                              const dVector3 Direction1,
+                              const dVector3 Origin2,
+                              const dVector3 Direction2,
+                              dReal& t)
+{
+    dVector3 w;
+    for (int i = 0; i < 3; ++i) w[i] = Origin1[i]-Origin2[i];
+    const dReal a = dCalcVectorDot3(Direction1 , Direction1);
+    const dReal b = dCalcVectorDot3(Direction1 , Direction2);
+    const dReal c = dCalcVectorDot3(Direction2 , Direction2);
+    const dReal d = dCalcVectorDot3(Direction1 , w);
+    const dReal e = dCalcVectorDot3(Direction2 , w);
+    const dReal denominator = (a*c)-(b*b);
+    if(denominator!=0.0f)
+    {
+        t = ((a*e)-(b*d))/denominator;
+        return true;
+    }
+    return false;
+}
+
+/*! \brief Computes the pair of closest points between two segments
+  \param p1 start of segment 1
+  \param q1 end of segment 1
+  \param p2 start of segment 2
+  \param q2 end of segment 2
+  \param c1 receives the point on segment 1 closest to segment 2
+  \param c2 receives the point on segment 2 closest to segment 1
+  \note Segments whose squared length is at most dEpsilon are handled as
+  single points.
+  \note Adapted from Christer Ericson's Real Time Collision Detection Book.
+*/
+inline void ClosestPointBetweenSegments(dVector3& p1,
+                                         dVector3& q1,
+                                         dVector3& p2,
+                                         dVector3& q2,
+                                         dVector3& c1,
+                                         dVector3& c2)
+{
+    // Directions of both segments and the offset between their start points
+    dVector3 dir1 = {q1[0] - p1[0],
+        q1[1] - p1[1],
+        q1[2] - p1[2]};
+    dVector3 dir2 = {q2[0] - p2[0],
+        q2[1] - p2[1],
+        q2[2] - p2[2]};
+    dVector3 offset = {p1[0] - p2[0],
+        p1[1] - p2[1],
+        p1[2] - p2[2]};
+    dReal len1sq = dCalcVectorDot3(dir1, dir1);
+    dReal len2sq = dCalcVectorDot3(dir2, dir2);
+    dReal proj2 = dCalcVectorDot3(dir2, offset);
+    const bool firstIsPoint = (len1sq <= dEpsilon);
+    const bool secondIsPoint = (len2sq <= dEpsilon);
+
+    if (firstIsPoint && secondIsPoint)
+    {
+        // Two points: they are their own closest points
+        dVector3Copy(p1,c1);
+        dVector3Copy(p2,c2);
+        return;
+    }
+
+    // Parameters of the closest points along segment 1 (s) and segment 2 (t)
+    dReal s;
+    dReal t;
+    if (firstIsPoint)
+    {
+        // Only segment 1 collapsed: project its point onto segment 2
+        s = 0.0f;
+        t = proj2 / len2sq;
+        t = dxClamp(t, 0.0f, 1.0f);
+    }
+    else
+    {
+        dReal proj1 = dCalcVectorDot3(dir1, offset);
+        if (secondIsPoint)
+        {
+            // Only segment 2 collapsed: project its point onto segment 1
+            t = 0.0f;
+            s = dxClamp(-proj1 / len1sq, 0.0f, 1.0f);
+        }
+        else
+        {
+            // Regular case: two proper segments
+            dReal mixed = dCalcVectorDot3(dir1, dir2);
+            dReal denom = len1sq*len2sq-mixed*mixed; // Always nonnegative
+
+            // For parallel segments any s will do, so 0 is used; otherwise
+            // take the closest point on line 1 to line 2, clamped to segment 1
+            s = 0.0f;
+            if (denom != 0.0f)
+            {
+                s = dxClamp((mixed*proj2 - proj1*len2sq) / denom, 0.0f, 1.0f);
+            }
+
+            // t = tnom / len2sq; the division is postponed until it is known
+            // that t falls inside [0,1]. When it does not, t is clamped and
+            // s is recomputed for the clamped t.
+            dReal tnom = mixed*s + proj2;
+            if (tnom < 0.0f)
+            {
+                t = 0.0f;
+                s = dxClamp(-proj1 / len1sq, 0.0f, 1.0f);
+            }
+            else if (tnom > len2sq)
+            {
+                t = 1.0f;
+                s = dxClamp((mixed - proj1) / len1sq, 0.0f, 1.0f);
+            }
+            else
+            {
+                t = tnom / len2sq;
+            }
+        }
+    }
+
+    // Evaluate both segments at the parameters found above
+    for (int k = 0; k < 3; ++k)
+    {
+        c1[k] = p1[k] + dir1[k] * s;
+    }
+    for (int k = 0; k < 3; ++k)
+    {
+        c2[k] = p2[k] + dir2[k] * t;
+    }
+}
+
+#if 0
+dReal tnom = b*s + f;
+if (tnom < 0.0f) {
+    t = 0.0f;
+    s = dxClamp(-c / a, 0.0f, 1.0f);
+} else if (tnom > e) {
+    t = 1.0f;
+    s = dxClamp((b - c) / a, 0.0f, 1.0f);
+} else {
+    t = tnom / e;
+}
+#endif
+
+/*! \brief Computes the line along which two planes meet, if they do.
+  \param p1 Plane 1 as (normal, d)
+  \param p2 Plane 2 as (normal, d)
+  \param p receives a point on the intersection line when true is returned
+  \param d receives the direction of the intersection line; it is overwritten
+  with the cross product of the two normals even when false is returned
+  \return true if the planes intersect, false if parallel.
+*/
+inline bool IntersectPlanes(const dVector4 p1, const dVector4 p2, dVector3 p, dVector3 d)
+{
+    // The intersection line runs along the cross product of both normals
+    dCalcVectorCross3(d,p1,p2);
+    dReal lengthSquared = dCalcVectorDot3(d, d);
+    if (lengthSquared < dEpsilon)
+    {
+        // (Near) zero direction: the planes are parallel (and separated)
+        // or coincident, which does not count as intersecting
+        return false;
+    }
+
+    // p = cross(d1*n2 - d2*n1, d) / |d|^2 lies on both planes
+    dVector3 blend;
+    for (int k = 0; k < 3; ++k)
+    {
+        blend[k]=p1[3]*p2[k] - p2[3]*p1[k];
+    }
+    dCalcVectorCross3(p,blend,d);
+    for (int k = 0; k < 3; ++k)
+    {
+        p[k]/=lengthSquared;
+    }
+    return true;
+}
+
+
+#if 0
+/*! \brief Finds out if a point lies inside a convex
+  \param p Point to test
+  \param convex a pointer to convex to test against
+  \return true if the point lies inside the convex, false if not.
+  \note This function sits inside an #if 0 block and is therefore not compiled.
+*/
+inline bool IsPointInConvex(dVector3 p,
+                            dxConvex *convex)
+{
+    dVector3 lp,tmp;
+    // move point into convex space to avoid plane local to world calculations
+    tmp[0] = p[0] - convex->final_posr->pos[0];
+    tmp[1] = p[1] - convex->final_posr->pos[1];
+    tmp[2] = p[2] - convex->final_posr->pos[2];
+    dMultiply1_331 (lp,convex->final_posr->R,tmp);
+    // Each plane occupies four consecutive values: normal (x, y, z) followed by d
+    for(unsigned int i=0;i<convex->planecount;++i)
+    {
+        // dot(normal, lp) - d > 0 puts the point on the positive side of
+        // plane i, which is treated as being outside the convex
+        if((
+            ((convex->planes+(i*4))[0]*lp[0])+
+            ((convex->planes+(i*4))[1]*lp[1])+
+            ((convex->planes+(i*4))[2]*lp[2])+
+            -(convex->planes+(i*4))[3]
+        )>0)
+        {
+            return false;
+        }
+    }
+    // Not on the positive side of any plane
+    return true;
+}
+#endif
+
+/*! \brief Tests a point against one polygonal face of a convex
+  \param p Point to test, in the same frame as the transformed vertices
+  \param polygon pointer to the face entry in the convex polygon index
+  buffer: a vertex count followed by that many vertex indices
+  \param plane normal of the face (only the first three values are read)
+  \param convex convex that owns the vertices; its final_posr is applied to
+  every vertex before testing
+  \param out receives the closest point on the first edge found to have the
+  point on its outer side; written only when false is returned
+  \return true if the point lies inside of the polygon, false if not.
+*/
+inline bool IsPointInPolygon(dVector3 p,
+                             const unsigned int *polygon,
+                             dReal *plane,
+                             dxConvex *convex,
+                             dVector3 out)
+{
+    unsigned pointcount=polygon[0];
+    dIASSERT(pointcount != 0);
+    // The vertex indices follow right after the count
+    const unsigned int *indices = polygon + 1;
+
+    dVector3 edgeStart;
+    dVector3 edgeEnd;
+    dVector3 edge;
+    dVector3 toPoint;
+    dVector3 outward;
+
+    // Begin with the last vertex so that the first edge closes the loop
+    dMultiply0_331 (edgeEnd,convex->final_posr->R,
+        &convex->points[(indices[pointcount-1]*3)]);
+    for (int k = 0; k < 3; ++k)
+    {
+        edgeEnd[k]=convex->final_posr->pos[k]+edgeEnd[k];
+    }
+
+    for(unsigned i=0; i != pointcount; ++i)
+    {
+        // The previous end point becomes the start of the current edge
+        for (int k = 0; k < 3; ++k)
+        {
+            edgeStart[k] = edgeEnd[k];
+        }
+
+        dMultiply0_331 (edgeEnd,convex->final_posr->R,&convex->points[(indices[i]*3)]);
+        for (int k = 0; k < 3; ++k)
+        {
+            edgeEnd[k]=convex->final_posr->pos[k]+edgeEnd[k];
+        }
+
+        for (int k = 0; k < 3; ++k)
+        {
+            edge[k] = edgeEnd[k] - edgeStart[k];
+            toPoint[k] = p[k] - edgeStart[k];
+        }
+
+        // In-plane vector perpendicular to the edge
+        dCalcVectorCross3(outward, edge, plane);
+
+        if (!(dCalcVectorDot3(toPoint, outward) > REAL(0.0)))
+        {
+            // Not beyond this edge, keep checking the remaining ones
+            continue;
+        }
+
+        // The point is beyond this edge: report the closest point on it.
+        // A zero-length edge collapses to its start point.
+        dReal edgeLengthSq = dCalcVectorDot3(edge, edge);
+        dReal s = REAL(0.0);
+        if (edgeLengthSq != REAL(0.0))
+        {
+            s = dCalcVectorDot3(edge, toPoint) / edgeLengthSq;
+        }
+
+        if (s <= REAL(0.0))
+        {
+            for (int k = 0; k < 3; ++k)
+            {
+                out[k] = edgeStart[k];
+            }
+        }
+        else if (s >= REAL(1.0))
+        {
+            for (int k = 0; k < 3; ++k)
+            {
+                out[k] = edgeEnd[k];
+            }
+        }
+        else
+        {
+            for (int k = 0; k < 3; ++k)
+            {
+                out[k] = edgeStart[k] + edge[k] * s;
+            }
+        }
+
+        return false;
+    }
+
+    return true;
+}
+
+/*! \brief Collides a convex (o1) against a plane (o2).
+
+  Every vertex of the convex is transformed by the convex final_posr and
+  its signed distance to the plane is evaluated. Vertices on or below the
+  plane become contacts until the buffer is full.
+  \param o1 the convex geom
+  \param o2 the plane geom
+  \param flags contact flags; the bits under NUMC_MASK give the maximum number
+  of contacts to generate and must be at least 1
+  \param contact contact buffer, addressed through SAFECONTACT
+  \param skip stride handed to SAFECONTACT; must be at least
+  sizeof(dContactGeom)
+  \return the number of contacts written if a vertex lies exactly on the
+  plane or vertices were found on both sides of it, otherwise 0 (even when
+  contact entries have already been filled in).
+*/
+int dCollideConvexPlane (dxGeom *o1, dxGeom *o2, int flags,
+                         dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dConvexClass);
+    dIASSERT (o2->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxConvex *Convex = (dxConvex*) o1;
+    dxPlane *Plane = (dxPlane*) o2;
+    unsigned int contacts=0;
+    unsigned int maxc = flags & NUMC_MASK;
+    dVector3 v2;
+
+    // Flags recording which signs of the vertex-to-plane distance were seen.
+    // They must not overlap NUMC_MASK because they get OR-ed together with a
+    // contact count in the loop exit test below.
+#define LTEQ_ZERO	0x10000000
+#define GTEQ_ZERO	0x20000000
+#define BOTH_SIGNS	(LTEQ_ZERO | GTEQ_ZERO)
+    dIASSERT((BOTH_SIGNS & NUMC_MASK) == 0); // used in conditional operator later
+
+    unsigned int totalsign = 0;
+    for(unsigned int i=0;i<Convex->pointcount;++i)
+    {
+        // Transform vertex i by the convex rotation and position
+        dMultiply0_331 (v2,Convex->final_posr->R,&Convex->points[(i*3)]);
+        dVector3Add(Convex->final_posr->pos, v2, v2);
+
+        unsigned int distance2sign = GTEQ_ZERO;
+        dReal distance2 = dVector3Dot(Plane->p, v2) - Plane->p[3]; // Ax + By + Cz - D
+        if((distance2 <= REAL(0.0)))
+        {
+            // A vertex exactly on the plane counts for both signs
+            distance2sign = distance2 != REAL(0.0) ? LTEQ_ZERO : BOTH_SIGNS;
+
+            // Record a contact while there is room left in the buffer
+            if (contacts != maxc)
+            {
+                dContactGeom *target = SAFECONTACT(flags, contact, contacts, skip);
+                dVector3Copy(Plane->p, target->normal);
+                dVector3Copy(v2, target->pos);
+                target->depth = -distance2;
+                target->g1 = Convex;
+                target->g2 = Plane;
+                target->side1 = -1; // TODO: set plane index?
+                target->side2 = -1;
+                contacts++;
+            }
+        }
+
+        // Take new sign into account
+        totalsign |= distance2sign;
+        // Check if contacts are full and both signs have been already found
+        // (contacts ^ maxc) is zero only when the buffer is full, so the
+        // OR-ed value equals BOTH_SIGNS only when both conditions hold.
+        if (((contacts ^ maxc) | totalsign) == BOTH_SIGNS) // harder to comprehend but requires one register less
+        {
+            break; // Nothing can be changed any more
+        }
+    }
+    // Contacts are reported only if both signs were seen
+    if (totalsign == BOTH_SIGNS) return contacts;
+    return 0;
+#undef BOTH_SIGNS
+#undef GTEQ_ZERO
+#undef LTEQ_ZERO
+}
+
+/*! \brief Collides a sphere (o1) against a convex (o2).
+
+  The planes of the convex are visited in order. For the first plane the
+  sphere center is in front of and closer to than the radius, the sphere is
+  tested against the matching polygon (face, then its border). If the center
+  is not in front of any plane, the sphere is pushed out through the plane
+  it is nearest to.
+  \param o1 the sphere geom
+  \param o2 the convex geom
+  \param flags contact flags; the bits under NUMC_MASK must request at least
+  one contact
+  \param contact receives the contact; only the first entry is ever written
+  \param skip must be at least sizeof(dContactGeom); not used otherwise
+  because at most one contact is produced
+  \return 1 if a contact was generated, 0 otherwise.
+*/
+int dCollideSphereConvex (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dSphereClass);
+    dIASSERT (o2->type == dConvexClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxSphere *Sphere = (dxSphere*) o1;
+    dxConvex *Convex = (dxConvex*) o2;
+    dReal dist,closestdist=dInfinity;
+    dVector4 plane;
+    dVector3 offsetpos,out,temp;
+    // Walks the polygon buffer in step with the plane index
+    const unsigned int *pPoly=Convex->polygons;
+    int closestplane=-1;
+    bool sphereinside=true;
+    /*
+    Do a good old sphere vs plane check first,
+    if a collision is found then check if the contact point
+    is within the polygon
+    */
+    // offset the sphere final_posr->position into the convex space
+    offsetpos[0]=Sphere->final_posr->pos[0]-Convex->final_posr->pos[0];
+    offsetpos[1]=Sphere->final_posr->pos[1]-Convex->final_posr->pos[1];
+    offsetpos[2]=Sphere->final_posr->pos[2]-Convex->final_posr->pos[2];
+    for(unsigned int i=0;i<Convex->planecount;++i)
+    {
+        // apply rotation to the plane
+        dMultiply0_331(plane,Convex->final_posr->R,&Convex->planes[(i*4)]);
+        plane[3]=(&Convex->planes[(i*4)])[3];
+        // Get the distance from the sphere origin to the plane
+        dist = dVector3Dot(plane, offsetpos) - plane[3]; // Ax + By + Cz - D
+        if(dist>0)
+        {
+            // if we get here, we know the center of the sphere is
+            // outside of the convex hull.
+            if(dist<Sphere->radius)
+            {
+                // if we get here we know the sphere surface penetrates
+                // the plane
+                if(IsPointInPolygon(Sphere->final_posr->pos,pPoly,plane,Convex,out))
+                {
+                    // finally if we get here we know that the
+                    // sphere is directly touching the inside of the polyhedron
+                    contact->normal[0] = plane[0];
+                    contact->normal[1] = plane[1];
+                    contact->normal[2] = plane[2];
+                    contact->pos[0] = Sphere->final_posr->pos[0]+
+                        (-contact->normal[0]*Sphere->radius);
+                    contact->pos[1] = Sphere->final_posr->pos[1]+
+                        (-contact->normal[1]*Sphere->radius);
+                    contact->pos[2] = Sphere->final_posr->pos[2]+
+                        (-contact->normal[2]*Sphere->radius);
+                    contact->depth = Sphere->radius-dist;
+                    contact->g1 = Sphere;
+                    contact->g2 = Convex;
+                    contact->side1 = -1;
+                    contact->side2 = -1; // TODO: set plane index?
+                    return 1;
+                }
+                else
+                {
+                    // the sphere may not be directly touching
+                    // the polyhedron, but it may be touching
+                    // a point or an edge, if the distance between
+                    // the closest point on the poly (out) and the
+                    // center of the sphere is less than the sphere
+                    // radius we have a hit.
+                    temp[0] = (Sphere->final_posr->pos[0]-out[0]);
+                    temp[1] = (Sphere->final_posr->pos[1]-out[1]);
+                    temp[2] = (Sphere->final_posr->pos[2]-out[2]);
+                    // dist is reused here for the squared center-to-border
+                    // distance; it is not read as a plane distance again
+                    // during this iteration
+                    dist=(temp[0]*temp[0])+(temp[1]*temp[1])+(temp[2]*temp[2]);
+                    // avoid the sqrt unless really necessary
+                    if(dist<(Sphere->radius*Sphere->radius))
+                    {
+                        // We got an indirect hit
+                        dist=dSqrt(dist);
+                        contact->normal[0] = temp[0]/dist;
+                        contact->normal[1] = temp[1]/dist;
+                        contact->normal[2] = temp[2]/dist;
+                        contact->pos[0] = Sphere->final_posr->pos[0]+
+                            (-contact->normal[0]*Sphere->radius);
+                        contact->pos[1] = Sphere->final_posr->pos[1]+
+                            (-contact->normal[1]*Sphere->radius);
+                        contact->pos[2] = Sphere->final_posr->pos[2]+
+                            (-contact->normal[2]*Sphere->radius);
+                        contact->depth = Sphere->radius-dist;
+                        contact->g1 = Sphere;
+                        contact->g2 = Convex;
+                        contact->side1 = -1;
+                        contact->side2 = -1; // TODO: set plane index?
+                        return 1;
+                    }
+                }
+            }
+            sphereinside=false;
+        }
+        if(sphereinside)
+        {
+            // Still inside as far as the planes seen so far are concerned:
+            // remember the nearest one
+            if(closestdist>dFabs(dist))
+            {
+                closestdist=dFabs(dist);
+                closestplane=i;
+            }
+        }
+        // Advance to the next polygon entry (vertex count plus its indices)
+        pPoly+=pPoly[0]+1;
+    }
+    // closestplane stays at -1 when no plane was ever recorded (for example
+    // a convex without planes); indexing the plane array with it would read
+    // in front of the buffer, so no contact is reported in that case.
+    if(sphereinside && closestplane>=0)
+    {
+        // if the center of the sphere is inside
+        // the Convex, we need to pop it out
+        dMultiply0_331(contact->normal,
+            Convex->final_posr->R,
+            &Convex->planes[(closestplane*4)]);
+        contact->pos[0] = Sphere->final_posr->pos[0];
+        contact->pos[1] = Sphere->final_posr->pos[1];
+        contact->pos[2] = Sphere->final_posr->pos[2];
+        contact->depth = closestdist+Sphere->radius;
+        contact->g1 = Sphere;
+        contact->g2 = Convex;
+        contact->side1 = -1;
+        contact->side2 = -1; // TODO: set plane index?
+        return 1;
+    }
+    return 0;
+}
+
+/*! \brief Convex (o1) versus box (o2) collider placeholder.
+
+  Only the argument checks are performed; the contact buffer is never
+  touched.
+  \return always 0
+*/
+int dCollideConvexBox (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom * /*contact*/, int skip)
+{
+    // Argument contract
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dConvexClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // No collision test is implemented here, so no contacts are reported
+    const int generated = 0;
+    return generated;
+}
+
+/*! \brief Convex (o1) versus capsule (o2) collider placeholder.
+
+  Only the argument checks are performed; the contact buffer is never
+  touched.
+  \return always 0
+*/
+int dCollideConvexCapsule (dxGeom *o1, dxGeom *o2,
+                           int flags, dContactGeom * /*contact*/, int skip)
+{
+    // Argument contract
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dConvexClass);
+    dIASSERT (o2->type == dCapsuleClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // No collision test is implemented here, so no contacts are reported
+    const int generated = 0;
+    return generated;
+}
+
+/*! \brief Projects every vertex of a convex onto an axis
+  \param cvx convex whose vertices are transformed by its final_posr
+  \param axis axis to project onto; axis[3] is subtracted from every
+  projection
+  \param min receives the smallest projection
+  \param max receives the largest projection
+  \note The first vertex is always read, whatever cvx.pointcount says.
+*/
+inline void ComputeInterval(dxConvex& cvx,dVector4 axis,dReal& min,dReal& max)
+{
+    /* TODO: Use Support points here */
+    dVector3 vertex;
+    unsigned int index = 0;
+    do
+    {
+        // Bring the vertex into the frame given by final_posr
+        dMultiply0_331(vertex,cvx.final_posr->R,cvx.points+(index*3));
+        for (int k = 0; k < 3; ++k)
+        {
+            vertex[k]+=cvx.final_posr->pos[k];
+        }
+        // Usually the distance part of the plane (axis) would not be
+        // necessary, however it is needed here in order to know which
+        // face to pick when there are 2 parallel sides.
+        const dReal projection = dCalcVectorDot3(vertex,axis)-axis[3];
+        if (index == 0)
+        {
+            // The first vertex seeds both ends of the interval
+            max = min = projection;
+        }
+        else if (projection < min)
+        {
+            min = projection;
+        }
+        else if (projection > max)
+        {
+            max = projection;
+        }
+        ++index;
+    }
+    while (index < cvx.pointcount);
+}
+
+/*! \brief Generates contacts where edges of cvx1 pierce faces of cvx2
+
+  Every edge of cvx1 is intersected with every face plane of cvx2. When the
+  hit point lies inside the matching polygon, a contact is stored whose depth
+  and normal come from the other plane of cvx2 the point is nearest to.
+  \param cvx1 convex whose edges are tested; stored as g1 of the contacts
+  \param cvx2 convex whose faces are tested; stored as g2 of the contacts
+  \param flags contact flags; the bits under NUMC_MASK give the capacity of
+  the contact buffer
+  \param curc [IN/OUT] number of contacts already stored; incremented for
+  every contact added
+  \param contact contact buffer, addressed through SAFECONTACT
+  \param skip stride handed to SAFECONTACT
+  \return true as soon as curc reaches the buffer capacity, false otherwise
+  (even if contacts were added).
+  \note The entry at index curc is used as scratch space, so its g1, g2 and
+  pos members may be overwritten without a contact being counted.
+*/
+bool CheckEdgeIntersection(dxConvex& cvx1,dxConvex& cvx2, int flags,int& curc,
+                           dContactGeom *contact, int skip)
+{
+    int maxc = flags & NUMC_MASK;
+    dIASSERT(maxc != 0);
+    dVector3 e1,e2,q;
+    dVector4 plane,depthplane;
+    dReal t;
+    for(unsigned int i = 0;i<cvx1.edgecount;++i)
+    {
+        // Rotate
+        dMultiply0_331(e1,cvx1.final_posr->R,cvx1.points+(cvx1.edges[i].first*3));
+        // translate
+        e1[0]+=cvx1.final_posr->pos[0];
+        e1[1]+=cvx1.final_posr->pos[1];
+        e1[2]+=cvx1.final_posr->pos[2];
+        // Rotate
+        dMultiply0_331(e2,cvx1.final_posr->R,cvx1.points+(cvx1.edges[i].second*3));
+        // translate
+        e2[0]+=cvx1.final_posr->pos[0];
+        e2[1]+=cvx1.final_posr->pos[1];
+        e2[2]+=cvx1.final_posr->pos[2];
+        // Walks the polygon buffer of cvx2 in step with the plane index
+        const unsigned int* pPoly=cvx2.polygons;
+        for(sizeint j=0;j<cvx2.planecount;++j)
+        {
+            // Rotate
+            dMultiply0_331(plane,cvx2.final_posr->R,cvx2.planes+(j*4));
+            dNormalize3(plane);
+            // Translate
+            plane[3]=
+                (cvx2.planes[(j*4)+3])+
+                ((plane[0] * cvx2.final_posr->pos[0]) +
+                (plane[1] * cvx2.final_posr->pos[1]) +
+                (plane[2] * cvx2.final_posr->pos[2]));
+            dContactGeom *target = SAFECONTACT(flags, contact, curc, skip);
+            target->g1=&cvx1; // g1 is the one pushed
+            target->g2=&cvx2;
+            if(IntersectSegmentPlane(e1,e2,plane,t,target->pos))
+            {
+                if(IsPointInPolygon(target->pos,pPoly,plane,&cvx2,q))
+                {
+                    // Look for the other plane the hit point is nearest to
+                    target->depth = dInfinity;
+                    for(sizeint k=0;k<cvx2.planecount;++k)
+                    {
+                        if(k==j) continue; // we're already at 0 depth on this plane
+                        // Rotate
+                        dMultiply0_331(depthplane,cvx2.final_posr->R,cvx2.planes+(k*4));
+                        dNormalize3(depthplane);
+                        // Translate. The offset has to be computed with the
+                        // normal of depthplane itself (not with the normal
+                        // of the face plane j being tested), in the same way
+                        // as plane[3] above.
+                        depthplane[3]=
+                            (cvx2.planes[(k*4)+3])+
+                            ((depthplane[0] * cvx2.final_posr->pos[0]) +
+                            (depthplane[1] * cvx2.final_posr->pos[1]) +
+                            (depthplane[2] * cvx2.final_posr->pos[2]));
+                        dReal depth = (dVector3Dot(depthplane, target->pos) - depthplane[3]); // Ax + By + Cz - D
+                        // Keep the smallest magnitude, ignoring planes the
+                        // point lies (almost) exactly on
+                        if((dFabs(depth)<dFabs(target->depth))&&((depth<-dEpsilon)||(depth>dEpsilon)))
+                        {
+                            target->depth=depth;
+                            dVector3Copy(depthplane,target->normal);
+                        }
+                    }
+                    ++curc;
+                    if(curc==maxc)
+                        return true;
+                }
+            }
+            // Advance to the next polygon entry (vertex count plus its indices)
+            pPoly+=pPoly[0]+1;
+        }
+    }
+    return false;
+}
+
+/*
+Result record shared by the convex-convex separating axis checks
+*/
+struct ConvexConvexSATOutput
+{
+    // Depth of the shallowest overlap recorded so far
+    dReal min_depth;
+    // Feature pair behind min_depth: 0 = no type, 1 = face-something, 2 = edge-edge
+    int depth_type;
+    // distance from center to center, from cvx1 to cvx2
+    dVector3 dist;
+    // e1a to e1b = edge in cvx1
+    dVector3 e1a;
+    dVector3 e1b;
+    // e2a to e2b = edge in cvx2
+    dVector3 e2a;
+    dVector3 e2b;
+};
+
+/*! \brief Does an axis separation test using cvx1 planes on cvx1 and cvx2
+  \param cvx1 [IN] First Convex object, its planes are used to do the tests
+  \param cvx2 [IN] Second Convex object
+  \param ccso [IN/OUT] ccso.min_depth carries the minimum depth found so far
+  and must be set to a huge value such as dInfinity before the first test;
+  ccso.min_depth and ccso.depth_type are updated when a face of cvx1 yields
+  an overlap of smaller magnitude
+  \return true for a collision (no face plane of cvx1 separates the two
+  convexes), false for no collision.
+*/
+inline bool CheckSATConvexFaces(dxConvex& cvx1,
+                                dxConvex& cvx2,
+                                ConvexConvexSATOutput& ccso)
+{
+    dVector4 axis;
+    dReal lo1,hi1,lo2,hi2;
+    for(unsigned int face=0;face<cvx1.planecount;++face)
+    {
+        // -- Apply Transforms --
+        // Rotate the face normal and make sure it has unit length
+        dMultiply0_331(axis,cvx1.final_posr->R,cvx1.planes+(face*4));
+        dNormalize3(axis);
+        // Translate: move the plane offset along with the convex position
+        axis[3]=
+            (cvx1.planes[(face*4)+3])+
+            ((axis[0] * cvx1.final_posr->pos[0]) +
+            (axis[1] * cvx1.final_posr->pos[1])  +
+            (axis[2] * cvx1.final_posr->pos[2]));
+
+        // Project both convexes onto the face normal
+        ComputeInterval(cvx1,axis,lo1,hi1);
+        ComputeInterval(cvx2,axis,lo2,hi2);
+        if(hi2<lo1 || hi1<lo2)
+        {
+            // Disjoint intervals: this face plane separates the convexes
+            return false;
+        }
+        const dReal overlap = dMIN(hi1, hi2) - dMAX(lo1, lo2);
+        /*
+        Only faces that are penetrated by cvx2 take part in the minimum
+        depth: (hi2*lo2)<=0 means the ends of the cvx2 interval have
+        different signs, or one of them is zero and thus cvx2 barely
+        touches cvx1
+        */
+        const bool penetrated = ((hi2*lo2)<=0);
+        if (penetrated && (dFabs(overlap)<dFabs(ccso.min_depth)))
+        {
+            // Flip plane because the contact normal must point INTO g1,
+            // plus the integrator seems to like positive depths better than negative ones
+            ccso.min_depth=-overlap;
+            ccso.depth_type = 1; // 1 = face-something
+        }
+    }
+    return true;
+}
+/*! \brief Does an axis separation test using cross products of cvx1 and cvx2 edges
+  \param cvx1 [IN] First Convex object
+  \param cvx2 [IN] Second Convex object
+  \param ccso [IN/OUT] ccso.dist is read to pick the support vertex of each
+  convex; ccso.min_depth carries the minimum depth found so far and must be
+  set to a huge value such as dInfinity before the first test. When an edge
+  pair yields a smaller overlap, min_depth, depth_type and the four edge end
+  points (e1a, e1b, e2a, e2b) are updated
+  \return true for a collision (no tested axis separates the two convexes),
+  false for no collision.
+  \note Only edges that contain the support vertex of their convex are tested.
+*/
+inline bool CheckSATConvexEdges(dxConvex& cvx1,
+                                dxConvex& cvx2,
+                                ConvexConvexSATOutput& ccso)
+{
+    // Support vertices: along ccso.dist for cvx1, along its opposite for cvx2
+    dVector3 direction;
+    dVector3Copy(ccso.dist,direction);
+    unsigned int support1 = cvx1.SupportIndex(direction);
+    // invert direction
+    dVector3Inv(direction);
+    unsigned int support2 = cvx2.SupportIndex(direction);
+
+    dVector4 axis;
+    dVector3 edge1,edge2,start1,end1,start2,end2;
+    dReal lo1,hi1,lo2,hi2;
+    for(unsigned int i = 0;i<cvx1.edgecount;++i)
+    {
+        // Skip edge if it doesn't contain the extremal vertex
+        if(!((cvx1.edges[i].first==support1)||(cvx1.edges[i].second==support1))) continue;
+        // we only need to apply rotation here
+        dMultiply0_331(start1,cvx1.final_posr->R,cvx1.points+(cvx1.edges[i].first*3));
+        dMultiply0_331(end1,cvx1.final_posr->R,cvx1.points+(cvx1.edges[i].second*3));
+        for (int k = 0; k < 3; ++k)
+        {
+            edge1[k]=end1[k]-start1[k];
+        }
+        for(unsigned int j = 0;j<cvx2.edgecount;++j)
+        {
+            // Skip edge if it doesn't contain the extremal vertex
+            if(!((cvx2.edges[j].first==support2)||(cvx2.edges[j].second==support2))) continue;
+            // we only need to apply rotation here
+            dMultiply0_331 (start2,cvx2.final_posr->R,cvx2.points+(cvx2.edges[j].first*3));
+            dMultiply0_331 (end2,cvx2.final_posr->R,cvx2.points+(cvx2.edges[j].second*3));
+            for (int k = 0; k < 3; ++k)
+            {
+                edge2[k]=end2[k]-start2[k];
+            }
+
+            // The candidate axis is perpendicular to both edges
+            dCalcVectorCross3(axis,edge1,edge2);
+            if(dCalcVectorDot3(axis,axis)<dEpsilon) /* edges are parallel */ continue;
+            dNormalize3(axis);
+            axis[3]=0;
+
+            ComputeInterval(cvx1,axis,lo1,hi1);
+            ComputeInterval(cvx2,axis,lo2,hi2);
+            if(hi2 < lo1 || hi1 < lo2)
+            {
+                // Disjoint intervals: the axis separates the convexes
+                return false;
+            }
+            const dReal overlap = dMIN(hi1, hi2) - dMAX(lo1, lo2);
+            if ((dFabs(overlap)+dEpsilon)<dFabs(ccso.min_depth))
+            {
+                ccso.min_depth=overlap;
+                ccso.depth_type = 2; // 2 means edge-edge
+                // use cached values, add position
+                dVector3Copy(start1,ccso.e1a);
+                dVector3Copy(end1,ccso.e1b);
+                for (int k = 0; k < 3; ++k)
+                {
+                    ccso.e1a[k]+=cvx1.final_posr->pos[k];
+                }
+                for (int k = 0; k < 3; ++k)
+                {
+                    ccso.e1b[k]+=cvx1.final_posr->pos[k];
+                }
+                dVector3Copy(start2,ccso.e2a);
+                dVector3Copy(end2,ccso.e2b);
+                for (int k = 0; k < 3; ++k)
+                {
+                    ccso.e2a[k]+=cvx2.final_posr->pos[k];
+                }
+                for (int k = 0; k < 3; ++k)
+                {
+                    ccso.e2b[k]+=cvx2.final_posr->pos[k];
+                }
+            }
+        }
+    }
+    return true;
+}
+
+#if 0
+/*! \brief Returns the index of the plane/side of the incident convex (ccso.g2)
+ *  which is closer to the reference convex (ccso.g1) side
+ *
+ *  This function just looks for the incident face whose normal has the
+ *  largest dot product with the plane stored in ccso, after that plane has
+ *  been rotated into the incident convex space.
+ *
+ *  \note This function sits inside an #if 0 block and is therefore not
+ *  compiled. It reads ccso.g2 and ccso.plane, which are not members of
+ *  ConvexConvexSATOutput as declared in this file.
+ */
+inline unsigned int GetIncidentSide(ConvexConvexSATOutput& ccso)
+{
+    dVector3 nis; // (N)ormal in (I)ncident convex (S)pace
+    dReal SavedDot;
+    dReal Dot;
+    unsigned int incident_side=0;
+    // Rotate the plane normal into incident convex space
+    // (things like this should be done all over this file,
+    //  will look into that)
+    dMultiply1_331(nis,ccso.g2->final_posr->R,ccso.plane);
+    // Plane 0 is the initial candidate; the loop below checks the others
+    SavedDot = dCalcVectorDot3(nis,ccso.g2->planes);
+    for(unsigned int i=1;i<ccso.g2->planecount;++i)
+    {
+        Dot = dCalcVectorDot3(nis,ccso.g2->planes+(i*4));
+        // Keep the plane with the largest dot product
+        if(Dot>SavedDot)
+        {
+            SavedDot=Dot;
+            incident_side=i;
+        }
+    }
+    return incident_side;
+}
+#endif
+
+/*! \brief Picks the side of a convex whose normal agrees most with a direction
+  \param dir direction to match; it is copied and normalized, the argument
+  itself is left unchanged
+  \param cvx convex whose planes are searched
+  \return index of the plane with the largest dot product between its normal
+  and the direction expressed in convex space (the first such plane on ties).
+  \note Plane 0 is always read, whatever cvx.planecount says.
+*/
+inline unsigned int GetSupportSide(dVector3& dir,dxConvex& cvx)
+{
+    // Work on a normalized copy of the direction
+    dVector3 unitDir;
+    dVector3Copy(dir,unitDir);
+    dNormalize3(unitDir);
+
+    // Direction in convex space
+    dVector3 localDir;
+    dMultiply1_331(localDir,cvx.final_posr->R,unitDir);
+
+    // Plane 0 is the initial candidate
+    unsigned int bestSide=0;
+    dReal bestDot = dCalcVectorDot3(localDir,cvx.planes);
+    for(unsigned int i=1;i<cvx.planecount;++i)
+    {
+        const dReal candidate = dCalcVectorDot3(localDir,cvx.planes+(i*4));
+        if(candidate>bestDot)
+        {
+            bestDot=candidate;
+            bestSide=i;
+        }
+    }
+    return bestSide;
+}
+
+/*! \brief Does an axis separation test between the 2 convex shapes
+using faces and edges */
+int TestConvexIntersection(dxConvex& cvx1,dxConvex& cvx2, int flags,
+                           dContactGeom *contact, int skip)
+{
+    ConvexConvexSATOutput ccso;
+#ifndef dNDEBUG
+    memset(&ccso, 0, sizeof(ccso)); // get rid of 'uninitialized values' warning
+#endif
+    ccso.min_depth=dInfinity; // Min not min at all
+    ccso.depth_type=0; // no type
+    // precompute distance vector
+    dSubtractVectors3(ccso.dist, cvx2.final_posr->pos, cvx1.final_posr->pos);
+    int maxc = flags & NUMC_MASK;
+    dIASSERT(maxc != 0);
+    dVector3 i1,i2,r1,r2; // edges of incident and reference faces respectively
+    int contacts=0;
+    if(!CheckSATConvexFaces(cvx1,cvx2,ccso))
+    {
+        return 0;
+    }
+    else
+        if(!CheckSATConvexFaces(cvx2,cvx1,ccso))
+        {
+            return 0;
+        }
+        else if(!CheckSATConvexEdges(cvx1,cvx2,ccso))
+        {
+            return 0;
+        }
+        // If we get here, there was a collision
+        if(ccso.depth_type==1) // face-face
+        {
+            // cvx1 MUST always be in contact->g1 and cvx2 in contact->g2
+            // This was learned the hard way :(
+            unsigned int incident_side;
+            const unsigned int* pIncidentPoly;
+            const unsigned int* pIncidentPoints;
+            unsigned int reference_side;
+            const unsigned int* pReferencePoly;
+            const unsigned int* pReferencePoints;
+            dVector4 plane,rplane,iplane;
+            dVector3 tmp;
+            dVector3 dist,p;
+            dReal t,d,d1,d2;
+            bool outside,out;
+            dVector3Copy(ccso.dist,dist);
+            reference_side = GetSupportSide(dist,cvx1);
+            dNegateVector3(dist);
+            incident_side = GetSupportSide(dist,cvx2);
+
+            pReferencePoly = cvx1.polygons;
+            pIncidentPoly  = cvx2.polygons;
+            // Get Reference plane (We may not have to apply transforms Optimization Oportunity)
+            // Rotate
+            dMultiply0_331(rplane,cvx1.final_posr->R,cvx1.planes+(reference_side*4));
+            dNormalize3(rplane);
+            // Translate
+            rplane[3]=
+                (cvx1.planes[(reference_side*4)+3])+
+                ((rplane[0] * cvx1.final_posr->pos[0]) +
+                (rplane[1] * cvx1.final_posr->pos[1]) +
+                (rplane[2] * cvx1.final_posr->pos[2]));
+            // flip
+            rplane[0]=-rplane[0];
+            rplane[1]=-rplane[1];
+            rplane[2]=-rplane[2];
+            rplane[3]=-rplane[3];
+            for(unsigned int i=0;i<incident_side;++i)
+            {
+                pIncidentPoly+=pIncidentPoly[0]+1;
+            }
+            pIncidentPoints = pIncidentPoly+1;
+            // Get the first point of the incident face
+            dMultiply0_331(i2,cvx2.final_posr->R,&cvx2.points[(pIncidentPoints[0]*3)]);
+            dVector3Add(i2,cvx2.final_posr->pos,i2);
+            // Get the same point in the reference convex space
+            dVector3Copy(i2,r2);
+            dVector3Subtract(r2,cvx1.final_posr->pos,r2);
+            dVector3Copy(r2,tmp);
+            dMultiply1_331(r2,cvx1.final_posr->R,tmp);
+            for(unsigned int i=0;i<pIncidentPoly[0];++i)
+            {
+                // Move i2 to i1, r2 to r1
+                dVector3Copy(i2,i1);
+                dVector3Copy(r2,r1);
+                dMultiply0_331(i2,cvx2.final_posr->R,&cvx2.points[(pIncidentPoints[(i+1)%pIncidentPoly[0]]*3)]);
+                dVector3Add(i2,cvx2.final_posr->pos,i2);
+                // Get the same point in the reference convex space
+                dVector3Copy(i2,r2);
+                dVector3Subtract(r2,cvx1.final_posr->pos,r2);
+                dVector3Copy(r2,tmp);
+                dMultiply1_331(r2,cvx1.final_posr->R,tmp);
+                outside=false;
+                for(unsigned int j=0;j<cvx1.planecount;++j)
+                {
+                    plane[0]=cvx1.planes[(j*4)+0];
+                    plane[1]=cvx1.planes[(j*4)+1];
+                    plane[2]=cvx1.planes[(j*4)+2];
+                    plane[3]=cvx1.planes[(j*4)+3];
+                    // Get the distance from the points to the plane
+                    d1 = r1[0]*plane[0]+
+                        r1[1]*plane[1]+
+                        r1[2]*plane[2]-
+                        plane[3];
+                    d2 = r2[0]*plane[0]+
+                        r2[1]*plane[1]+
+                        r2[2]*plane[2]-
+                        plane[3];
+                    if(d1*d2<0)
+                    {
+                        out = false;
+
+                        // Edge intersects plane
+                        if (!IntersectSegmentPlane(r1,r2,plane,t,p))
+                        {
+                            out = true;
+                        }
+
+                        if (!out)
+                        {
+                            // Check the resulting point again to make sure it is inside the reference convex
+                            for (unsigned int k = 0; k < cvx1.planecount; ++k)
+                            {
+                                d = p[0]*cvx1.planes[(k*4)+0]+
+                                    p[1]*cvx1.planes[(k*4)+1]+
+                                    p[2]*cvx1.planes[(k*4)+2]-
+                                    cvx1.planes[(k*4)+3];
+                                if(d>0)
+                                {
+                                    out = true;
+                                    break;
+                                }
+                            }
+                        }
+
+                        if(!out)
+                        {
+#if 0
+                            // Use t to move p into global space
+                            p[0] = i1[0]+((i2[0]-i1[0])*t);
+                            p[1] = i1[1]+((i2[1]-i1[1])*t);
+                            p[2] = i1[2]+((i2[2]-i1[2])*t);
+#else
+                            // Apply reference convex transformations to p
+                            // The commented out piece of code is likelly to
+                            // produce less operations than this one, but
+                            // this way we know we are getting the right data
+                            dMultiply0_331(tmp,cvx1.final_posr->R,p);
+                            dVector3Add(tmp,cvx1.final_posr->pos,p);
+#endif
+                            // get p's distance to reference plane
+                            d = p[0]*rplane[0]+
+                                p[1]*rplane[1]+
+                                p[2]*rplane[2]-
+                                rplane[3];
+                            if(d>0)
+                            {
+                                dContactGeom *target = SAFECONTACT(flags, contact, contacts, skip);
+                                dVector3Copy(p, target->pos);
+                                dVector3Copy(rplane, target->normal);
+                                target->g1 = &cvx1;
+                                target->g2 = &cvx2;
+                                target->depth = d;
+                                ++contacts;
+                                if (contacts==maxc) return contacts;
+                            }
+                        }
+                    }
+                    if(d1>0)
+                    {
+                        outside=true;
+                    }
+                }
+                if(outside) continue;
+                d = i1[0]*rplane[0]+
+                    i1[1]*rplane[1]+
+                    i1[2]*rplane[2]-
+                    rplane[3];
+                if(d>0)
+                {
+                    dContactGeom *target = SAFECONTACT(flags, contact, contacts, skip);
+                    dVector3Copy(i1, target->pos);
+                    dVector3Copy(rplane, target->normal);
+                    target->g1 = &cvx1;
+                    target->g2 = &cvx2;
+                    target->depth = d;
+                    ++contacts;
+                    if (contacts==maxc) return contacts;
+                }
+            }
+            // IF we get here, we got the easiest contacts to calculate,
+            // but there is still space in the contacts array for more.
+            // So, project the Reference's face points onto the Incident face
+            // plane and test them for inclusion in the reference plane as well.
+            // We already have computed intersections so, skip those.
+
+            /* Get Incident plane, we need it for projection */
+            /* Rotate */
+            dMultiply0_331(iplane,cvx2.final_posr->R,cvx2.planes+(incident_side*4));
+            dNormalize3(iplane);
+            /* Translate */
+            iplane[3]=
+                (cvx2.planes[(incident_side*4)+3])     +
+                ((iplane[0] * cvx2.final_posr->pos[0]) +
+                (iplane[1] * cvx2.final_posr->pos[1])  +
+                (iplane[2] * cvx2.final_posr->pos[2]));
+            // get reference face
+            for(unsigned int i=0;i<reference_side;++i)
+            {
+                pReferencePoly+=pReferencePoly[0]+1;
+            }
+            pReferencePoints = pReferencePoly+1;
+            for(unsigned int i=0;i<pReferencePoly[0];++i)
+            {
+                dMultiply0_331(i1,cvx1.final_posr->R,&cvx1.points[(pReferencePoints[i]*3)]);
+                dVector3Add(cvx1.final_posr->pos,i1,i1);
+                // Project onto Incident face plane
+                t = -(i1[0]*iplane[0]+
+                    i1[1]*iplane[1]+
+                    i1[2]*iplane[2]-
+                    iplane[3]);
+                i1[0]+=iplane[0]*t;
+                i1[1]+=iplane[1]*t;
+                i1[2]+=iplane[2]*t;
+                // Get the same point in the incident convex space
+                dVector3Copy(i1,r1);
+                dVector3Subtract(r1,cvx2.final_posr->pos,r1);
+                dVector3Copy(r1,tmp);
+                dMultiply1_331(r1,cvx2.final_posr->R,tmp);
+                // Check if it is outside the incident convex
+                out = false;
+                for(unsigned int j=0;j<cvx2.planecount;++j)
+                {
+                    d = r1[0]*cvx2.planes[(j*4)+0]+
+                        r1[1]*cvx2.planes[(j*4)+1]+
+                        r1[2]*cvx2.planes[(j*4)+2]-
+                        cvx2.planes[(j*4)+3];
+                    if(d>=0){out = true;break;};
+                }
+                if(!out)
+                {
+                    // check that the point is not a duplicate
+                    outside = false;
+                    for(int j=0;j<contacts;++j)
+                    {
+                        dContactGeom *cur_contact = SAFECONTACT(flags, contact, j, skip);
+                        if((cur_contact->pos[0] == i1[0]) &&
+                            (cur_contact->pos[1] == i1[1]) &&
+                            (cur_contact->pos[2] == i1[2]))
+                        {
+                            outside=true;
+                        }
+                    }
+                    if(!outside)
+                    {
+                        d = i1[0]*rplane[0]+
+                            i1[1]*rplane[1]+
+                            i1[2]*rplane[2]-
+                            rplane[3];
+                        if(d>0)
+                        {
+                            dContactGeom *target = SAFECONTACT(flags, contact, contacts, skip);
+                            dVector3Copy(i1, target->pos);
+                            dVector3Copy(rplane, target->normal);
+                            target->g1 = &cvx1;
+                            target->g2 = &cvx2;
+                            target->depth = d;
+                            ++contacts;
+                            if (contacts==maxc) return contacts;
+                        }
+                    }
+                }
+            }
+        }
+        else if (ccso.depth_type == 2) // edge-edge
+        {
+            dVector3 c1, c2;
+            ClosestPointBetweenSegments(ccso.e1a, ccso.e1b, ccso.e2a, ccso.e2b, c1, c2);
+
+            dContactGeom *target = SAFECONTACT(flags, contact, contacts, skip);
+            dSubtractVectors3(target->normal, c2, c1);
+            dReal depth_square = dCalcVectorLengthSquare3(target->normal);
+
+            if (dxSafeNormalize3(target->normal))
+            {
+                target->depth = dSqrt(depth_square);
+            }
+            else
+            {
+                // If edges coincide return direction from one center to the other as the contact normal
+                dVector3Copy(ccso.dist, target->normal);
+
+                if (!dxSafeNormalize3(target->normal))
+                {
+                    // If the both centers coincide as well return an arbitrary vector. The depth is going to be zero anyway.
+                    dAssignVector3(target->normal, 1, 0, 0);
+                }
+
+                target->depth = 0; // Since the edges coincide, return a contact of zero depth
+            }
+
+            target->g1 = &cvx1;
+            target->g2 = &cvx2;
+            dVector3Copy(c1, target->pos);
+            contacts++;
+        }
+        return contacts;
+}
+
+// Convex-vs-convex collider entry point; both geoms must be of dConvexClass.
+int dCollideConvexConvex (dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip)
+{
+    // Check the caller's contract before reinterpreting the geoms.
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dConvexClass);
+    dIASSERT (o2->type == dConvexClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+    dxConvex &firstHull = *(dxConvex*) o1;
+    dxConvex &secondHull = *(dxConvex*) o2;
+    return TestConvexIntersection(firstHull, secondHull, flags, contact, skip);
+}
+
+#if 0
+int dCollideRayConvex (dxGeom *o1, dxGeom *o2, int flags, // older ray-convex collider; compiled out by the enclosing "#if 0"
+                       dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT( o1->type == dRayClass );
+    dIASSERT( o2->type == dConvexClass );
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+    dxRay* ray = (dxRay*) o1;
+    dxConvex* convex = (dxConvex*) o2;
+    dVector3 origin,destination,contactpoint,out;
+    dReal depth;
+    dVector4 plane;
+    unsigned int *pPoly=convex->polygons; // cursor into the polygon list; advanced by one polygon per plane below
+    // Calculate ray origin and destination: the segment runs from the ray position along its local Z axis for ray->length
+    destination[0]=0;
+    destination[1]=0;
+    destination[2]= ray->length;
+    // -- Rotate -- (by the ray's rotation matrix)
+    dMultiply0_331(destination,ray->final_posr->R,destination);
+    origin[0]=ray->final_posr->pos[0];
+    origin[1]=ray->final_posr->pos[1];
+    origin[2]=ray->final_posr->pos[2];
+    destination[0]+=origin[0];
+    destination[1]+=origin[1];
+    destination[2]+=origin[2];
+    for(int i=0;i<convex->planecount;++i)
+    {
+        // Rotate the plane normal by the convex's rotation
+        dMultiply0_331(plane,convex->final_posr->R,convex->planes+(i*4));
+        // Translate: offset the plane constant by the rotated normal dotted with the convex's position
+        plane[3]=
+            (convex->planes[(i*4)+3])+
+            ((plane[0] * convex->final_posr->pos[0]) +
+            (plane[1] * convex->final_posr->pos[1]) +
+            (plane[2] * convex->final_posr->pos[2]));
+        if(IntersectSegmentPlane(origin, // NOTE(review): the 4th output is named "t" at the other call site in this file - confirm it is a distance before using it as depth
+            destination,
+            plane,
+            depth,
+            contactpoint))
+        {
+            if(IsPointInPolygon(contactpoint,pPoly,plane,convex,out))
+            {
+                contact->pos[0]=contactpoint[0];
+                contact->pos[1]=contactpoint[1];
+                contact->pos[2]=contactpoint[2];
+                contact->normal[0]=plane[0];
+                contact->normal[1]=plane[1];
+                contact->normal[2]=plane[2];
+                contact->depth=depth;
+                contact->g1 = ray;
+                contact->g2 = convex;
+                contact->side1 = -1;
+                contact->side2 = -1; // TODO: set plane index?
+                return 1; // the first plane whose polygon contains the hit point is reported
+            }
+        }
+        pPoly+=pPoly[0]+1; // step over this polygon's leading count entry and its pPoly[0] indices
+    }
+    return 0;
+}
+#else
+// Ray - Convex collider by David Walters, June 2006
+// Intersects the ray o1 with the convex hull o2 in the hull's local frame and
+// writes at most one contact to *contact (returns 1 on a hit, 0 otherwise).
+// contact->depth is the distance from the ray start along the ray direction.
+int dCollideRayConvex(dxGeom *o1, dxGeom *o2,
+                      int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT(skip >= (int)sizeof(dContactGeom));
+    dIASSERT(o1->type == dRayClass);
+    dIASSERT(o2->type == dConvexClass);
+    dIASSERT((flags & NUMC_MASK) >= 1);
+
+    dxRay* ray = (dxRay*)o1;
+    dxConvex* convex = (dxConvex*)o2;
+
+    contact->g1 = ray;
+    contact->g2 = convex;
+    contact->side1 = -1;
+    contact->side2 = -1; // TODO: set plane index?
+
+    dReal alpha, beta, nsign;
+    int flag = 0;
+
+    // Compute some useful info
+    dVector3 ray_pos = {
+        ray->final_posr->pos[0] - convex->final_posr->pos[0],
+        ray->final_posr->pos[1] - convex->final_posr->pos[1],
+        ray->final_posr->pos[2] - convex->final_posr->pos[2]
+    };
+
+    dVector3 ray_dir = {
+        ray->final_posr->R[0 * 4 + 2],
+        ray->final_posr->R[1 * 4 + 2],
+        ray->final_posr->R[2 * 4 + 2]
+    };
+
+    // Bring start point and direction into the hull's local frame.
+    dMultiply1_331(ray_pos, convex->final_posr->R, ray_pos);
+    dMultiply1_331(ray_dir, convex->final_posr->R, ray_dir);
+
+    for (unsigned int i = 0; i < convex->planecount; ++i)
+    {
+        // Alias this plane.
+        const dReal* plane = convex->planes + (i * 4);
+
+        // If alpha >= 0 then start point is outside of plane.
+        alpha = dCalcVectorDot3(plane, ray_pos) - plane[3];
+
+        // Any non-negative alpha means the ray start is _outside_ of the hull
+        if (alpha >= 0)
+        {
+            flag = 1;
+            break;
+        }
+    }
+
+    // If the ray starts inside the convex hull, then everything is flipped.
+    nsign = (flag) ? REAL(1.0) : REAL(-1.0);
+
+    // Find closest contact point; an infinite depth means "no contact yet".
+    contact->depth = dInfinity;
+
+    for (unsigned int i = 0; i < convex->planecount; ++i)
+    {
+        // Alias this plane.
+        const dReal* plane = convex->planes + (i * 4);
+
+        // If alpha >= 0 then point is outside of plane.
+        alpha = nsign * (dCalcVectorDot3(plane, ray_pos) - plane[3]);
+
+        // Compute [ plane-normal DOT ray-normal ], (/flip)
+        beta = dCalcVectorDot3(plane, ray_dir) * nsign;
+
+        // Skip planes the ray is not heading towards or has already passed.
+        if (beta >= -dEpsilon || alpha < 0)
+            continue;
+        // Distance travelled along the ray to reach the plane. alpha alone is the
+        // perpendicular distance, which equals it only for a head-on ray.
+        const dReal hit_dist = alpha / -beta;
+        // Within maximum ray length and closer than the current best distance?
+        if (hit_dist <= ray->length && hit_dist < contact->depth)
+        {
+            // Candidate point, kept local so that a rejected candidate cannot
+            // overwrite the position of an already accepted contact.
+            const dVector3 hit_pos = {
+                ray_pos[0] + hit_dist * ray_dir[0],
+                ray_pos[1] + hit_dist * ray_dir[1],
+                ray_pos[2] + hit_dist * ray_dir[2]
+            };
+            flag = 0;
+
+            // For all _other_ planes.
+            for (unsigned int j = 0; j < convex->planecount; ++j)
+            {
+                if (i == j)
+                    continue; // Skip self.
+
+                // Alias this plane.
+                const dReal* planej = convex->planes + (j * 4);
+
+                // If the candidate lies in front of any other plane it is not on
+                // the surface of the convex hull - it's just intersecting some
+                // part of this plane's infinite extent.
+                if (dCalcVectorDot3(planej, hit_pos) - planej[3] > dEpsilon)
+                {
+                    flag = 1;
+                    break;
+                }
+            }
+
+            // Contact point isn't outside hull's surface? then it's a good contact!
+            if (flag == 0)
+            {
+                contact->pos[0] = hit_pos[0];
+                contact->pos[1] = hit_pos[1];
+                contact->pos[2] = hit_pos[2];
+
+                // Store the contact normal, possibly flipped.
+                contact->normal[0] = nsign * plane[0];
+                contact->normal[1] = nsign * plane[1];
+                contact->normal[2] = nsign * plane[2];
+
+                // Store depth
+                contact->depth = hit_dist;
+
+                if ((flags & CONTACTS_UNIMPORTANT) && contact->depth <= ray->length)
+                {
+                    // Break on any contact if contacts are not important
+                    break;
+                }
+            }
+        }
+    }
+    // Contact?
+    if (contact->depth <= ray->length)
+    {
+        // Adjust contact position and normal back to global space
+        dMultiply0_331(contact->pos, convex->final_posr->R, contact->pos);
+        dMultiply0_331(contact->normal, convex->final_posr->R, contact->normal);
+        contact->pos[0] += convex->final_posr->pos[0];
+        contact->pos[1] += convex->final_posr->pos[1];
+        contact->pos[2] += convex->final_posr->pos[2];
+        return 1;
+    }
+    return 0;
+}
+
+#endif
+//<-- Convex Collision
diff --git a/libs/ode-0.16.1/ode/src/coop_matrix_types.h b/libs/ode-0.16.1/ode/src/coop_matrix_types.h
new file mode 100644
index 0000000..d94e04b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/coop_matrix_types.h
@@ -0,0 +1,158 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// Cooperative matrix algorithm types
+// Copyright (C) 2017-2019 Oleh Derevenko (odar@eleks.com - change all "a" to "e")
+
+
+#ifndef _ODE_COOP_MATRIX_TYPES_H_
+#define _ODE_COOP_MATRIX_TYPES_H_
+
+
+
+#include "threadingutils.h"
+#include "common.h"
+#include "error.h"
+
+
+#ifndef dCOOPERATIVE_ENABLED
+// Not predefined by the build: switch it on when atomics are enabled and the threading interface is not disabled.
+#if dATOMICS_ENABLED && !dTHREADING_INTF_DISABLED
+
+#define dCOOPERATIVE_ENABLED 1
+
+
+#endif // #if dATOMICS_ENABLED && !dTHREADING_INTF_DISABLED
+
+
+#endif // #ifndef dCOOPERATIVE_ENABLED
+
+// Alignment constant; presumably applied to per-thread data so that it does not share cache lines - confirm at the use sites.
+enum
+{
+    COOP_THREAD_DATA_ALIGNMENT_SIZE      = 64, // Typical size of a cache line
+};
+
+// Integer type that carries a packed cell descriptor (see MAKE_CELLDESCRIPTOR below).
+typedef uintptr cellindexint;
+
+// Selects one of two context instances; the value is stored in the low bits of a cell descriptor.
+enum CellContextInstance
+{
+    CCI__MIN,
+
+    CCI_FIRST = CCI__MIN,
+    CCI_SECOND,
+
+    CCI__MAX, // one past the last valid instance value
+    CCI__LOG2_OF_MAX = 1, // number of bits reserved for an instance value in a cell descriptor
+
+    CCI__DEFAULT = CCI__MIN,
+};
+dSASSERT(1 << CCI__LOG2_OF_MAX >= CCI__MAX); // the reserved bits must be able to hold every instance value
+
+// Returns the other one of the two context instances (CCI_FIRST <-> CCI_SECOND).
+static inline CellContextInstance buildNextContextInstance(CellContextInstance instance)
+{
+    dSASSERT(CCI__MAX == 2); // the mirroring below only works for exactly two instances
+    dIASSERT(dIN_RANGE(instance, CCI__MIN, CCI__MAX));
+    const int instanceSum = CCI_FIRST + CCI_SECOND;
+    return (CellContextInstance)(instanceSum - instance);
+}
+
+// Bit layout of a cell descriptor: context instance in the low bits, then the lock bit, then the column index.
+enum
+{
+    CELLDESC_CCI_BITMASK    = (1 << CCI__LOG2_OF_MAX) - 1, // mask of the context-instance bits
+    CELLDESC_LOCK_BIT       = 1 << CCI__LOG2_OF_MAX, // first bit above the context-instance bits
+    CELLDESC__HELPER_BITS   = CELLDESC_CCI_BITMASK | CELLDESC_LOCK_BIT, // every bit that is not part of the column index
+    CELLDESC__COLINDEX_BASE = CELLDESC__HELPER_BITS + 1, // multiplier that places the column index above the helper bits
+};
+// Cell descriptor packing helpers: value = columnIndex * CELLDESC__COLINDEX_BASE + contextInstance (+ CELLDESC_LOCK_BIT when locked).
+#define MAKE_CELLDESCRIPTOR(columnIndex, contextInstance, locked) ((cellindexint)((cellindexint)(columnIndex) * CELLDESC__COLINDEX_BASE + (contextInstance) + ((locked) ? CELLDESC_LOCK_BIT : 0)))
+#define MARK_CELLDESCRIPTOR_LOCKED(descriptor) ((cellindexint)((descriptor) | CELLDESC_LOCK_BIT))
+#define GET_CELLDESCRIPTOR_COLUMNINDEX(descriptor) ((unsigned int)((cellindexint)(descriptor) / CELLDESC__COLINDEX_BASE))
+#define GET_CELLDESCRIPTOR_CONTEXTINSTANCE(descriptor) ((CellContextInstance)((descriptor) & CELLDESC_CCI_BITMASK))
+#define GET_CELLDESCRIPTOR_ISLOCKED(descriptor) (((descriptor) & CELLDESC_LOCK_BIT) != 0)
+// Descriptor built from the column index unpacked from (-1), the last context instance, and the lock bit set.
+#define INVALID_CELLDESCRIPTOR      MAKE_CELLDESCRIPTOR(GET_CELLDESCRIPTOR_COLUMNINDEX(-1), CCI__MAX - 1, true)
+
+// Block processing states; BPS_COMPETING_FOR_A_BLOCK is -1 and the remaining values continue from 0.
+enum BlockProcessingState
+{
+    BPS_COMPETING_FOR_A_BLOCK = -1,
+    BPS_NO_BLOCKS_PROCESSED,
+    BPS_SOME_BLOCKS_PROCESSED,
+};
+
+// Static forwarders to the global atomic primitives; when dCOOPERATIVE_ENABLED is off each one only asserts.
+class CooperativeAtomics
+{
+public:
+    static atomicord32 AtomicDecrementUint32(volatile atomicord32 *paoDestination)
+    {
+#if !dCOOPERATIVE_ENABLED
+        dIASSERT(false); return 0; // The function is not supposed to be called in this case
+#else // #if !dCOOPERATIVE_ENABLED
+        return ::AtomicDecrement(paoDestination);
+#endif // #if !dCOOPERATIVE_ENABLED
+    }
+    // Forwards to ::AtomicCompareExchange on a 32-bit atomic value.
+    static bool AtomicCompareExchangeUint32(volatile atomicord32 *paoDestination, atomicord32 aoComparand, atomicord32 aoExchange)
+    {
+#if !dCOOPERATIVE_ENABLED
+        dIASSERT(false); return false; // The function is not supposed to be called in this case
+#else // #if !dCOOPERATIVE_ENABLED
+        return ::AtomicCompareExchange(paoDestination, aoComparand, aoExchange);
+#endif // #if !dCOOPERATIVE_ENABLED
+    }
+    // Forwards to ::AtomicCompareExchangePointer, treating the cell index as a pointer-sized atomic.
+    static bool AtomicCompareExchangeCellindexint(volatile cellindexint *destination, cellindexint comparand, cellindexint exchange)
+    {
+#if !dCOOPERATIVE_ENABLED
+        dIASSERT(false); return false; // The function is not supposed to be called in this case
+#else // #if !dCOOPERATIVE_ENABLED
+        return ::AtomicCompareExchangePointer((volatile atomicptr *)destination, (atomicptr)comparand, (atomicptr)exchange);
+#endif // #if !dCOOPERATIVE_ENABLED
+    }
+    // Forwards to ::AtomicStorePointer, treating the cell index as a pointer-sized atomic.
+    static void AtomicStoreCellindexint(volatile cellindexint *destination, cellindexint value)
+    {
+#if !dCOOPERATIVE_ENABLED
+        dIASSERT(false); // The function is not supposed to be called in this case
+#else // #if !dCOOPERATIVE_ENABLED
+        ::AtomicStorePointer((volatile atomicptr *)destination, (atomicptr)value);
+#endif // #if !dCOOPERATIVE_ENABLED
+    }
+    // Forwards to the global ::AtomicReadReorderBarrier.
+    static void AtomicReadReorderBarrier()
+    {
+#if !dCOOPERATIVE_ENABLED
+        dIASSERT(false); // The function is not supposed to be called in this case
+#else // #if !dCOOPERATIVE_ENABLED
+        ::AtomicReadReorderBarrier();
+#endif // #if !dCOOPERATIVE_ENABLED
+    }
+};
+
+
+#endif // #ifndef _ODE_COOP_MATRIX_TYPES_H_
diff --git a/libs/ode-0.16.1/ode/src/cylinder.cpp b/libs/ode-0.16.1/ode/src/cylinder.cpp
new file mode 100644
index 0000000..cf5cc64
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/cylinder.cpp
@@ -0,0 +1,108 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+standard ODE geometry primitives: public API and pairwise collision functions.
+
+the rule is that only the low level primitive collision functions should set
+dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+// Larger of A and B. Function-like macro: the selected argument is evaluated twice, so avoid side effects.
+#define dMAX(A,B)  ((A)>(B) ? (A) : (B))
+
+
+// flat cylinder public API
+
+// Creates a flat cylinder geom with the given radius and length; both must be non-negative.
+dxCylinder::dxCylinder (dSpaceID space, dReal _radius, dReal _length) : dxGeom (space,1)
+{
+    dAASSERT (_radius >= 0 && _length >= 0);
+    type = dCylinderClass;
+    lz = _length;
+    radius = _radius;
+    updateZeroSizedFlag(_radius == 0 || _length == 0); // flagged as zero-sized when either dimension is zero
+}
+
+
+// Recomputes aabb[] (min/max pairs for x, y, z) from the geom's final position and rotation.
+void dxCylinder::computeAABB()
+{
+    const dMatrix3& R = final_posr->R;
+    const dVector3& pos = final_posr->pos;
+
+    // Elements 2, 6 and 10 of R (column 2) weight the length lz for the x, y and z extents.
+    for (int axis = 0; axis < 3; ++axis)
+    {
+        const dReal axisComponent = R[4*axis + 2];
+        // The difference is clamped at zero before it is handed to dSqrt.
+        const dReal oneMinusSquare = (dReal)(REAL(1.0) - axisComponent*axisComponent);
+        const dReal radialReach = radius * dSqrt(dMAX(REAL(0.0), oneMinusSquare));
+        const dReal halfExtent = dFabs(axisComponent*lz*REAL(0.5)) + radialReach;
+
+        aabb[2*axis] = pos[axis] - halfExtent;
+        aabb[2*axis + 1] = pos[axis] + halfExtent;
+    }
+}
+
+// Allocates a new dxCylinder with the given radius and length in the given space and returns it as a dGeomID.
+dGeomID dCreateCylinder (dSpaceID space, dReal radius, dReal length)
+{
+    return new dxCylinder (space,radius,length);
+}
+// Sets radius and length of a cylinder geom (both must be >= 0), refreshes its zero-size flag and marks it moved.
+void dGeomCylinderSetParams (dGeomID cylinder, dReal radius, dReal length)
+{
+    dUASSERT (cylinder && cylinder->type == dCylinderClass,"argument not a cylinder");
+    dAASSERT (radius >= 0 && length >= 0);
+    dxCylinder *c = (dxCylinder*) cylinder;
+    c->radius = radius;
+    c->lz = length;
+    c->updateZeroSizedFlag(!radius || !length);
+    dGeomMoved (cylinder);
+}
+// Reads back the radius and length of a cylinder geom into *radius and *length (both pointers must be valid).
+void dGeomCylinderGetParams (dGeomID cylinder, dReal *radius, dReal *length)
+{
+    dUASSERT (cylinder && cylinder->type == dCylinderClass,"argument not a cylinder");
+    dxCylinder *c = (dxCylinder*) cylinder;
+    *radius = c->radius;
+    *length = c->lz;
+}
+
+
diff --git a/libs/ode-0.16.1/ode/src/default_threading.cpp b/libs/ode-0.16.1/ode/src/default_threading.cpp
new file mode 100644
index 0000000..7f255f6
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/default_threading.cpp
@@ -0,0 +1,77 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class header file.                             *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * The default threading instance holder class implementation
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#include <ode/common.h>
+#include <ode/threading_impl.h>
+#include "config.h"
+#include "default_threading.h"
+#include "error.h"
+
+// Default threading implementation and its function table; both stay NULL until initializeDefaultThreading() succeeds.
+/*static */dThreadingImplementationID DefaultThreadingHolder::m_defaultThreadingImpl = NULL;
+/*static */const dThreadingFunctionsInfo *DefaultThreadingHolder::m_defaultThreadingFunctions = NULL;
+
+
+/*static */
+bool DefaultThreadingHolder::initializeDefaultThreading()
+{
+    // Must not be called while a default implementation is already installed.
+    dIASSERT(m_defaultThreadingImpl == NULL);
+
+    // Allocate the self-threaded implementation; a NULL result is reported as failure.
+    dThreadingImplementationID selfThreadedImpl = dThreadingAllocateSelfThreadedImplementation();
+    if (selfThreadedImpl == NULL)
+    {
+        return false;
+    }
+
+    // Store the function table first, then the implementation handle.
+    m_defaultThreadingFunctions = dThreadingImplementationGetFunctions(selfThreadedImpl);
+    m_defaultThreadingImpl = selfThreadedImpl;
+
+    return true;
+}
+
+/*static */
+void DefaultThreadingHolder::finalizeDefaultThreading()
+{
+    dThreadingImplementationID installedImpl = m_defaultThreadingImpl;
+    if (installedImpl == NULL)
+    {
+        return; // nothing is installed, so there is nothing to release
+    }
+
+    dThreadingFreeImplementation(installedImpl);
+    m_defaultThreadingFunctions = NULL;
+    m_defaultThreadingImpl = NULL;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/default_threading.h b/libs/ode-0.16.1/ode/src/default_threading.h
new file mode 100644
index 0000000..372a777
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/default_threading.h
@@ -0,0 +1,55 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class header file.                             *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * A default threading instance holder class definition
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#ifndef _ODE__PRIVATE_DEFAULT_THREADING_H_
+#define _ODE__PRIVATE_DEFAULT_THREADING_H_
+
+
+#include <ode/threading.h>
+
+
+class DefaultThreadingHolder
+{
+public:
+    static bool initializeDefaultThreading();
+    static void finalizeDefaultThreading();
+
+    static dThreadingImplementationID getDefaultThreadingImpl() { return m_defaultThreadingImpl; }
+    static const dThreadingFunctionsInfo *getDefaultThreadingFunctions() { return m_defaultThreadingFunctions; }
+
+private:
+    static dThreadingImplementationID       m_defaultThreadingImpl;
+    static const dThreadingFunctionsInfo    *m_defaultThreadingFunctions;
+};
+
+
+#endif // #ifndef _ODE__PRIVATE_DEFAULT_THREADING_H_
diff --git a/libs/ode-0.16.1/ode/src/error.cpp b/libs/ode-0.16.1/ode/src/error.cpp
new file mode 100644
index 0000000..0b1a979
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/error.cpp
@@ -0,0 +1,179 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/error.h>
+#include "config.h"
+
+
+static dMessageFunction *error_function = 0;
+static dMessageFunction *debug_function = 0;
+static dMessageFunction *message_function = 0;
+
+
+extern "C" void dSetErrorHandler (dMessageFunction *fn)
+{
+    error_function = fn;
+}
+
+
+extern "C" void dSetDebugHandler (dMessageFunction *fn)
+{
+    debug_function = fn;
+}
+
+
+extern "C" void dSetMessageHandler (dMessageFunction *fn)
+{
+    message_function = fn;
+}
+
+
+extern "C" dMessageFunction *dGetErrorHandler()
+{
+    return error_function;
+}
+
+
+extern "C" dMessageFunction *dGetDebugHandler()
+{
+    return debug_function;
+}
+
+
+extern "C" dMessageFunction *dGetMessageHandler()
+{
+    return message_function;
+}
+
+
+static void printMessage (int num, const char *msg1, const char *msg2,
+                          va_list ap)
+{
+    fflush (stderr);
+    fflush (stdout);
+    if (num) fprintf (stderr,"\n%s %d: ",msg1,num);
+    else fprintf (stderr,"\n%s: ",msg1);
+    vfprintf (stderr,msg2,ap);
+    fprintf (stderr,"\n");
+    fflush (stderr);
+}
+
+//****************************************************************************
+// unix
+
+#ifndef WIN32
+
+extern "C" void dError (int num, const char *msg, ...)
+{
+    va_list ap;
+    va_start (ap,msg);
+    if (error_function) error_function (num,msg,ap);
+    else printMessage (num,"ODE Error",msg,ap);
+    va_end (ap);
+    exit (1);
+}
+
+
+extern "C" void dDebug (int num, const char *msg, ...)
+{
+    va_list ap;
+    va_start (ap,msg);
+    if (debug_function) debug_function (num,msg,ap);
+    else printMessage (num,"ODE INTERNAL ERROR",msg,ap);
+    va_end (ap);
+    // *((char *)0) = 0;   ... commit SEGVicide
+    abort();
+}
+
+
+extern "C" void dMessage (int num, const char *msg, ...)
+{
+    va_list ap;
+    va_start (ap,msg);
+    if (message_function) message_function (num,msg,ap);
+    else printMessage (num,"ODE Message",msg,ap);
+    va_end (ap);
+}
+
+#endif
+
+//****************************************************************************
+// windows
+
+#ifdef WIN32
+
+// isn't cygwin annoying!
+#ifdef CYGWIN
+#define _snprintf snprintf
+#define _vsnprintf vsnprintf
+#endif
+
+
+#include "windows.h"
+
+
+extern "C" void dError (int num, const char *msg, ...)
+{   // Windows: report through the user error handler or a message box, then exit(1).
+    va_list ap;
+    va_start (ap,msg);
+    if (error_function) error_function (num,msg,ap);
+    else {
+        char s[1000],title[100];
+        _snprintf (title,sizeof(title),"ODE Error %d",num);
+        _vsnprintf (s,sizeof(s),msg,ap);
+        s[sizeof(s)-1] = 0;  // guarantee termination if the text was truncated
+        MessageBoxA(0,s,title,MB_OK | MB_ICONWARNING);  // char buffers: use the ANSI entry point even when UNICODE is defined
+    }
+    va_end (ap);
+    exit (1);
+}
+
+
+extern "C" void dDebug (int num, const char *msg, ...)
+{   // Windows: report through the user debug handler or a message box, then abort().
+    va_list ap;
+    va_start (ap,msg);
+    if (debug_function) debug_function (num,msg,ap);
+    else {
+        char s[1000],title[100];
+        _snprintf (title,sizeof(title),"ODE INTERNAL ERROR %d",num);
+        _vsnprintf (s,sizeof(s),msg,ap);
+        s[sizeof(s)-1] = 0;  // guarantee termination if the text was truncated
+        MessageBoxA(0,s,title,MB_OK | MB_ICONSTOP);  // char buffers: use the ANSI entry point even when UNICODE is defined
+    }
+    va_end (ap);
+    abort();
+}
+
+
+extern "C" void dMessage (int num, const char *msg, ...)
+{
+    va_list ap;
+    va_start (ap,msg);
+    if (message_function) message_function (num,msg,ap);
+    else printMessage (num,"ODE Message",msg,ap);
+    va_end (ap);
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/error.h b/libs/ode-0.16.1/ode/src/error.h
new file mode 100644
index 0000000..4f561f8
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/error.h
@@ -0,0 +1,101 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* Library private error handling functions and macros */
+
+#ifndef _ODE__PRIVATE_ERROR_H_
+#define _ODE__PRIVATE_ERROR_H_
+
+#include <ode/error.h>
+#include <ode/common.h>
+
+
+
+/* debugging:
+ *   IASSERT  is an internal assertion, i.e. a consistency check. if it fails
+ *            we want to know where.
+ *   UASSERT  is a user assertion, i.e. if it fails a nice error message
+ *            should be printed for the user.
+ *   AASSERT  is an arguments assertion, i.e. if it fails "bad argument(s)"
+ *            is printed.
+ *   DEBUGMSG just prints out a message
+ */
+
+#  if defined(__STDC__) && __STDC_VERSION__ >= 199901L
+#    define __FUNCTION__ __func__
+#  endif
+#ifndef dNODEBUG
+#  ifdef __GNUC__
+#    define dIASSERT(a) { if (!(a)) { dDebug (d_ERR_IASSERT, \
+      "assertion \"" #a "\" failed in %s() [%s:%u]",__FUNCTION__,__FILE__,__LINE__); } }
+#    define dUASSERT(a,msg) { if (!(a)) { dDebug (d_ERR_UASSERT, \
+      msg " in %s()", __FUNCTION__); } }
+#    define dDEBUGMSG(msg) { dMessage (d_ERR_UASSERT,				\
+  msg " in %s() [%s:%u]", __FUNCTION__,__FILE__,__LINE__); }
+#  else // not __GNUC__
+#    define dIASSERT(a) { if (!(a)) { dDebug (d_ERR_IASSERT, \
+      "assertion \"" #a "\" failed in %s:%u",__FILE__,__LINE__); } }
+#    define dUASSERT(a,msg) { if (!(a)) { dDebug (d_ERR_UASSERT, \
+      msg " (%s:%u)", __FILE__,__LINE__); } }
+#    define dDEBUGMSG(msg) { dMessage (d_ERR_UASSERT, \
+      msg " (%s:%u)", __FILE__,__LINE__); }
+#  endif
+#  define dIVERIFY(a) dIASSERT(a)
+#  define dUVERIFY(a, msg) dUASSERT(a, msg)
+#else
+#  define dIASSERT(a) ((void)0)
+#  define dUASSERT(a,msg) ((void)0)
+#  define dDEBUGMSG(msg) ((void)0)
+#  define dIVERIFY(a) ((void)(a))
+#  define dUVERIFY(a, msg) ((void)(a))
+#endif
+
+#ifdef __GNUC__
+#define dUNUSED(Name) Name __attribute__((unused))
+#else // not __GNUC__
+#define dUNUSED(Name) Name
+#endif
+
+#if __cplusplus >= 201103L 
+#define dSASSERT(e)  static_assert(e, #e)
+#define dSMSGASSERT(e, message)  static_assert(e, message)
+#else
+#define d_SASSERT_INNER_TOKENPASTE(x, y) x ## y
+#define d_SASSERT_TOKENPASTE(x, y) d_SASSERT_INNER_TOKENPASTE(x, y)
+#define dSASSERT(e) typedef char dUNUSED(d_SASSERT_TOKENPASTE(d_StaticAssertionFailed_, __LINE__)[(e)?1:-1])
+#define dSMSGASSERT(e, message)  dSASSERT(e)
+#endif
+
+#  ifdef __GNUC__
+#    define dICHECK(a) { if (!(a)) { dDebug (d_ERR_IASSERT, \
+      "assertion \"" #a "\" failed in %s() [%s:%u]",__FUNCTION__,__FILE__,__LINE__); *(int *)0 = 0; } }
+#  else // not __GNUC__
+#    define dICHECK(a) { if (!(a)) { dDebug (d_ERR_IASSERT, \
+      "assertion \"" #a "\" failed in %s:%u",__FILE__,__LINE__); *(int *)0 = 0; } }
+#  endif
+
+// Argument assert is a special case of user assert
+#define dAASSERT(a) dUASSERT(a, "Bad argument(s)")
+#define dAVERIFY(a) dUVERIFY(a, "Bad argument(s)")
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/export-dif.cpp b/libs/ode-0.16.1/ode/src/export-dif.cpp
new file mode 100644
index 0000000..450021a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/export-dif.cpp
@@ -0,0 +1,620 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Export a DIF (Dynamics Interchange Format) file.
+ */
+
+
+// @@@ TODO:
+//	* export all spaces, and geoms in spaces, not just ones attached to bodies
+//	  (separate export function?)
+//	* say the space each geom is in, so reader can construct space hierarchy
+//	* limot --> separate out into limits and motors?
+//	* make sure ODE-specific parameters divided out
+
+
+#include <ode/ode.h>
+#include "config.h"
+#include "objects.h"
+#include "joints/joints.h"
+#include "collision_kernel.h"
+
+//***************************************************************************
+// utility
+
+struct PrintingContext {
+    FILE *file;		// file to write to
+    int precision;		// digits of precision to print
+    int indent;		// number of levels of indent
+
+    void printIndent();
+    void printReal (dReal x);
+    void print (const char *name, int x);
+    void print (const char *name, unsigned x);
+    void print (const char *name, dReal x);
+    void print (const char *name, const dReal *x, int n=3);
+    void print (const char *name, const char *x=0);
+    void printNonzero (const char *name, dReal x);
+    void printNonzero (const char *name, const dReal x[3]);
+};
+
+
+void PrintingContext::printIndent()
+{
+    for (int i=0; i<indent; i++) fputc ('\t',file);
+}
+
+
+void PrintingContext::print (const char *name, int x)
+{
+    printIndent();
+    fprintf (file,"%s = %d,\n",name,x);
+}
+
+void PrintingContext::print (const char *name, unsigned x)
+{
+    printIndent();
+    fprintf (file,"%s = %u,\n",name,x);
+}
+
+void PrintingContext::printReal (dReal x)
+{
+    if (x==dInfinity) {
+        fprintf (file,"inf");
+    }
+    else if (x==-dInfinity) {
+        fprintf (file,"-inf");
+    }
+    else {
+        fprintf (file,"%.*g",precision,x);
+    }
+}
+
+
+void PrintingContext::print (const char *name, dReal x)
+{
+    printIndent();
+    fprintf (file,"%s = ",name);
+    printReal (x);
+    fprintf (file,",\n");
+}
+
+
+void PrintingContext::print (const char *name, const dReal *x, int n)
+{
+    printIndent();
+    fprintf (file,"%s = {",name);
+    for (int i=0; i<n; i++) {
+        printReal (x[i]);
+        if (i < n-1) fputc (',',file);
+    }
+    fprintf (file,"},\n");
+}
+
+
+void PrintingContext::print (const char *name, const char *x)
+{
+    printIndent();
+    if (x) {
+        fprintf (file,"%s = \"%s\",\n",name,x);
+    }
+    else {
+        fprintf (file,"%s\n",name);
+    }
+}
+
+
+void PrintingContext::printNonzero (const char *name, dReal x)
+{
+    if (x != 0) print (name,x);
+}
+
+
+void PrintingContext::printNonzero (const char *name, const dReal x[3])
+{   // Print the 3-vector x under `name`; it is skipped only when every component is zero.
+    if (x[0] != 0 || x[1] != 0 || x[2] != 0) print (name,x);
+}
+
+//***************************************************************************
+// joints
+
+
+static void printLimot (PrintingContext &c, dxJointLimitMotor &limot, int num)
+{
+    if (num >= 0) {
+        c.printIndent();
+        fprintf (c.file,"limit%d = {\n",num);
+    }
+    else {
+        c.print ("limit = {");
+    }
+    c.indent++;
+    c.print ("low_stop",limot.lostop);
+    c.print ("high_stop",limot.histop);
+    c.printNonzero ("bounce",limot.bounce);
+    c.print ("ODE = {");
+    c.indent++;
+    c.printNonzero ("stop_erp",limot.stop_erp);
+    c.printNonzero ("stop_cfm",limot.stop_cfm);
+    c.indent--;
+    c.print ("},");
+    c.indent--;
+    c.print ("},");
+
+    if (num >= 0) {
+        c.printIndent();
+        fprintf (c.file,"motor%d = {\n",num);
+    }
+    else {
+        c.print ("motor = {");
+    }
+    c.indent++;
+    c.printNonzero ("vel",limot.vel);
+    c.printNonzero ("fmax",limot.fmax);
+    c.print ("ODE = {");
+    c.indent++;
+    c.printNonzero ("fudge_factor",limot.fudge_factor);
+    c.printNonzero ("normal_cfm",limot.normal_cfm);
+    c.indent--;
+    c.print ("},");
+    c.indent--;
+    c.print ("},");
+}
+
+
+static const char *getJointName (dxJoint *j)
+{
+    switch (j->type()) {
+        case dJointTypeBall: return "ball";
+        case dJointTypeHinge: return "hinge";
+        case dJointTypeSlider: return "slider";
+        case dJointTypeContact: return "contact";
+        case dJointTypeUniversal: return "universal";
+        case dJointTypeHinge2: return "ODE_hinge2";
+        case dJointTypeFixed: return "fixed";
+        case dJointTypeNull: return "null";
+        case dJointTypeAMotor: return "ODE_angular_motor";
+        case dJointTypeLMotor: return "ODE_linear_motor";
+        case dJointTypePR: return "PR";
+        case dJointTypePU: return "PU";
+    case dJointTypePiston: return "piston";
+        default: return "unknown";
+    }
+}
+
+
+static void printBall (PrintingContext &c, dxJoint *j)
+{
+    dxJointBall *b = (dxJointBall*) j;
+    c.print ("anchor1",b->anchor1);
+    c.print ("anchor2",b->anchor2);
+}
+
+
+static void printHinge (PrintingContext &c, dxJoint *j)
+{
+    dxJointHinge *h = (dxJointHinge*) j;
+    c.print ("anchor1",h->anchor1);
+    c.print ("anchor2",h->anchor2);
+    c.print ("axis1",h->axis1);
+    c.print ("axis2",h->axis2);
+    c.print ("qrel",h->qrel,4);
+    printLimot (c,h->limot,-1);
+}
+
+
+static void printSlider (PrintingContext &c, dxJoint *j)
+{
+    dxJointSlider *s = (dxJointSlider*) j;
+    c.print ("axis1",s->axis1);
+    c.print ("qrel",s->qrel,4);
+    c.print ("offset",s->offset);
+    printLimot (c,s->limot,-1);
+}
+
+
+static void printContact (PrintingContext &c, dxJoint *j)
+{
+    dxJointContact *ct = (dxJointContact*) j;
+    int mode = ct->contact.surface.mode;
+    c.print ("pos",ct->contact.geom.pos);
+    c.print ("normal",ct->contact.geom.normal);
+    c.print ("depth",ct->contact.geom.depth);
+    //@@@ may want to write the geoms g1 and g2 that are involved, for debugging.
+    //    to do this we must have written out all geoms in all spaces, not just
+    //    geoms that are attached to bodies.
+    c.print ("mu",ct->contact.surface.mu);
+    if (mode & dContactMu2) c.print ("mu2",ct->contact.surface.mu2);
+    if (mode & dContactBounce) c.print ("bounce",ct->contact.surface.bounce);
+    if (mode & dContactBounce) c.print ("bounce_vel",ct->contact.surface.bounce_vel);
+    if (mode & dContactSoftERP) c.print ("soft_ERP",ct->contact.surface.soft_erp);
+    if (mode & dContactSoftCFM) c.print ("soft_CFM",ct->contact.surface.soft_cfm);
+    if (mode & dContactMotion1) c.print ("motion1",ct->contact.surface.motion1);
+    if (mode & dContactMotion2) c.print ("motion2",ct->contact.surface.motion2);
+    if (mode & dContactSlip1) c.print ("slip1",ct->contact.surface.slip1);
+    if (mode & dContactSlip2) c.print ("slip2",ct->contact.surface.slip2);
+    int fa = 0;		// friction approximation code
+    if (mode & dContactApprox1_1) fa |= 1;
+    if (mode & dContactApprox1_2) fa |= 2;
+    if (fa) c.print ("friction_approximation",fa);
+    if (mode & dContactFDir1) c.print ("fdir1",ct->contact.fdir1);
+}
+
+
+static void printUniversal (PrintingContext &c, dxJoint *j)
+{
+    dxJointUniversal *u = (dxJointUniversal*) j;
+    c.print ("anchor1",u->anchor1);
+    c.print ("anchor2",u->anchor2);
+    c.print ("axis1",u->axis1);
+    c.print ("axis2",u->axis2);
+    c.print ("qrel1",u->qrel1,4);
+    c.print ("qrel2",u->qrel2,4);
+    printLimot (c,u->limot1,1);
+    printLimot (c,u->limot2,2);
+}
+
+
+static void printHinge2 (PrintingContext &c, dxJoint *j)
+{
+    dxJointHinge2 *h = (dxJointHinge2*) j;
+    c.print ("anchor1",h->anchor1);
+    c.print ("anchor2",h->anchor2);
+    c.print ("axis1",h->axis1);
+    c.print ("axis2",h->axis2);
+    c.print ("v1",h->v1);	//@@@ much better to write out 'qrel' here, if it's available
+    c.print ("v2",h->v2);
+    c.print ("susp_erp",h->susp_erp);
+    c.print ("susp_cfm",h->susp_cfm);
+    printLimot (c,h->limot1,1);
+    printLimot (c,h->limot2,2);
+}
+
+static void printPR (PrintingContext &c, dxJoint *j)
+{
+    dxJointPR *pr = (dxJointPR*) j;
+    c.print ("anchor2",pr->anchor2);
+    c.print ("axisR1",pr->axisR1);
+    c.print ("axisR2",pr->axisR2);
+    c.print ("axisP1",pr->axisP1);
+    c.print ("qrel",pr->qrel,4);
+    c.print ("offset",pr->offset);
+    printLimot (c,pr->limotP,1);
+    printLimot (c,pr->limotR,2);
+}
+
+static void printPU (PrintingContext &c, dxJoint *j)
+{
+    dxJointPU *pu = (dxJointPU*) j;
+    c.print ("anchor1",pu->anchor1);
+    c.print ("anchor2",pu->anchor2);
+    c.print ("axis1",pu->axis1);
+    c.print ("axis2",pu->axis2);
+    c.print ("axisP",pu->axisP1);
+    c.print ("qrel1",pu->qrel1,4);
+    c.print ("qrel2",pu->qrel2,4);
+    printLimot (c,pu->limot1,1);
+    printLimot (c,pu->limot2,2);
+    printLimot (c,pu->limotP,3);
+}
+
+static void printPiston (PrintingContext &c, dxJoint *j)
+{
+    dxJointPiston *rap = (dxJointPiston*) j;
+    c.print ("anchor1",rap->anchor1);
+    c.print ("anchor2",rap->anchor2);
+    c.print ("axis1",rap->axis1);
+    c.print ("axis2",rap->axis2);
+    c.print ("qrel",rap->qrel,4);
+    printLimot (c,rap->limotP,1);
+    printLimot (c, rap->limotR, 2);
+}
+
+static void printFixed (PrintingContext &c, dxJoint *j)
+{   // Write the fixed joint's relative rotation (qrel) and offset.
+    dxJointFixed *f = (dxJointFixed*) j;
+    c.print ("qrel",f->qrel,4);  // four components, matching how every other joint's qrel is printed in this file
+    c.print ("offset",f->offset);
+}
+
+static void printLMotor (PrintingContext &c, dxJoint *j)
+{
+    dxJointLMotor *a = (dxJointLMotor*) j;
+    c.print("num", a->num);
+    c.printIndent();
+    fprintf (c.file,"rel = {%d,%d,%d},\n",a->rel[0],a->rel[1],a->rel[2]);
+    c.print ("axis1",a->axis[0]);
+    c.print ("axis2",a->axis[1]);
+    c.print ("axis3",a->axis[2]);
+    for (int i=0; i<3; i++) printLimot (c,a->limot[i],i+1);
+}
+
+struct dxAMotorJointPrinter
+{
+    static void print(PrintingContext &c, dxJointAMotor *a)
+    {
+        c.print ("num",a->m_num);
+        c.print ("mode",a->m_mode);
+        c.printIndent();
+        fprintf (c.file,"rel = {%d,%d,%d},\n",a->m_rel[0],a->m_rel[1],a->m_rel[2]);
+        c.print ("axis1",a->m_axis[0]);
+        c.print ("axis2",a->m_axis[1]);
+        c.print ("axis3",a->m_axis[2]);
+        for (int i=0; i<3; i++) printLimot (c,a->m_limot[i],i+1);
+        c.print ("angle1",a->m_angle[0]);
+        c.print ("angle2",a->m_angle[1]);
+        c.print ("angle3",a->m_angle[2]);
+    }
+};
+
+static void printAMotor (PrintingContext &c, dxJoint *j)
+{
+    dxJointAMotor *a = (dxJointAMotor*) j;
+    dxAMotorJointPrinter::print(c, a);
+}
+
+//***************************************************************************
+// geometry
+
+static void printGeom (PrintingContext &c, dxGeom *g);
+
+static void printSphere (PrintingContext &c, dxGeom *g)
+{
+    c.print ("type","sphere");
+    c.print ("radius",dGeomSphereGetRadius (g));
+}
+
+
+static void printBox (PrintingContext &c, dxGeom *g)
+{
+    dVector3 sides;
+    dGeomBoxGetLengths (g,sides);
+    c.print ("type","box");
+    c.print ("sides",sides);
+}
+
+
+static void printCapsule (PrintingContext &c, dxGeom *g)
+{
+    dReal radius,length;
+    dGeomCapsuleGetParams (g,&radius,&length);
+    c.print ("type","capsule");
+    c.print ("radius",radius);
+    c.print ("length",length);
+}
+
+
+static void printCylinder (PrintingContext &c, dxGeom *g)
+{
+    dReal radius,length;
+    dGeomCylinderGetParams (g,&radius,&length);
+    c.print ("type","cylinder");
+    c.print ("radius",radius);
+    c.print ("length",length);
+}
+
+
+static void printPlane (PrintingContext &c, dxGeom *g)
+{
+    dVector4 e;
+    dGeomPlaneGetParams (g,e);
+    c.print ("type","plane");
+    c.print ("normal",e);
+    c.print ("d",e[3]);
+}
+
+
+static void printRay (PrintingContext &c, dxGeom *g)
+{
+    dReal length = dGeomRayGetLength (g);
+    c.print ("type","ray");
+    c.print ("length",length);
+}
+
+
+static void printConvex (PrintingContext &c, dxGeom * /*g*/)
+{
+    c.print ("type","convex");
+    ///@todo Print information about convex hull
+}
+
+
+
+static void printTriMesh (PrintingContext &c, dxGeom * /*g*/)
+{
+    c.print ("type","trimesh");
+    //@@@ i don't think that the trimesh accessor functions are really
+    //    sufficient to read out all the triangle data, and anyway we
+    //    should have a method of not duplicating trimesh data that is
+    //    shared.
+}
+
+
+static void printHeightfieldClass (PrintingContext &c, dxGeom * /*g*/)
+{
+    c.print ("type","heightfield");
+    ///@todo Print information about heightfield
+}
+
+
+static void printGeom (PrintingContext &c, dxGeom *g)
+{   // Emit category/collide bits (only when not all ones), the disabled flag, then the type-specific fields.
+    unsigned long category = dGeomGetCategoryBits (g);
+    if (category != (unsigned long)(~0)) {
+        c.printIndent();
+        fprintf (c.file,"category_bits = %lu,\n",category);  // trailing comma, like every other field written here
+    }
+    unsigned long collide = dGeomGetCollideBits (g);
+    if (collide != (unsigned long)(~0)) {
+        c.printIndent();
+        fprintf (c.file,"collide_bits = %lu,\n",collide);  // trailing comma, like every other field written here
+    }
+    if (!dGeomIsEnabled (g)) {
+        c.print ("disabled",1);
+    }
+    switch (g->type) {  // classes not listed below get no type-specific output
+        case dSphereClass: printSphere (c,g); break;
+        case dBoxClass: printBox (c,g); break;
+        case dCapsuleClass: printCapsule (c,g); break;
+        case dCylinderClass: printCylinder (c,g); break;
+        case dPlaneClass: printPlane (c,g); break;
+        case dRayClass: printRay (c,g); break;
+        case dConvexClass: printConvex (c,g); break;
+        case dTriMeshClass: printTriMesh (c,g); break;
+        case dHeightfieldClass: printHeightfieldClass (c,g); break;
+    }
+}
+
+//***************************************************************************
+// world
+
+void dWorldExportDIF (dWorldID w, FILE *file, const char *prefix)
+{   // Write world w, its bodies (with attached geoms) and its joints to `file`; `prefix` is prepended to the top-level names.
+    PrintingContext c;
+    c.file = file;
+#if defined(dSINGLE)
+    c.precision = 7;
+#else
+    c.precision = 15;
+#endif
+    c.indent = 1;
+
+    fprintf (file,"-- Dynamics Interchange Format v0.1\n\n%sworld = dynamics.world {\n",prefix);
+    c.print ("gravity",w->gravity);
+    c.print ("ODE = {");
+    c.indent++;
+    c.print ("ERP",w->global_erp);
+    c.print ("CFM",w->global_cfm);
+    c.print ("auto_disable = {");
+    c.indent++;
+    c.print ("linear_threshold",w->adis.linear_average_threshold);
+    c.print ("angular_threshold",w->adis.angular_average_threshold);
+    c.print ("average_samples",(int)w->adis.average_samples);
+    c.print ("idle_time",w->adis.idle_time);
+    c.print ("idle_steps",w->adis.idle_steps);
+    fprintf (file,"\t\t},\n\t},\n}\n");
+    c.indent -= 3;  // back to indent 0 for the top-level body/joint entries
+
+    // bodies
+    int num = 0;
+    fprintf (file,"%sbody = {}\n",prefix);
+    for (dxBody *b=w->firstbody; b; b=(dxBody*)b->next) {
+        b->tag = num;  // remember the index so the joint section can refer to this body
+        fprintf (file,"%sbody[%d] = dynamics.body {\n\tworld = %sworld,\n",prefix,num,prefix);
+        c.indent++;
+        c.print ("pos",b->posr.pos);
+        c.print ("q",b->q,4);
+        c.print ("lvel",b->lvel);
+        c.print ("avel",b->avel);
+        c.print ("mass",b->mass.mass);
+        fprintf (file,"\tI = {{");
+        for (int i=0; i<3; i++) {
+            for (int j=0; j<3; j++) {
+                c.printReal (b->mass.I[i*4+j]);  // rows of I are read with a stride of 4
+                if (j < 2) fputc (',',file);
+            }
+            if (i < 2) fprintf (file,"},{");
+        }
+        fprintf (file,"}},\n");
+        c.printNonzero ("com",b->mass.c);
+        c.print ("ODE = {");
+        c.indent++;
+        if (b->flags & dxBodyFlagFiniteRotation) c.print ("finite_rotation",1);
+        if (b->flags & dxBodyDisabled) c.print ("disabled",1);
+        if (b->flags & dxBodyNoGravity) c.print ("no_gravity",1);
+        if (b->flags & dxBodyAutoDisable) {
+            c.print ("auto_disable = {");
+            c.indent++;
+            c.print ("linear_threshold",b->adis.linear_average_threshold);
+            c.print ("angular_threshold",b->adis.angular_average_threshold);
+            c.print ("average_samples",(int)b->adis.average_samples);
+            c.print ("idle_time",b->adis.idle_time);
+            c.print ("idle_steps",b->adis.idle_steps);
+            c.print ("time_left",b->adis_timeleft);
+            c.print ("steps_left",b->adis_stepsleft);
+            c.indent--;
+            c.print ("},");
+        }
+        c.printNonzero ("facc",b->facc);
+        c.printNonzero ("tacc",b->tacc);
+        if (b->flags & dxBodyFlagFiniteRotationAxis) {
+            c.print ("finite_rotation_axis",b->finite_rot_axis);
+        }
+        c.indent--;
+        c.print ("},");
+        if (b->geom) {
+            c.print ("geometry = {");
+            c.indent++;
+            for (dxGeom *g=b->geom; g; g=g->body_next) {
+                c.print ("{");
+                c.indent++;
+                printGeom (c,g);
+                c.indent--;
+                c.print ("},");
+            }
+            c.indent--;
+            c.print ("},");
+        }
+        c.indent--;
+        c.print ("}");
+        num++;
+    }
+
+    // joints
+    num = 0;
+    fprintf (file,"%sjoint = {}\n",prefix);
+    for (dxJoint *j=w->firstjoint; j; j=(dxJoint*)j->next) {
+        c.indent++;
+        const char *name = getJointName (j);
+        fprintf (file,
+            "%sjoint[%d] = dynamics.%s_joint {\n"
+            "\tworld = %sworld,\n"
+            "\tbody = {"
+            ,prefix,num,name,prefix);
+
+        if ( j->node[0].body )
+            fprintf (file,"%sbody[%d]",prefix,j->node[0].body->tag);
+        if ( j->node[1].body )
+            fprintf (file,",%sbody[%d]",prefix,j->node[1].body->tag);
+        fprintf (file,"},\n");  // comma: more fields follow inside the same joint table
+
+        switch (j->type()) {
+            case dJointTypeBall: printBall (c,j); break;
+            case dJointTypeHinge: printHinge (c,j); break;
+            case dJointTypeSlider: printSlider (c,j); break;
+            case dJointTypeContact: printContact (c,j); break;
+            case dJointTypeUniversal: printUniversal (c,j); break;
+            case dJointTypeHinge2: printHinge2 (c,j); break;
+            case dJointTypeFixed: printFixed (c,j); break;
+            case dJointTypeAMotor: printAMotor (c,j); break;
+            case dJointTypeLMotor: printLMotor (c,j); break;
+            case dJointTypePR: printPR (c,j); break;
+            case dJointTypePU: printPU (c,j); break;
+            case dJointTypePiston: printPiston (c,j); break;
+            default: c.print("unknown joint");
+        }
+        c.indent--;
+        c.print ("}");
+        num++;
+    }
+}
diff --git a/libs/ode-0.16.1/ode/src/fastdot.cpp b/libs/ode-0.16.1/ode/src/fastdot.cpp
new file mode 100644
index 0000000..5594bc5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastdot.cpp
@@ -0,0 +1,46 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* generated code, do not edit. */
+
+#include <ode/common.h>
+#include "config.h"
+#include "matrix.h"
+
+#include "fastdot_impl.h"
+
+
+/*extern */ // Dot product over the first n elements of a and b; forwards to calculateLargeVectorDot.
+dReal dxDot (const dReal *a, const dReal *b, unsigned n)
+{
+    return calculateLargeVectorDot<1>(a, b, n); // template argument 1 is b_stride: b is read with a step of one element
+}
+
+
+#undef dDot
+
+/*extern */ // Dot product with an int element count; defined after "#undef dDot", presumably because a header may define dDot as a macro.
+dReal dDot (const dReal *a, const dReal *b, int n)
+{
+    return dxDot (a, b, n); // n converts implicitly to dxDot's unsigned parameter; NOTE(review): a negative n is not rejected here
+}
+
diff --git a/libs/ode-0.16.1/ode/src/fastdot_impl.h b/libs/ode-0.16.1/ode/src/fastdot_impl.h
new file mode 100644
index 0000000..f32e717
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastdot_impl.h
@@ -0,0 +1,51 @@
+
+
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_FASTDOT_IMPL_H_
+#define _ODE_FASTDOT_IMPL_H_
+
+// Returns the sum of a[i] * b[i * b_stride] for i in [0, n); whole groups of four are processed first, then the 0..3 element tail.
+template<unsigned b_stride>
+dReal calculateLargeVectorDot (const dReal *a, const dReal *b, unsigned n)
+{
+    const unsigned tailCount = n % 4U; // elements left over after the groups of four
+    const dReal *const unrolledEnd = a + (n - tailCount);
+    dReal accumulator = 0;
+    while (a != unrolledEnd) {
+        const dReal product0 = a[0] * b[0 * b_stride];
+        const dReal product1 = a[1] * b[1 * b_stride];
+        const dReal product2 = a[2] * b[2 * b_stride];
+        const dReal product3 = a[3] * b[3 * b_stride];
+        accumulator += product0 + product1 + product2 + product3;
+        a += 4;
+        b += 4 * b_stride;
+    }
+    for (const dReal *const tailEnd = unrolledEnd + tailCount; a != tailEnd; ++a, b += b_stride) {
+        accumulator += (*a) * (*b);
+    }
+    return accumulator;
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/fastldltfactor.cpp b/libs/ode-0.16.1/ode/src/fastldltfactor.cpp
new file mode 100644
index 0000000..9c1b921
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastldltfactor.cpp
@@ -0,0 +1,462 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* 
+ * LDLT factorization related code of ThreadedEquationSolverLDLT 
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+#include "error.h"
+
+#include "fastldltfactor_impl.h"
+
+
+/*static */ // Merges the resource requirements of cooperative LDLT factoring into the descriptor, but only if more than one thread would be used.
+void ThreadedEquationSolverLDLT::estimateCooperativeFactoringLDLTResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
+    unsigned limitedThreadCount = restrictFactoringLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount);
+    // With a limited count of 1 nothing is merged and the descriptor is left as it was.
+    if (limitedThreadCount > 1)
+    {
+        doEstimateCooperativeFactoringLDLTResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount); // NOTE(review): passes allowedThreadCount, while cooperativelyFactorLDLT passes its limited count - presumably a deliberate upper bound; confirm
+    }
+}
+
+/*static */ // Factors A (with diagonal data d) as LDLT: cooperatively when more than one thread is usable, with the plain routine otherwise.
+void ThreadedEquationSolverLDLT::cooperativelyFactorLDLT(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount,
+    dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(rowCount != 0);
+
+    // Restrict the caller's thread allowance using the threading instance obtained from the resource container.
+    dxThreadingBase *const threadingInstance = resourceContainer->getThreadingInstance();
+    const unsigned usableThreadCount = restrictFactoringLDLTAllowedThreadCount(threadingInstance, allowedThreadCount, rowCount);
+
+    if (usableThreadCount > 1)
+    {
+        doCooperativelyFactorLDLTValidated(resourceContainer, usableThreadCount, A, d, rowCount, rowSkip);
+        return;
+    }
+
+    factorMatrixAsLDLT<FLDLT_D_STRIDE>(A, d, rowCount, rowSkip); // non-cooperative path
+}
+
+
+/*static */ // Returns the thread count to factor with: 1 unless cooperative mode is compiled in and rowCount yields enough solving blocks.
+unsigned ThreadedEquationSolverLDLT::restrictFactoringLDLTAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
+{
+#if dCOOPERATIVE_ENABLED
+    const unsigned int blockStep = FSL1S_BLOCK_SIZE; // Required by the implementation
+    const unsigned maximalBlockCount = deriveSolvingL1StripeBlockCount(rowCount, blockStep);
+    dIASSERT(deriveSolvingL1StripeThreadCount(FLDLT_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
+
+    // At or above the minimum block count the threading object decides the count from allowedThreadCount.
+    if (maximalBlockCount >= FLDLT_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+    {
+        return threading->calculateThreadingLimitedThreadCount(allowedThreadCount, false);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+
+    // Cooperative mode disabled or too few blocks: a single thread.
+    return 1;
+}
+
+/*static */ // Computes memory size, alignment, simultaneous call count and feature flags for cooperative factoring and merges them into the descriptor.
+void ThreadedEquationSolverLDLT::doEstimateCooperativeFactoringLDLTResourceRequirementsValidated(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    const unsigned int solvingBlockStep = FSL1S_BLOCK_SIZE; // Required by the implementation
+    unsigned solvingTotalBlockCount = deriveSolvingL1StripeBlockCount(rowCount, solvingBlockStep);
+    dIASSERT(solvingTotalBlockCount >= 1);
+
+    unsigned solvingLastBlockIndex = solvingTotalBlockCount - 1;
+    // The figures below are derived for the last solving block, i.e. the one with the largest index.
+    const unsigned factorizingBlockARows = FFL1S_REGULAR_A_ROWS;
+    unsigned factorizingMaximalBlockCount = deriveScalingAndFactorizingL1StripeBlockCountFromSolvingBlockIndex(solvingLastBlockIndex, solvingBlockStep, factorizingBlockARows);
+    // Thread counts of the solving and the factorizing phase, both derived from allowedThreadCount.
+    unsigned blockSolvingMaximumThreads = deriveSolvingL1StripeThreadCount(solvingLastBlockIndex, allowedThreadCount);
+    unsigned blockFactorizingMaximumThreads = deriveScalingAndFactorizingL1StripeThreadCount(factorizingMaximalBlockCount, allowedThreadCount);
+    unsigned simultaneousCallCount = 1 // Final synchronization point
+        + 2 // intermediate synchronization points
+        + dMACRO_MAX(blockSolvingMaximumThreads, blockFactorizingMaximumThreads);
+    // Memory needed is the sum of the solving-phase and the factorizing-phase estimates.
+    FactorizationSolvingL1StripeMemoryEstimates solvingMemoryEstimates;
+    FactorizationScalingAndFactorizingL1StripeMemoryEstimates scalingAndFactorizingEstimates;
+    sizeint solvingMemoryRequired = estimateCooperativelySolvingL1Stripe_XMemoryRequirement(solvingTotalBlockCount, solvingMemoryEstimates);
+    sizeint factorizingMemoryRequired = estimateCooperativelyScalingAndFactorizingL1Stripe_XMemoryRequirement(blockFactorizingMaximumThreads, scalingAndFactorizingEstimates);
+    sizeint totalSizeRequired = solvingMemoryRequired + factorizingMemoryRequired;
+    const unsigned memoryAlignmentRequired = ALLOCATION_DEFAULT_ALIGNMENT;
+    // Merge size, alignment, call count and the stock call-wait requirement into the caller's descriptor.
+    unsigned featureRequirement = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
+    summaryRequirementsDescriptor->mergeAnotherDescriptorIn(totalSizeRequired, memoryAlignmentRequired, simultaneousCallCount, featureRequirement);
+}
+
+/*static */ // Runs the multi-threaded LDLT factoring: prepares the work structures, starts the second block's solving and waits for completion.
+void ThreadedEquationSolverLDLT::doCooperativelyFactorLDLTValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(allowedThreadCount > 1);
+
+    const unsigned int solvingBlockStep = FSL1S_BLOCK_SIZE; // Required by the implementation
+    unsigned solvingTotalBlockCount = deriveSolvingL1StripeBlockCount(rowCount, solvingBlockStep);
+    dIASSERT(solvingTotalBlockCount >= 1);
+
+    unsigned solvingLastBlockIndex = solvingTotalBlockCount - 1;
+
+    const unsigned factorizingBlockARows = FFL1S_REGULAR_A_ROWS;
+    unsigned factorizingMaximalBlockCount = deriveScalingAndFactorizingL1StripeBlockCountFromSolvingBlockIndex(solvingLastBlockIndex, solvingBlockStep, factorizingBlockARows);
+
+    unsigned blockFactorizingMaximumThreads = deriveScalingAndFactorizingL1StripeThreadCount(factorizingMaximalBlockCount, allowedThreadCount);
+    // The stock call-wait object from the resource container is what this function finally blocks on.
+    dCallWaitID completionWait = resourceContainer->getStockCallWait();
+    dAASSERT(completionWait != NULL);
+    // Recompute the memory layout and check that the container's buffer is large enough for it.
+    FactorizationSolvingL1StripeMemoryEstimates solvingMemoryEstimates;
+    FactorizationScalingAndFactorizingL1StripeMemoryEstimates scalingAndFactorizingEstimates;
+    sizeint solvingMemoryRequired = estimateCooperativelySolvingL1Stripe_XMemoryRequirement(solvingTotalBlockCount, solvingMemoryEstimates);
+    sizeint factorizingMemoryRequired = estimateCooperativelyScalingAndFactorizingL1Stripe_XMemoryRequirement(blockFactorizingMaximumThreads, scalingAndFactorizingEstimates);
+    sizeint totalSizeRequired = solvingMemoryRequired + factorizingMemoryRequired;
+    dIASSERT(totalSizeRequired <= resourceContainer->getMemoryBufferSize());
+
+    void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
+    dIASSERT(bufferAllocated != NULL);
+    dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
+    // Shared work state; the progress counter is a local here and is handed to the worker context below.
+    atomicord32 solvingBlockCompletionProgress;
+    cellindexint *solvingBlockProgressDescriptors;
+    FactorizationSolveL1StripeCellContext *solvingCellContexts;
+
+    FactorizationFactorizeL1StripeContext *factorizingFactorizationContext;
+    // Mark the solving and then the factorizing structures out of the buffer, one after another.
+    void *bufferCurrentLocation = bufferAllocated;
+    bufferCurrentLocation = markCooperativelySolvingL1Stripe_XMemoryStructuresOut(bufferCurrentLocation, solvingMemoryEstimates, solvingBlockProgressDescriptors, solvingCellContexts);
+    bufferCurrentLocation = markCooperativelyScalingAndFactorizingL1Stripe_XMemoryStructuresOut(bufferCurrentLocation, scalingAndFactorizingEstimates, factorizingFactorizationContext);
+    dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + totalSizeRequired);
+    // Post the completion call tied to completionWait; its releasee is passed on through the worker context.
+    dCallReleaseeID calculationFinishReleasee;
+    dxThreadingBase *threading = resourceContainer->getThreadingInstance();
+    threading->PostThreadedCall(NULL, &calculationFinishReleasee, 1, NULL, completionWait, &factotLDLT_completion_callback, NULL, 0, "FactorLDLT Completion");
+    // Everything the worker callbacks need is bundled in one context object whose address is given to the posted calls.
+    FactorLDLTWorkerContext workerContext(threading, allowedThreadCount, A, d, solvingTotalBlockCount, rowCount, rowSkip, 
+        solvingBlockCompletionProgress, solvingBlockProgressDescriptors, solvingCellContexts, 
+        factorizingFactorizationContext,
+        calculationFinishReleasee); // The variable must exist in the outer scope
+
+    dIASSERT(solvingTotalBlockCount >= FLDLT_COOPERATIVE_BLOCK_COUNT_MINIMUM);
+    dSASSERT(FLDLT_COOPERATIVE_BLOCK_COUNT_MINIMUM > 2);
+    // The first row stripe is scaled and factorized directly on the calling thread before any work is posted.
+    scaleAndFactorizeL1FirstRowStripe_2<FLDLT_D_STRIDE>(workerContext.m_ARow, workerContext.m_d, workerContext.m_rowSkip);
+    workerContext.incrementForNextBlock();
+
+    const unsigned blockIndex = 1;
+    dIASSERT(blockIndex == workerContext.m_solvingBlockIndex);
+    // Prepare the solving structures for block 1 and derive how many threads solve it.
+    initializeCooperativelySolvingL1Stripe_XMemoryStructures(blockIndex, solvingBlockCompletionProgress, solvingBlockProgressDescriptors, solvingCellContexts);
+    unsigned secondBlockSolvingThreadCount = deriveSolvingL1StripeThreadCount(blockIndex, allowedThreadCount);
+    // Post the sync call first so that the worker calls below can name its releasee.
+    dCallReleaseeID secondBlockSolvingSyncReleasee;
+    threading->PostThreadedCall(NULL, &secondBlockSolvingSyncReleasee, secondBlockSolvingThreadCount, NULL, NULL, &factotLDLT_solvingCompleteSync_callback, &workerContext, 0, "FactorLDLT Solving Complete Sync");
+    // Worker calls are posted for all thread indices but the last one; that one is taken by this thread below.
+    if (secondBlockSolvingThreadCount > 1)
+    {
+        threading->PostThreadedCallsGroup(NULL, secondBlockSolvingThreadCount - 1, secondBlockSolvingSyncReleasee, &factotLDLT_solvingComplete_callback, &workerContext, "FactorLDLT Solving Complete");
+    }
+    // Participate as the last thread index, then lower the sync call's dependency count by one for this thread's share.
+    factotLDLT_solvingComplete(workerContext, secondBlockSolvingThreadCount - 1);
+    threading->AlterThreadedCallDependenciesCount(secondBlockSolvingSyncReleasee, -1);
+    // Wait on completionWait; the remaining blocks are scheduled from the sync callbacks.
+    threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "FactorLDLT End Wait");
+}
+
+
+/*static */ // Threaded-call adapter: unpacks the worker context and instance index and runs a regular solving step.
+int ThreadedEquationSolverLDLT::factotLDLT_solvingComplete_callback(void *callContext, dcallindex_t callInstanceIndex, dUNUSED(dCallReleaseeID callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the call group was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    const unsigned ownThreadIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+    factotLDLT_solvingComplete(workerContext, ownThreadIndex);
+    return 1;
+}
+
+/*static */ // One thread's share of solving the current block, using the regular B row count (FSL1S_REGULAR_B_ROWS).
+void ThreadedEquationSolverLDLT::factotLDLT_solvingComplete(FactorLDLTWorkerContext &ref_context, unsigned ownThreadIndex)
+{
+    participateSolvingL1Stripe_X<FSL1S_BLOCK_SIZE, FSL1S_REGULAR_B_ROWS>(ref_context.m_A, ref_context.m_ARow, ref_context.m_solvingBlockIndex, ref_context.m_rowSkip, 
+        ref_context.m_refSolvingBlockCompletionProgress, ref_context.m_solvingBlockProgressDescriptors, ref_context.m_solvingCellContexts, ownThreadIndex);
+}
+
+
+/*static */ // Threaded-call adapter for the sync point that follows a regular solving step.
+int ThreadedEquationSolverLDLT::factotLDLT_solvingCompleteSync_callback(void *callContext, dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the sync call was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    factotLDLT_solvingCompleteSync(workerContext);
+
+    return 1;
+}
+
+/*static */ // After a block has been solved: sets up and starts the scaling-and-factorizing phase for that same block.
+void ThreadedEquationSolverLDLT::factotLDLT_solvingCompleteSync(FactorLDLTWorkerContext &ref_workerContext)
+{
+    unsigned solvingBlockIndex = ref_workerContext.m_solvingBlockIndex;
+    FactorizationFactorizeL1StripeContext *factorizingFactorizationContext = ref_workerContext.m_factorizingFactorizationContext;
+
+    const unsigned int solvingBlockStep = FSL1S_BLOCK_SIZE;
+    const unsigned factorizingBlockARows = FFL1S_REGULAR_A_ROWS;
+    unsigned factorizingBlockCount = deriveScalingAndFactorizingL1StripeBlockCountFromSolvingBlockIndex(solvingBlockIndex, solvingBlockStep, factorizingBlockARows);
+    unsigned blockFactorizingThreadCount = deriveScalingAndFactorizingL1StripeThreadCount(factorizingBlockCount, ref_workerContext.m_allowedThreadCount);
+    initializeCooperativelyScalingAndFactorizingL1Stripe_XMemoryStructures(factorizingFactorizationContext, blockFactorizingThreadCount);
+    // Releasee that the factorizing worker calls and this thread report to; chosen below.
+    dCallReleaseeID blockFactorizingSyncReleasee;
+
+    dxThreadingBase *threading = ref_workerContext.m_threading;
+    if (solvingBlockIndex != ref_workerContext.m_totalBlockCount - 1)
+    {
+        threading->PostThreadedCall(NULL, &blockFactorizingSyncReleasee, blockFactorizingThreadCount, NULL, NULL, &factotLDLT_scalingAndFactorizingCompleteSync_callback, &ref_workerContext, 0, "FactorLDLT S'n'F Sync");
+    }
+    else
+    {
+        blockFactorizingSyncReleasee = ref_workerContext.m_calculationFinishReleasee;
+        // Last block: no further sync call is posted; the completion call's dependency count is raised for the extra workers instead.
+        if (blockFactorizingThreadCount > 1)
+        {
+            threading->AlterThreadedCallDependenciesCount(blockFactorizingSyncReleasee, blockFactorizingThreadCount - 1);
+        }
+    }
+    // Worker calls are posted for all thread indices but the last one; that one is taken by this thread below.
+    if (blockFactorizingThreadCount > 1)
+    {
+        threading->PostThreadedCallsGroup(NULL, blockFactorizingThreadCount - 1, blockFactorizingSyncReleasee, &factotLDLT_scalingAndFactorizingComplete_callback, &ref_workerContext, "FactorLDLT S'n'F Complete");
+    }
+    // Participate as the last thread index, then lower the releasee's dependency count by one for this thread's share.
+    factotLDLT_scalingAndFactorizingComplete(ref_workerContext, blockFactorizingThreadCount - 1);
+    threading->AlterThreadedCallDependenciesCount(blockFactorizingSyncReleasee, -1);
+}
+
+
+/*static */ // Threaded-call adapter: unpacks the worker context and instance index and runs a regular scaling-and-factorizing step.
+int ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingComplete_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the call group was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    const unsigned ownThreadIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+    factotLDLT_scalingAndFactorizingComplete(workerContext, ownThreadIndex);
+    return 1;
+}
+
+/*static */ // One thread's share of scaling and factorizing the current block, using the regular A row count (FFL1S_REGULAR_A_ROWS).
+void ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingComplete(FactorLDLTWorkerContext &ref_workerContext, unsigned ownThreadIndex)
+{
+    unsigned factorizationRow = ref_workerContext.m_solvingBlockIndex * FSL1S_BLOCK_SIZE; // row index = block index times block size
+    participateScalingAndFactorizingL1Stripe_X<FFL1S_REGULAR_A_ROWS, FLDLT_D_STRIDE>(ref_workerContext.m_ARow, ref_workerContext.m_d, factorizationRow, 
+        ref_workerContext.m_rowSkip, ref_workerContext.m_factorizingFactorizationContext, ownThreadIndex);
+}
+
+
+/*static */ // Threaded-call adapter for the sync point that follows a regular scaling-and-factorizing step.
+int ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingCompleteSync_callback(void *callContext, dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the sync call was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    factotLDLT_scalingAndFactorizingCompleteSync(workerContext);
+
+    return 1;
+}
+
+/*static */ // After a block has been factorized: advances to the next block and starts its solving phase (regular or final variant).
+void ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingCompleteSync(FactorLDLTWorkerContext &ref_workerContext)
+{
+    ref_workerContext.incrementForNextBlock();
+
+    unsigned blockIndex = ref_workerContext.m_solvingBlockIndex;
+    dIASSERT(blockIndex < ref_workerContext.m_totalBlockCount);
+
+    atomicord32 &refSolvingBlockCompletionProgress = ref_workerContext.m_refSolvingBlockCompletionProgress;
+    cellindexint *solvingBlockProgressDescriptors = ref_workerContext.m_solvingBlockProgressDescriptors;
+    FactorizationSolveL1StripeCellContext *solvingCellContexts = ref_workerContext.m_solvingCellContexts;
+
+    initializeCooperativelySolvingL1Stripe_XMemoryStructures(blockIndex, refSolvingBlockCompletionProgress, solvingBlockProgressDescriptors, solvingCellContexts);
+    unsigned blockSolvingThreadCount = deriveSolvingL1StripeThreadCount(blockIndex, ref_workerContext.m_allowedThreadCount);
+
+    dCallReleaseeID blockSolvingSyncReleasee;
+    // Regular variant for every block but the last, and for the last one too when rowCount is a multiple of FSL1S_REGULAR_B_ROWS.
+    dxThreadingBase *threading = ref_workerContext.m_threading;
+    if (blockIndex != ref_workerContext.m_totalBlockCount - 1 || ref_workerContext.m_rowCount % FSL1S_REGULAR_B_ROWS == 0)
+    {
+        threading->PostThreadedCall(NULL, &blockSolvingSyncReleasee, blockSolvingThreadCount, NULL, NULL, &factotLDLT_solvingCompleteSync_callback, &ref_workerContext, 0, "FactorLDLT Solving Complete Sync");
+
+        if (blockSolvingThreadCount > 1)
+        {
+            threading->PostThreadedCallsGroup(NULL, blockSolvingThreadCount - 1, blockSolvingSyncReleasee, &factotLDLT_solvingComplete_callback, &ref_workerContext, "FactorLDLT Solving Complete");
+        }
+        // This thread takes the last thread index itself.
+        factotLDLT_solvingComplete(ref_workerContext, blockSolvingThreadCount - 1);
+    }
+    else
+    {
+        dSASSERT(FSL1S_REGULAR_B_ROWS == 2);
+        dSASSERT(FSL1S_FINAL_B_ROWS == 1);
+        // Final variant: last block with a rowCount that is not a multiple of FSL1S_REGULAR_B_ROWS; uses the "Final" callbacks.
+        threading->PostThreadedCall(NULL, &blockSolvingSyncReleasee, blockSolvingThreadCount, NULL, NULL, &factotLDLT_solvingFinalSync_callback, &ref_workerContext, 0, "FactorLDLT Solving Final Sync");
+
+        if (blockSolvingThreadCount > 1)
+        {
+            threading->PostThreadedCallsGroup(NULL, blockSolvingThreadCount - 1, blockSolvingSyncReleasee, &factotLDLT_solvingFinal_callback, &ref_workerContext, "FactorLDLT Solving Final");
+        }
+        // This thread takes the last thread index itself.
+        factotLDLT_solvingFinal(ref_workerContext, blockSolvingThreadCount - 1);
+    }
+    // Lower the sync call's dependency count by one for this thread's own share of the work.
+    threading->AlterThreadedCallDependenciesCount(blockSolvingSyncReleasee, -1);
+}
+
+
+/*static */ // Threaded-call adapter: unpacks the worker context and instance index and runs a final-variant solving step.
+int ThreadedEquationSolverLDLT::factotLDLT_solvingFinal_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the call group was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    const unsigned ownThreadIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+    factotLDLT_solvingFinal(workerContext, ownThreadIndex);
+    return 1;
+}
+
+/*static */ // One thread's share of solving the current block, using the final B row count (FSL1S_FINAL_B_ROWS).
+void ThreadedEquationSolverLDLT::factotLDLT_solvingFinal(FactorLDLTWorkerContext &ref_context, unsigned ownThreadIndex)
+{
+    participateSolvingL1Stripe_X<FSL1S_BLOCK_SIZE, FSL1S_FINAL_B_ROWS>(ref_context.m_A, ref_context.m_ARow, ref_context.m_solvingBlockIndex, ref_context.m_rowSkip, 
+        ref_context.m_refSolvingBlockCompletionProgress, ref_context.m_solvingBlockProgressDescriptors, ref_context.m_solvingCellContexts, ownThreadIndex);
+}
+
+
+/*static */ // Threaded-call adapter for the sync point that follows the final-variant solving step.
+int ThreadedEquationSolverLDLT::factotLDLT_solvingFinalSync_callback(void *callContext, dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the sync call was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    factotLDLT_solvingFinalSync(workerContext);
+
+    return 1;
+}
+
+/*static */ // After the final-variant solving step: starts the final scaling-and-factorizing phase, which reports to the completion call.
+void ThreadedEquationSolverLDLT::factotLDLT_solvingFinalSync(FactorLDLTWorkerContext &ref_workerContext)
+{
+    unsigned solvingBlockIndex = ref_workerContext.m_solvingBlockIndex;
+    FactorizationFactorizeL1StripeContext *factorizingFactorizationContext = ref_workerContext.m_factorizingFactorizationContext;
+
+    const unsigned int solvingBlockStep = FSL1S_BLOCK_SIZE;
+    const unsigned factorizingBlockARows = FFL1S_FINAL_A_ROWS;
+    unsigned factorizingBlockCount = deriveScalingAndFactorizingL1StripeBlockCountFromSolvingBlockIndex(solvingBlockIndex, solvingBlockStep, factorizingBlockARows);
+    unsigned blockFactorizingThreadCount = deriveScalingAndFactorizingL1StripeThreadCount(factorizingBlockCount, ref_workerContext.m_allowedThreadCount);
+    initializeCooperativelyScalingAndFactorizingL1Stripe_XMemoryStructures(factorizingFactorizationContext, blockFactorizingThreadCount);
+    // This is always the last block, so the workers release the completion call directly.
+    dCallReleaseeID blockFactorizingSyncReleasee = ref_workerContext.m_calculationFinishReleasee;
+    dIASSERT(solvingBlockIndex == ref_workerContext.m_totalBlockCount - 1);
+
+    dxThreadingBase *threading = ref_workerContext.m_threading;
+    // Raise the completion call's dependency count for the extra workers before posting them.
+    if (blockFactorizingThreadCount > 1)
+    {
+        threading->AlterThreadedCallDependenciesCount(blockFactorizingSyncReleasee, blockFactorizingThreadCount - 1);
+        threading->PostThreadedCallsGroup(NULL, blockFactorizingThreadCount - 1, blockFactorizingSyncReleasee, &factotLDLT_scalingAndFactorizingFinal_callback, &ref_workerContext, "FactorLDLT S'n'F Final");
+    }
+    // Participate as the last thread index, then lower the releasee's dependency count by one for this thread's share.
+    factotLDLT_scalingAndFactorizingFinal(ref_workerContext, blockFactorizingThreadCount - 1);
+    threading->AlterThreadedCallDependenciesCount(blockFactorizingSyncReleasee, -1);
+}
+
+
+/*static */ // Threaded-call adapter: unpacks the worker context and instance index and runs a final-variant scaling-and-factorizing step.
+int ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingFinal_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the FactorLDLTWorkerContext address passed when the call group was posted.
+    FactorLDLTWorkerContext &workerContext = *static_cast<FactorLDLTWorkerContext *>(callContext);
+    const unsigned ownThreadIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+    factotLDLT_scalingAndFactorizingFinal(workerContext, ownThreadIndex);
+    return 1;
+}
+
+/*static */ // One thread's share of scaling and factorizing the current block, using the final A row count (FFL1S_FINAL_A_ROWS).
+void ThreadedEquationSolverLDLT::factotLDLT_scalingAndFactorizingFinal(FactorLDLTWorkerContext &ref_workerContext, unsigned ownThreadIndex)
+{
+    unsigned factorizationRow = ref_workerContext.m_solvingBlockIndex * FSL1S_BLOCK_SIZE; // row index = block index times block size
+    participateScalingAndFactorizingL1Stripe_X<FFL1S_FINAL_A_ROWS, FLDLT_D_STRIDE>(ref_workerContext.m_ARow, ref_workerContext.m_d, factorizationRow, 
+        ref_workerContext.m_rowSkip, ref_workerContext.m_factorizingFactorizationContext, ownThreadIndex);
+}
+
+
+/*static */ // Callback of the "FactorLDLT Completion" call; that call is posted together with the wait object the main routine blocks on.
+int ThreadedEquationSolverLDLT::factotLDLT_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // Intentionally empty: all arguments are unused and no work is done here.
+    return 1;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+
+/*extern ODE_API */ // Public entry point that factors A as LDLT through factorMatrixAsLDLT instantiated with template argument 1.
+void dFactorLDLT(dReal *A, dReal *d, int n, int nskip1)
+{
+    factorMatrixAsLDLT<1>(A, d, n, nskip1); // NOTE(review): n and nskip1 are int and are not range-checked here - confirm callers never pass negatives
+}
+
+
+/*extern ODE_API */ // Public entry point: asserts the handle is non-NULL, casts it to the descriptor type and forwards to the solver class.
+void dEstimateCooperativelyFactorLDLTResourceRequirements(dResourceRequirementsID requirements,
+    unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+    dAASSERT(requirements != NULL);
+    // The ID handle is reinterpreted as the internal descriptor type.
+    dxResourceRequirementDescriptor *requirementsDescriptor = (dxResourceRequirementDescriptor *)requirements;
+    ThreadedEquationSolverLDLT::estimateCooperativeFactoringLDLTResourceRequirements(requirementsDescriptor, maximalAllowedThreadCount, maximalRowCount);
+}
+
+/*extern ODE_API */ // Public entry point: asserts the handle is non-NULL, casts it to the container type and forwards to the solver class.
+void dCooperativelyFactorLDLT(dResourceContainerID resources, unsigned allowedThreadCount, 
+    dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(resources != NULL);
+    // The ID handle is reinterpreted as the internal resource container type.
+    dxRequiredResourceContainer *resourceContainer = (dxRequiredResourceContainer *)resources;
+    ThreadedEquationSolverLDLT::cooperativelyFactorLDLT(resourceContainer, allowedThreadCount, A, d, rowCount, rowSkip);
+}
diff --git a/libs/ode-0.16.1/ode/src/fastldltfactor_impl.h b/libs/ode-0.16.1/ode/src/fastldltfactor_impl.h
new file mode 100644
index 0000000..8f633d3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastldltfactor_impl.h
@@ -0,0 +1,1530 @@
+
+
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Code style improvements and optimizations by Oleh Derevenko ????-2019
+ * LDLT cooperative factorization code of ThreadedEquationSolverLDLT copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")  
+ */
+
+#ifndef _ODE_FASTLDLT_IMPL_H_
+#define _ODE_FASTLDLT_IMPL_H_
+
+
+#include "error.h"
+#include "common.h"
+
+
+static void solveL1Stripe_2 (const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip); // forward declarations of the helpers for a stripe of 2 rows; all are defined later in this header
+template<unsigned int d_stride>
+void scaleAndFactorizeL1Stripe_2(dReal *ARow, dReal *d, unsigned rowIndex, unsigned rowSkip); // rowIndex is called factorizationRow in the definition
+template<unsigned int d_stride>
+inline void scaleAndFactorizeL1FirstRowStripe_2(dReal *ARow, dReal *d, unsigned rowSkip); // variant for the stripe that starts at row 0
+
+static void solveStripeL1_1 (const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip); // forward declarations of the helpers for a stripe of 1 row
+template<unsigned int d_stride>
+void scaleAndFactorizeL1Stripe_1(dReal *ARow, dReal *d, unsigned rowIndex); // rowIndex is called factorizationRow in the definition
+template<unsigned int d_stride>
+inline void scaleAndFactorizeL1FirstRowStripe_1(dReal *ARow, dReal *d); // variant for a matrix that consists of row 0 only
+
+
+template<unsigned int d_stride>
+void factorMatrixAsLDLT(dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip)
+{ // Factorizes A in place, two rows per pass plus an optional single last row; the helpers store the dRecip() results for the diagonal into d at a spacing of d_stride, and rowSkip is the distance between rows of A.
+    if (rowCount < 1) return; // an empty matrix needs no work
+
+    dReal *ARow = A; // start of the stripe that is processed next
+    unsigned blockStartRow = 0;
+
+    const unsigned blockStep = 2;
+    const unsigned lastRowIndex = rowCount >= blockStep ? rowCount - blockStep + 1 : 0; // exclusive bound for start rows of full 2-row blocks: rowCount - 1, or 0 for a single row
+
+    /* compute blocks of 2 rows */
+    bool subsequentPass = false; // false only while the stripe starting at row 0 has not been handled yet
+    for (; blockStartRow < lastRowIndex; subsequentPass = true, ARow += blockStep * rowSkip, blockStartRow += blockStep) 
+    {
+        if (subsequentPass)
+        {
+            /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
+            solveL1Stripe_2(A, ARow, blockStartRow, rowSkip);
+            scaleAndFactorizeL1Stripe_2<d_stride>(ARow, d, blockStartRow, rowSkip);
+        }
+        else
+        {
+            scaleAndFactorizeL1FirstRowStripe_2<d_stride>(ARow, d, rowSkip); // rows 0 and 1 have no preceding columns to solve against
+        }
+        dSASSERT(blockStep == 2);
+        /* done factorizing 2 x 2 block */
+    }
+
+    /* compute the (less than 2) rows at the bottom */
+    if (!subsequentPass || blockStartRow == lastRowIndex) // true for rowCount == 1 (loop never ran) or for an odd rowCount (loop stopped exactly at rowCount - 1)
+    {
+        dSASSERT(blockStep == 2); // for the blockStartRow == lastRowIndex comparison above
+
+        if (subsequentPass)
+        {
+            solveStripeL1_1(A, ARow, blockStartRow, rowSkip);
+            scaleAndFactorizeL1Stripe_1<d_stride>(ARow, d, blockStartRow);
+        }
+        else
+        {
+            scaleAndFactorizeL1FirstRowStripe_1<d_stride>(ARow, d); // the matrix consists of row 0 only
+        }
+        dSASSERT(blockStep == 2);
+        /* done factorizing 1 x 1 block */
+    }
+}
+
+/* solve L*X=B, with B containing 2 right hand sides.
+ * L is an n*n lower triangular matrix with ones on the diagonal (n is rowCount).
+ * L is stored by rows and its leading dimension is rowSkip.
+ * B is an n*2 matrix that contains the right hand sides.
+ * B is stored by columns and its leading dimension is also rowSkip.
+ * B is overwritten with X.
+ * this processes blocks of 2*2.
+ * rowCount must be a non-zero multiple of 2 (asserted below).
+ */
+static 
+void solveL1Stripe_2(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+    dIASSERT(rowCount % 2 == 0);
+
+    /* compute all 2 x 2 blocks of X */
+    unsigned blockStartRow = 0;
+    for (bool exitLoop = false, subsequentPass = false; !exitLoop; subsequentPass = true, exitLoop = (blockStartRow += 2) == rowCount) 
+    {
+        const dReal *ptrLElement;
+        dReal *ptrBElement;
+
+        /* declare variables - Z matrix */
+        dReal Z11, Z12, Z21, Z22;
+
+        /* compute all 2 x 2 block of X, from rows i..i+2-1 */
+        if (subsequentPass)
+        {
+            ptrLElement = L + (sizeint)blockStartRow * rowSkip; // widen first so the row offset cannot wrap in 32-bit unsigned arithmetic (same form as in solveStripeL1_1)
+            ptrBElement = B;
+
+            /* set Z matrix to 0 */
+            Z11 = 0; Z12 = 0; Z21 = 0; Z22 = 0;
+
+            /* the inner loop that computes outer products and adds them to Z */
+            // The iteration starts with even number and decreases it by 2. So, it must end in zero
+            for (unsigned columnCounter = blockStartRow; ;) 
+            {
+                /* declare p and q vectors, etc */
+                dReal p1, q1, p2, q2;
+
+                /* compute outer product and add it to the Z matrix */
+                p1 = ptrLElement[0];
+                q1 = ptrBElement[0];
+                Z11 += p1 * q1;
+                q2 = ptrBElement[rowSkip];
+                Z12 += p1 * q2;
+                p2 = ptrLElement[rowSkip];
+                Z21 += p2 * q1;
+                Z22 += p2 * q2;
+
+                /* compute outer product and add it to the Z matrix */
+                p1 = ptrLElement[1];
+                q1 = ptrBElement[1];
+                Z11 += p1 * q1;
+                q2 = ptrBElement[1 + rowSkip];
+                Z12 += p1 * q2;
+                p2 = ptrLElement[1 + rowSkip];
+                Z21 += p2 * q1;
+                Z22 += p2 * q2;
+
+                if (columnCounter > 6) // more than 6 columns remain: take 4 more in this pass, 6 in total
+                {
+                    columnCounter -= 6;
+
+                    /* advance pointers */
+                    ptrLElement += 6;
+                    ptrBElement += 6;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-4];
+                    q1 = ptrBElement[-4];
+                    Z11 += p1 * q1;
+                    q2 = ptrBElement[-4 + rowSkip];
+                    Z12 += p1 * q2;
+                    p2 = ptrLElement[-4 + rowSkip];
+                    Z21 += p2 * q1;
+                    Z22 += p2 * q2;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-3];
+                    q1 = ptrBElement[-3];
+                    Z11 += p1 * q1;
+                    q2 = ptrBElement[-3 + rowSkip];
+                    Z12 += p1 * q2;
+                    p2 = ptrLElement[-3 + rowSkip];
+                    Z21 += p2 * q1;
+                    Z22 += p2 * q2;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-2];
+                    q1 = ptrBElement[-2];
+                    Z11 += p1 * q1;
+                    q2 = ptrBElement[-2 + rowSkip];
+                    Z12 += p1 * q2;
+                    p2 = ptrLElement[-2 + rowSkip];
+                    Z21 += p2 * q1;
+                    Z22 += p2 * q2;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-1];
+                    q1 = ptrBElement[-1];
+                    Z11 += p1 * q1;
+                    q2 = ptrBElement[-1 + rowSkip];
+                    Z12 += p1 * q2;
+                    p2 = ptrLElement[-1 + rowSkip];
+                    Z21 += p2 * q1;
+                    Z22 += p2 * q2;
+                }
+                else
+                {
+                    /* advance pointers */
+                    ptrLElement += 2;
+                    ptrBElement += 2;
+
+                    if ((columnCounter -= 2) == 0)
+                    {
+                        break; // both pointers now address column blockStartRow, i.e. the diagonal block
+                    }
+                }
+                /* end of inner loop */
+            }
+        }
+        else
+        {
+            ptrLElement = L/* + (sizeint)blockStartRow * rowSkip*/; dIASSERT(blockStartRow == 0);
+            ptrBElement = B;
+
+            /* set Z matrix to 0 */
+            Z11 = 0; Z12 = 0; Z21 = 0; Z22 = 0;
+        }
+
+        /* finish computing the X(i) block */
+        
+        dReal Y11 = ptrBElement[0] - Z11;
+        dReal Y12 = ptrBElement[rowSkip] - Z12;
+
+        dReal p2 = ptrLElement[rowSkip]; // the element of L below the unit diagonal inside the 2 x 2 diagonal block
+
+        ptrBElement[0] = Y11;
+        ptrBElement[rowSkip] = Y12;
+
+        dReal Y21 = ptrBElement[1] - Z21 - p2 * Y11;
+        dReal Y22 = ptrBElement[1 + rowSkip] - Z22 - p2 * Y12;
+
+        ptrBElement[1] = Y21;
+        ptrBElement[1 + rowSkip] = Y22;
+        /* end of outer loop */
+    }
+}
+
+template<unsigned int d_stride>
+void scaleAndFactorizeL1Stripe_2(dReal *ARow, dReal *d, unsigned factorizationRow, unsigned rowSkip)
+{ // Multiplies the first factorizationRow columns of the two rows at ARow by the matching d entries, then factorizes the 2 x 2 diagonal block and stores two new d entries.
+    dIASSERT(factorizationRow != 0);
+    dIASSERT(factorizationRow % 2 == 0);
+
+    dReal *ptrAElement = ARow;
+    dReal *ptrDElement = d;
+
+    /* scale the elements in a 2 x i block at A(i,0), and also */
+    /* compute Z = the outer product matrix that we'll need. */
+    dReal Z11 = 0, Z21 = 0, Z22 = 0;
+
+    for (unsigned columnCounter = factorizationRow; ; ) 
+    {
+        dReal p1, q1, p2, q2, dd; // p: entries of the two rows as read, q: the same entries multiplied by dd
+
+        p1 = ptrAElement[0];
+        p2 = ptrAElement[rowSkip];
+        dd = ptrDElement[0 * d_stride];
+        q1 = p1 * dd;
+        q2 = p2 * dd;
+        ptrAElement[0] = q1;
+        ptrAElement[rowSkip] = q2;
+        Z11 += p1 * q1;
+        Z21 += p2 * q1;
+        Z22 += p2 * q2;
+
+        p1 = ptrAElement[1];
+        p2 = ptrAElement[1 + rowSkip];
+        dd = ptrDElement[1 * d_stride];
+        q1 = p1 * dd;
+        q2 = p2 * dd;
+        ptrAElement[1] = q1;
+        ptrAElement[1 + rowSkip] = q2;
+        Z11 += p1 * q1;
+        Z21 += p2 * q1;
+        Z22 += p2 * q2;
+
+        if (columnCounter > 6) // more than 6 columns remain: take 4 more in this pass, 6 in total
+        {
+            columnCounter -= 6;
+
+            ptrAElement += 6;
+            ptrDElement += 6 * d_stride;
+
+            p1 = ptrAElement[-4];
+            p2 = ptrAElement[-4 + rowSkip];
+            dd = ptrDElement[-4 * (int)d_stride]; // the cast keeps the negative index signed
+            q1 = p1 * dd;
+            q2 = p2 * dd;
+            ptrAElement[-4] = q1;
+            ptrAElement[-4 + rowSkip] = q2;
+            Z11 += p1 * q1;
+            Z21 += p2 * q1;
+            Z22 += p2 * q2;
+
+            p1 = ptrAElement[-3];
+            p2 = ptrAElement[-3 + rowSkip];
+            dd = ptrDElement[-3 * (int)d_stride];
+            q1 = p1 * dd;
+            q2 = p2 * dd;
+            ptrAElement[-3] = q1;
+            ptrAElement[-3 + rowSkip] = q2;
+            Z11 += p1 * q1;
+            Z21 += p2 * q1;
+            Z22 += p2 * q2;
+
+            p1 = ptrAElement[-2];
+            p2 = ptrAElement[-2 + rowSkip];
+            dd = ptrDElement[-2 * (int)d_stride];
+            q1 = p1 * dd;
+            q2 = p2 * dd;
+            ptrAElement[-2] = q1;
+            ptrAElement[-2 + rowSkip] = q2;
+            Z11 += p1 * q1;
+            Z21 += p2 * q1;
+            Z22 += p2 * q2;
+
+            p1 = ptrAElement[-1];
+            p2 = ptrAElement[-1 + rowSkip];
+            dd = ptrDElement[-1 * (int)d_stride];
+            q1 = p1 * dd;
+            q2 = p2 * dd;
+            ptrAElement[-1] = q1;
+            ptrAElement[-1 + rowSkip] = q2;
+            Z11 += p1 * q1;
+            Z21 += p2 * q1;
+            Z22 += p2 * q2;
+        }
+        else
+        {
+            ptrAElement += 2;
+            ptrDElement += 2 * d_stride;
+
+            if ((columnCounter -= 2) == 0) // the counter is even (asserted above), so it reaches exactly zero
+            {
+                break;
+            }
+        }
+    }
+
+    /* solve for diagonal 2 x 2 block at A(i,i) */
+    dReal Y11 = ptrAElement[0] - Z11;
+    dReal Y21 = ptrAElement[rowSkip] - Z21;
+    dReal Y22 = ptrAElement[1 + rowSkip] - Z22;
+
+    /* factorize 2 x 2 block Y, ptrDElement */
+    /* factorize row 1 */
+    dReal dd = dRecip(Y11);
+
+    ptrDElement[0 * d_stride] = dd;
+    dIASSERT(ptrDElement == d + (sizeint)factorizationRow * d_stride); // the loop must have advanced the d pointer by exactly factorizationRow entries
+
+    /* factorize row 2 */
+    dReal q2 = Y21 * dd;
+    ptrAElement[rowSkip] = q2; // the below-diagonal element of the block is overwritten with its scaled value
+
+    dReal sum = Y21 * q2;
+    ptrDElement[1 * d_stride] = dRecip(Y22 - sum);
+}
+
+template<unsigned int d_stride>
+void scaleAndFactorizeL1FirstRowStripe_2(dReal *ARow, dReal *d, unsigned rowSkip)
+{ // Factorizes the 2 x 2 block at the very start of the matrix: there are no preceding columns, so no scaling loop and no Z terms are needed.
+    dReal *ptrAElement = ARow;
+    dReal *ptrDElement = d;
+
+    /* solve for diagonal 2 x 2 block at A(0,0) */
+    dReal Y11 = ptrAElement[0]/* - Z11*/;
+    dReal Y21 = ptrAElement[rowSkip]/* - Z21*/;
+    dReal Y22 = ptrAElement[1 + rowSkip]/* - Z22*/;
+
+    /* factorize 2 x 2 block Y, ptrDElement */
+    /* factorize row 1 */
+    dReal dd = dRecip(Y11);
+
+    ptrDElement[0 * d_stride] = dd;
+    dIASSERT(ptrDElement == d/* + (sizeint)factorizationRow * d_stride*/);
+
+    /* factorize row 2 */
+    dReal q2 = Y21 * dd;
+    ptrAElement[rowSkip] = q2; // the below-diagonal element of the block is overwritten with its scaled value
+
+    dReal sum = Y21 * q2;
+    ptrDElement[1 * d_stride] = dRecip(Y22 - sum);
+}
+
+
+/* solve L*X=B, with B containing 1 right hand side.
+ * L is an n*n lower triangular matrix with ones on the diagonal (n is rowCount).
+ * L is stored by rows and its leading dimension is rowSkip.
+ * B is an n*1 matrix that contains the right hand side.
+ * B is a single contiguous column, so rowSkip is used for addressing L only.
+ * B is overwritten with X.
+ * this processes blocks of 2 x 1 (two rows of L against the single column of B).
+ * rowCount must be a non-zero multiple of 2 (asserted below).
+ */
+static 
+void solveStripeL1_1(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+    dIASSERT(rowCount % 2 == 0);
+
+    /* compute all 2 x 1 blocks of X */
+    unsigned blockStartRow = 0;
+    for (bool exitLoop = false, subsequentPass = false; !exitLoop; subsequentPass = true, exitLoop = (blockStartRow += 2) == rowCount) 
+    {
+        const dReal *ptrLElement;
+        dReal *ptrBElement;
+
+        /* declare variables - Z matrix */
+        dReal Z11, Z21;
+
+        if (subsequentPass)
+        {
+            ptrLElement = L + (sizeint)blockStartRow * rowSkip;
+            ptrBElement = B;
+
+            /* set the Z matrix to 0 */
+            Z11 = 0; Z21 = 0;
+
+            /* compute all 2 x 1 block of X, from rows i..i+2-1 */
+            
+            /* the inner loop that computes outer products and adds them to Z */
+            // The iteration starts with even number and decreases it by 2. So, it must end in zero
+            for (unsigned columnCounter = blockStartRow; ; ) 
+            {
+                /* declare p and q vectors, etc */
+                dReal p1, q1, p2;
+
+                /* compute outer product and add it to the Z matrix */
+                p1 = ptrLElement[0];
+                q1 = ptrBElement[0];
+                Z11 += p1 * q1;
+                p2 = ptrLElement[rowSkip];
+                Z21 += p2 * q1;
+                
+                /* compute outer product and add it to the Z matrix */
+                p1 = ptrLElement[1];
+                q1 = ptrBElement[1];
+                Z11 += p1 * q1;
+                p2 = ptrLElement[1 + rowSkip];
+                Z21 += p2 * q1;
+
+                if (columnCounter > 6) // more than 6 columns remain: take 4 more in this pass, 6 in total
+                {
+                    columnCounter -= 6;
+
+                    /* advance pointers */
+                    ptrLElement += 6;
+                    ptrBElement += 6;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-4];
+                    q1 = ptrBElement[-4];
+                    Z11 += p1 * q1;
+                    p2 = ptrLElement[-4 + rowSkip];
+                    Z21 += p2 * q1;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-3];
+                    q1 = ptrBElement[-3];
+                    Z11 += p1 * q1;
+                    p2 = ptrLElement[-3 + rowSkip];
+                    Z21 += p2 * q1;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-2];
+                    q1 = ptrBElement[-2];
+                    Z11 += p1 * q1;
+                    p2 = ptrLElement[-2 + rowSkip];
+                    Z21 += p2 * q1;
+
+                    /* compute outer product and add it to the Z matrix */
+                    p1 = ptrLElement[-1];
+                    q1 = ptrBElement[-1];
+                    Z11 += p1 * q1;
+                    p2 = ptrLElement[-1 + rowSkip];
+                    Z21 += p2 * q1;
+                }
+                else
+                {
+                    /* advance pointers */
+                    ptrLElement += 2;
+                    ptrBElement += 2;
+
+                    if ((columnCounter -= 2) == 0)
+                    {
+                        break; // both pointers now address column blockStartRow, i.e. the diagonal block
+                    }
+                }
+                /* end of inner loop */
+            }
+        }
+        else
+        {
+            ptrLElement = L/* + (sizeint)blockStartRow * rowSkip*/; dIASSERT(blockStartRow == 0);
+            ptrBElement = B;
+
+            /* set the Z matrix to 0 */
+            Z11 = 0; Z21 = 0;
+        }
+        
+        /* finish computing the X(i) block */
+        dReal p2 = ptrLElement[rowSkip]; // the element of L below the unit diagonal inside the 2 x 2 diagonal block
+
+        dReal Y11 = ptrBElement[0] - Z11;
+        dReal Y21 = ptrBElement[1] - Z21 - p2 * Y11;
+
+        ptrBElement[0] = Y11;
+        ptrBElement[1] = Y21;
+        /* end of outer loop */
+    }
+}
+
+template<unsigned int d_stride>
+void scaleAndFactorizeL1Stripe_1(dReal *ARow, dReal *d, unsigned factorizationRow)
+{ // Multiplies the first factorizationRow entries of the single row at ARow by the matching d entries, then stores the dRecip() of the reduced diagonal element as the next d entry.
+    dReal *ptrAElement = ARow; // NOTE(review): unlike scaleAndFactorizeL1Stripe_2 there are no assertions here, although the loop below also needs factorizationRow to be non-zero and even
+    dReal *ptrDElement = d;
+
+    /* scale the elements in a 1 x i block at A(i,0), and also */
+    /* compute Z = the outer product matrix that we'll need. */
+    dReal Z11 = 0, Z22 = 0; // two independent partial sums (even and odd columns) that are added together after the loop
+
+    for (unsigned columnCounter = factorizationRow; ; ) 
+    {
+        dReal p1, p2, q1, q2, dd1, dd2; // p: row entries as read, q: the same entries multiplied by the d values
+
+        p1 = ptrAElement[0];
+        p2 = ptrAElement[1];
+        dd1 = ptrDElement[0 * d_stride];
+        dd2 = ptrDElement[1 * d_stride];
+        q1 = p1 * dd1;
+        q2 = p2 * dd2;
+        ptrAElement[0] = q1;
+        ptrAElement[1] = q2;
+        Z11 += p1 * q1;
+        Z22 += p2 * q2;
+
+        if (columnCounter > 6) // more than 6 columns remain: take 4 more in this pass, 6 in total
+        {
+            columnCounter -= 6;
+
+            ptrAElement += 6;
+            ptrDElement += 6 * d_stride;
+
+            p1 = ptrAElement[-4];
+            p2 = ptrAElement[-3];
+            dd1 = ptrDElement[-4 * (int)d_stride]; // the cast keeps the negative index signed
+            dd2 = ptrDElement[-3 * (int)d_stride];
+            q1 = p1 * dd1;
+            q2 = p2 * dd2;
+            ptrAElement[-4] = q1;
+            ptrAElement[-3] = q2;
+            Z11 += p1 * q1;
+            Z22 += p2 * q2;
+
+            p1 = ptrAElement[-2];
+            p2 = ptrAElement[-1];
+            dd1 = ptrDElement[-2 * (int)d_stride];
+            dd2 = ptrDElement[-1 * (int)d_stride];
+            q1 = p1 * dd1;
+            q2 = p2 * dd2;
+            ptrAElement[-2] = q1;
+            ptrAElement[-1] = q2;
+            Z11 += p1 * q1;
+            Z22 += p2 * q2;
+        }
+        else
+        {
+            ptrAElement += 2;
+            ptrDElement += 2 * d_stride;
+
+            if ((columnCounter -= 2) == 0) // terminates only when the counter hits exactly zero
+            {
+                break;
+            }
+        }
+    }
+
+    dReal Y11 = ptrAElement[0] - (Z11 + Z22); // diagonal element reduced by the combined partial sums
+
+    /* solve for diagonal 1 x 1 block at A(i,i) */
+    dIASSERT(ptrDElement == d + (sizeint)factorizationRow * d_stride);
+    /* factorize 1 x 1 block Y, ptrDElement */
+    /* factorize row 1 */
+    ptrDElement[0 * d_stride] = dRecip(Y11);
+}
+
+template<unsigned int d_stride>
+void scaleAndFactorizeL1FirstRowStripe_1(dReal *ARow, dReal *d)
+{ // Handles a stripe that consists of row 0 only: stores dRecip() of the first element of ARow as the first d entry and leaves ARow unmodified.
+    dReal *ptrAElement = ARow;
+    dReal *ptrDElement = d;
+
+    dReal Y11 = ptrAElement[0]; // no preceding columns, so nothing is subtracted from the diagonal element
+
+    /* solve for diagonal 1 x 1 block at A(0,0) */
+    /* factorize 1 x 1 block Y, ptrDElement */
+    /* factorize row 1 */
+    ptrDElement[0 * d_stride] = dRecip(Y11);
+}
+
+
+
+
+template<unsigned int block_step, unsigned int b_rows>
+/*static */
+void ThreadedEquationSolverLDLT::participateSolvingL1Stripe_X(const dReal *L, dReal *B, unsigned blockCount, unsigned rowSkip, 
+    volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+    FactorizationSolveL1StripeCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex)
+{
+    const unsigned lookaheadRange = 64;
+    BlockProcessingState blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+
+    unsigned completedBlocks = refBlockCompletionProgress;
+    unsigned currentBlock = completedBlocks;
+    dIASSERT(completedBlocks <= blockCount);
+
+    for (bool exitLoop = completedBlocks == blockCount; !exitLoop; exitLoop = false)
+    {
+        bool goForLockedBlockPrimaryCalculation = false, goForLockedBlockDuplicateCalculation = false;
+        bool goAssigningTheResult = false, stayWithinTheBlock = false;
+
+        dReal Z[block_step][b_rows];
+        dReal Y[block_step][b_rows];
+
+        dReal *ptrBElement;
+
+        CellContextInstance previousContextInstance;
+        unsigned completedColumnBlock;
+
+        for (cellindexint testDescriptor = blockProgressDescriptors[currentBlock]; ; )
+        {
+            if (testDescriptor == INVALID_CELLDESCRIPTOR)
+            {
+                // Invalid descriptor is the indication that the row has been fully calculated
+                // Test if this was the last row and break out if so.
+                if (currentBlock + 1 == blockCount)
+                {
+                    exitLoop = true;
+                    break;
+                }
+
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+                break;
+            }
+            
+            CooperativeAtomics::AtomicReadReorderBarrier();
+            // It is necessary to read up to date completedBblocks value after the descriptor retrieval
+            // as otherwise the logic below breaks
+            completedBlocks = refBlockCompletionProgress;
+
+            if (!GET_CELLDESCRIPTOR_ISLOCKED(testDescriptor))
+            {
+                completedColumnBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedColumnBlock < currentBlock || (completedColumnBlock == currentBlock && currentBlock == 0)); // Otherwise, why would the calculation have had stopped if the final column is reachable???
+                dIASSERT(completedColumnBlock <= completedBlocks); // Since the descriptor is not locked
+
+                if (completedColumnBlock == completedBlocks && currentBlock != completedBlocks)
+                {
+                    dIASSERT(completedBlocks < currentBlock);
+                    break;
+                }
+
+                if (CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], testDescriptor, MARK_CELLDESCRIPTOR_LOCKED(testDescriptor)))
+                {
+                    if (completedColumnBlock != 0)
+                    {
+                        CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                        previousContextInstance = contextInstance;
+
+                        const FactorizationSolveL1StripeCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                        sourceContext.loadPrecalculatedZs(Z);
+                    }
+                    else
+                    {
+                        previousContextInstance = CCI__MIN;
+                        FactorizationSolveL1StripeCellContext::initializePrecalculatedZs(Z);
+                    }
+
+                    goForLockedBlockPrimaryCalculation = true;
+                    break;
+                }
+
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                testDescriptor = blockProgressDescriptors[currentBlock];
+            }
+            else
+            {
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                cellindexint verificativeDescriptor;
+                bool verificationFailure = false;
+
+                completedColumnBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedColumnBlock != currentBlock || currentBlock == 0); // There is no reason for computations to stop at the very end other than being the initial value at the very first block
+
+                if (completedColumnBlock != 0)
+                {
+                    CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                    const FactorizationSolveL1StripeCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                    sourceContext.loadPrecalculatedZs(Z);
+                }
+                else
+                {
+                    FactorizationSolveL1StripeCellContext::initializePrecalculatedZs(Z);
+                }
+
+                if (completedColumnBlock != 0 && completedColumnBlock <= currentBlock)
+                {
+                    // Make sure the descriptor is re-read after the precalculates
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+                }
+
+                if (completedColumnBlock <= currentBlock)
+                {
+                    verificativeDescriptor = blockProgressDescriptors[currentBlock];
+                    verificationFailure = verificativeDescriptor != testDescriptor;
+                }
+
+                if (!verificationFailure)
+                {
+                    dIASSERT(completedColumnBlock <= currentBlock + 1);
+
+                    goForLockedBlockDuplicateCalculation = true;
+                    break;
+                }
+
+                testDescriptor = verificativeDescriptor;
+            }
+        }
+
+        if (exitLoop)
+        {
+            break;
+        }
+
+        if (goForLockedBlockPrimaryCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            // Declare and assign the variables at the top to not interfere with any branching -- the compiler is going to eliminate them anyway.
+            bool handleComputationTakenOver = false, rowEndReached = false;
+
+            const dReal *ptrLElement;
+            unsigned finalColumnBlock;
+
+            if (currentBlock != 0)
+            {
+                /* compute all 2 x 2 block of X, from rows i..i+2-1 */
+                ptrLElement = L + (currentBlock * rowSkip + completedColumnBlock) * block_step;
+                ptrBElement = B + completedColumnBlock * block_step;
+
+                /* the inner loop that computes outer products and adds them to Z */
+                finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                dIASSERT(completedColumnBlock != finalColumnBlock/* || currentBlock == 0*/);
+
+                // The iteration starts with even number and decreases it by 2. So, it must end in zero
+                for (unsigned columnCounter = finalColumnBlock - completedColumnBlock; ; )
+                {
+                    /* declare p and q vectors, etc */
+                    dReal p[block_step], q[b_rows];
+
+                    /* compute outer product and add it to the Z matrix */
+                    p[0] = ptrLElement[0];
+                    q[0] = ptrBElement[0];
+                    Z[0][0] += p[0] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        q[1] = ptrBElement[rowSkip];
+                        Z[0][1] += p[0] * q[1];
+                    }
+                    p[1] = ptrLElement[rowSkip];
+                    Z[1][0] += p[1] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        Z[1][1] += p[1] * q[1];
+                    }
+
+                    /* compute outer product and add it to the Z matrix */
+                    p[0] = ptrLElement[1];
+                    q[0] = ptrBElement[1];
+                    Z[0][0] += p[0] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        q[1] = ptrBElement[1 + rowSkip];
+                        Z[0][1] += p[0] * q[1];
+                    }
+                    p[1] = ptrLElement[1 + rowSkip];
+                    Z[1][0] += p[1] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        Z[1][1] += p[1] * q[1];
+                    }
+                    
+                    dSASSERT(block_step == 2);
+                    dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                    if (columnCounter > 2)
+                    {
+                        /* compute outer product and add it to the Z matrix */
+                        p[0] = ptrLElement[2];
+                        q[0] = ptrBElement[2];
+                        Z[0][0] += p[0] * q[0];
+                        if (b_rows >= 2)
+                        {
+                            q[1] = ptrBElement[2 + rowSkip];
+                            Z[0][1] += p[0] * q[1];
+                        }
+                        p[1] = ptrLElement[2 + rowSkip];
+                        Z[1][0] += p[1] * q[0];
+                        if (b_rows >= 2)
+                        {
+                            Z[1][1] += p[1] * q[1];
+                        }
+
+                        /* compute outer product and add it to the Z matrix */
+                        p[0] = ptrLElement[3];
+                        q[0] = ptrBElement[3];
+                        Z[0][0] += p[0] * q[0];
+                        if (b_rows >= 2)
+                        {
+                            q[1] = ptrBElement[3 + rowSkip];
+                            Z[0][1] += p[0] * q[1];
+                        }
+                        p[1] = ptrLElement[3 + rowSkip];
+                        Z[1][0] += p[1] * q[0];
+                        if (b_rows >= 2)
+                        {
+                            Z[1][1] += p[1] * q[1];
+                        }
+
+                        dSASSERT(block_step == 2);
+                        dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                        /* advance pointers */
+                        ptrLElement += 2 * block_step;
+                        ptrBElement += 2 * block_step;
+                        columnCounter -= 2;
+                    }
+                    else
+                    {
+                        /* advance pointers */
+                        ptrLElement += block_step;
+                        ptrBElement += block_step;
+                        /* end of inner loop */
+
+                        if (--columnCounter == 0)
+                        {
+                            if (finalColumnBlock == currentBlock)
+                            {
+                                rowEndReached = true;
+                                break;
+                            }
+
+                            // Take a look if any more rows have been completed...
+                            completedBlocks = refBlockCompletionProgress;
+                            dIASSERT(completedBlocks >= finalColumnBlock);
+
+                            if (completedBlocks == finalColumnBlock)
+                            {
+                                break;
+                            }
+
+                            // ...continue if so.
+                            unsigned columnCompletedSoFar = finalColumnBlock;
+                            finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                            columnCounter = finalColumnBlock - columnCompletedSoFar;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                ptrLElement = L/* + (currentBlock * rowSkip + completedColumnBlock) * block_step*/;
+                ptrBElement = B/* + completedColumnBlock * block_step*/;
+
+                rowEndReached = true;
+            }
+
+            if (rowEndReached)
+            {
+                // Check whether there is still a need to proceed or if the computation has been taken over by another thread
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedColumnBlock, previousContextInstance, true);
+                
+                if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                {
+                    /* finish computing the X(i) block */
+                    Y[0][0] = ptrBElement[0] - Z[0][0];
+                    if (b_rows >= 2)
+                    {
+                        Y[0][1] = ptrBElement[rowSkip] - Z[0][1];
+                    }
+
+                    dReal p2 = ptrLElement[rowSkip];
+
+                    Y[1][0] = ptrBElement[1] - Z[1][0] - p2 * Y[0][0];
+                    if (b_rows >= 2)
+                    {
+                        Y[1][1] = ptrBElement[1 + rowSkip] - Z[1][1] - p2 * Y[0][1];
+                    }
+
+                    dSASSERT(block_step == 2);
+                    dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                    // Use atomic memory barrier to make sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+                    
+                    // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                    // and the computation result is valid.
+                    if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                    {
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        FactorizationSolveL1StripeCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true);
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                        goAssigningTheResult = true;
+                    }
+                    else
+                    {
+                        // Otherwise, go on competing for copying the results
+                        handleComputationTakenOver = true;
+                    }
+                }
+                else
+                {
+                    handleComputationTakenOver = true;
+                }
+            }
+            else
+            {
+                // If the final column has not been reached yet, store current values to the context.
+                // Select the other context instance as the previous one might be read by other threads.
+                CellContextInstance nextContextInstance = buildNextContextInstance(previousContextInstance);
+                FactorizationSolveL1StripeCellContext &destinationContext = buildBlockContextRef(cellContexts, currentBlock, nextContextInstance);
+                destinationContext.storePrecalculatedZs(Z);
+
+                // Unlock the row until more columns can be used
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedColumnBlock, previousContextInstance, true);
+                cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(finalColumnBlock, nextContextInstance, false);
+                // The descriptor might have been updated by a competing thread
+                if (!CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor))
+                {
+                    // Adjust the ptrBElement to point to the result area...
+                    ptrBElement = B + currentBlock * block_step;
+                    // ...and go on handling the case
+                    handleComputationTakenOver = true;
+                }
+            }
+
+            if (handleComputationTakenOver)
+            {
+                cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+                // This can only happen if the row was (has become) the uppermost not fully completed one
+                // and the competing thread is at final stage of calculation (i.e., it has reached the currentBlock column).
+                if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+                {
+                    // If not fully completed this must be the final stage of the result assignment into the matrix
+                    dIASSERT(existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true));
+
+                    // Go on competing copying the result as anyway the block is the topmost not completed one
+                    // and since there was competition for it, there is no other work that can be done right now.
+                    const FactorizationSolveL1StripeCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                    resultContext.loadPrecalculatedZs(Y);
+
+                    goAssigningTheResult = true;
+                }
+                else 
+                {
+                    // everything is over -- just go handling next blocks
+                }
+            }
+        }
+        else if (goForLockedBlockDuplicateCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            bool skipToHandlingSubsequentRows = false, skiptoCopyingResult = false;
+
+            /* declare variables */
+            const dReal *ptrLElement;
+
+            if (completedColumnBlock < currentBlock)
+            {
+                /* compute all 2 x 2 block of X, from rows i..i+2-1 */
+                ptrLElement = L + (currentBlock * rowSkip + completedColumnBlock) * block_step;
+                ptrBElement = B + completedColumnBlock * block_step;
+
+                /* the inner loop that computes outer products and adds them to Z */
+                // The iteration starts with even number and decreases it by 2. So, it must end in zero
+                const unsigned finalColumnBlock = currentBlock;
+                dIASSERT(currentBlock == completedBlocks); // Why would we be competing for a row otherwise?
+
+                unsigned lastCompletedColumn = completedColumnBlock;
+                unsigned columnCounter = finalColumnBlock - completedColumnBlock;
+                for (bool exitInnerLoop = false; !exitInnerLoop; exitInnerLoop = --columnCounter == 0)
+                {
+                    /* declare p and q vectors, etc */
+                    dReal p[block_step], q[b_rows];
+
+                    /* compute outer product and add it to the Z matrix */
+                    p[0] = ptrLElement[0];
+                    q[0] = ptrBElement[0];
+                    Z[0][0] += p[0] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        q[1] = ptrBElement[rowSkip];
+                        Z[0][1] += p[0] * q[1];
+                    }
+                    p[1] = ptrLElement[rowSkip];
+                    Z[1][0] += p[1] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        Z[1][1] += p[1] * q[1];
+                    }
+
+                    /* compute outer product and add it to the Z matrix */
+                    p[0] = ptrLElement[1];
+                    q[0] = ptrBElement[1];
+                    Z[0][0] += p[0] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        q[1] = ptrBElement[1 + rowSkip];
+                        Z[0][1] += p[0] * q[1];
+                    }
+                    p[1] = ptrLElement[1 + rowSkip];
+                    Z[1][0] += p[1] * q[0];
+                    if (b_rows >= 2)
+                    {
+                        Z[1][1] += p[1] * q[1];
+                    }
+
+                    dSASSERT(block_step == 2);
+                    dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                    // Check if the primary solver thread has not made any progress
+                    cellindexint descriptorVerification = blockProgressDescriptors[currentBlock];
+                    unsigned newCompletedColumn = GET_CELLDESCRIPTOR_COLUMNINDEX(descriptorVerification);
+                    
+                    if (newCompletedColumn != lastCompletedColumn)
+                    {
+                        // Check, this is the first change the current thread detects.
+                        // There is absolutely no reason in code for the computation to stop/resume twice 
+                        // while the current thread is competing.
+                        dIASSERT(lastCompletedColumn == completedColumnBlock);
+
+                        if (descriptorVerification == INVALID_CELLDESCRIPTOR)
+                        {
+                            skipToHandlingSubsequentRows = true;
+                            break;
+                        }
+
+                        if (newCompletedColumn == currentBlock + 1)
+                        {
+                            skiptoCopyingResult = true;
+                            break;
+                        }
+
+                        // Check if the current thread is behind
+                        if (newCompletedColumn > finalColumnBlock - columnCounter)
+                        {
+                            // If so, go starting over one more time
+                            blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                            stayWithinTheBlock = true;
+                            skipToHandlingSubsequentRows = true;
+                            break;
+                        }
+
+                        // If current thread is ahead, just save new completed column for further comparisons and go on calculating
+                        lastCompletedColumn = newCompletedColumn;
+                    }
+
+                    /* advance pointers */
+                    ptrLElement += block_step;
+                    ptrBElement += block_step;
+                    /* end of inner loop */
+                }
+            }
+            else if (completedColumnBlock > currentBlock)
+            {
+                dIASSERT(completedColumnBlock == currentBlock + 1);
+
+                skiptoCopyingResult = true;
+            }
+            else
+            {
+                dIASSERT(currentBlock == 0); // Execution can get here within the very first block only
+
+                /* assign the pointers appropriately and go on computing the results */
+                ptrLElement = L/* + (currentBlock * rowSkip + completedColumnBlock) * block_step*/;
+                ptrBElement = B/* + completedColumnBlock * block_step*/;
+            }
+
+            if (!skipToHandlingSubsequentRows)
+            {
+                if (!skiptoCopyingResult)
+                {
+                    /* finish computing the X(i) block */
+                    Y[0][0] = ptrBElement[0] - Z[0][0];
+                    if (b_rows >= 2)
+                    {
+                        Y[0][1] = ptrBElement[rowSkip] - Z[0][1];
+                    }
+
+                    dReal p2 = ptrLElement[rowSkip];
+
+                    Y[1][0] = ptrBElement[1] - Z[1][0] - p2 * Y[0][0];
+                    if (b_rows >= 2)
+                    {
+                        Y[1][1] = ptrBElement[1 + rowSkip] - Z[1][1] - p2 * Y[0][1];
+                    }
+
+                    dSASSERT(block_step == 2);
+                    dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                    // Use atomic memory barrier to make sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+
+                    cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+
+                    if (existingDescriptor == INVALID_CELLDESCRIPTOR)
+                    {
+                        // Everything is over -- proceed to subsequent rows
+                        skipToHandlingSubsequentRows = true;
+                    }
+                    else if (existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true))
+                    {
+                        // The values computed above may not be valid. Copy the values already in the result context.
+                        skiptoCopyingResult = true;
+                    }
+                    else
+                    {
+                        // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                        // and the computation result is valid.
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true); // put the computation at the top so that the evaluation result from the expression above is reused
+
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        FactorizationSolveL1StripeCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], existingDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                    }
+                }
+
+                if (!skipToHandlingSubsequentRows)
+                {
+                    if (skiptoCopyingResult)
+                    {
+                        // Extract the result values stored in the result context
+                        const FactorizationSolveL1StripeCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.loadPrecalculatedZs(Y);
+
+                        ptrBElement = B + currentBlock * block_step;
+                    }
+
+                    goAssigningTheResult = true;
+                }
+            }
+        }
+
+        if (goAssigningTheResult)
+        {
+            cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+            // Check if the assignment has not been completed yet
+            if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+            {
+                // Assign the computation results to their places in the matrix
+                ptrBElement[0] = Y[0][0];
+                ptrBElement[1] = Y[1][0];
+                if (b_rows >= 2)
+                {
+                    ptrBElement[rowSkip] = Y[0][1];
+                    ptrBElement[1 + rowSkip] = Y[1][1];
+                }
+
+                dSASSERT(block_step == 2);
+                dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+                ThrsafeIncrementIntUpToLimit(&refBlockCompletionProgress, currentBlock + 1);
+                dIASSERT(refBlockCompletionProgress >= currentBlock + 1);
+
+                // And assign the completed status no matter what
+                CooperativeAtomics::AtomicStoreCellindexint(&blockProgressDescriptors[currentBlock], INVALID_CELLDESCRIPTOR);
+            }
+            else 
+            {
+                // everything is over -- just go handling next blocks
+            }
+        }
+
+        if (!stayWithinTheBlock)
+        {
+            completedBlocks = refBlockCompletionProgress;
+            
+            if (completedBlocks == blockCount)
+            {
+                break;
+            }
+
+            currentBlock += 1;
+
+            bool lookaheadBoundaryReached = false;
+
+            if (currentBlock == blockCount || completedBlocks == 0)
+            {
+                lookaheadBoundaryReached = true;
+            }
+            else if (currentBlock >= completedBlocks + lookaheadRange)
+            {
+                lookaheadBoundaryReached = blockProcessingState > BPS_NO_BLOCKS_PROCESSED;
+            }
+            else if (currentBlock < completedBlocks)
+            {
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+
+                currentBlock = completedBlocks;
+            }
+
+            if (lookaheadBoundaryReached)
+            {
+                dIASSERT(blockProcessingState != BPS_COMPETING_FOR_A_BLOCK); // Why did not we compete???
+
+                // If no row has been processed in the previous pass, compete for the next row to avoid cycling uselessly
+                if (blockProcessingState <= BPS_NO_BLOCKS_PROCESSED)
+                {
+                    // Abandon job if too few blocks remain
+                    if (blockCount - completedBlocks <= ownThreadIndex)
+                    {
+                        break;
+                    }
+
+                    blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                }
+                else
+                {
+                    // If there was some progress, just continue to the next pass
+                    blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+                }
+
+                currentBlock = completedBlocks;
+            }
+        }
+    }
+}
+
+
+template<unsigned int a_rows, unsigned int d_stride> // a_rows: number of stripe rows handled (the code distinguishes 1 from >= 2); d_stride: distance between consecutive elements of d[]
+/*static */ // Worker meant to be run by cooperating threads (see the atomics below): scales claimed column blocks of the stripe by d[] in place while accumulating products, merges the partial sums through the shared context, and lets the last thread to leave factorize the diagonal block.
+void ThreadedEquationSolverLDLT::participateScalingAndFactorizingL1Stripe_X(dReal *ARow, dReal *d, unsigned factorizationRow, unsigned rowSkip, // ARow: first row of the stripe, the second row (a_rows >= 2) lies rowSkip elements further; d: read for columns below factorizationRow, written at factorizationRow
+    FactorizationFactorizeL1StripeContext *factorizationContext, unsigned ownThreadIndex) // factorizationContext: state shared by the participating threads; ownThreadIndex: this thread's slot in m_threadContexts
+{
+    dIASSERT(factorizationRow != 0); // columns [0, factorizationRow) are the ones scaled below, so there has to be at least one
+    dIASSERT(factorizationRow % 2 == 0); // the loop below consumes columns in pairs
+
+    /* scale the elements in a 2 x i block at A(i,0), and also */
+    /* compute Z = the outer product matrix that we'll need. */
+    dReal sameZ[a_rows] = { REAL(0.0), }, mixedZ[dMACRO_MAX(a_rows - 1, 1)] = { REAL(0.0), }; // sameZ[r]: sum of p*q of row r with itself; mixedZ[0]: sum of (second row p) * (first row q)
+    bool doneAnything = false; // becomes true once this thread has claimed at least one block
+
+    const unsigned blockSize = deriveScalingAndFactorizingL1StripeBlockSize(a_rows);
+
+    const unsigned blockCount = deriveScalingAndFactorizingL1StripeBlockCountFromFactorizationRow(factorizationRow, blockSize);
+    dIASSERT(blockCount != 0);
+    // Claim block indices from the shared counter until it yields blockCount (presumably meaning that no blocks remain -- confirm against ThrsafeIncrementIntUpToLimit)
+    unsigned blockIndex;
+    while ((blockIndex = ThrsafeIncrementIntUpToLimit(&factorizationContext->m_nextColumnIndex, blockCount)) != blockCount) 
+    {
+        doneAnything = true;
+        unsigned blockStartRow = blockIndex * blockSize;
+
+        dReal *ptrAElement = ARow + blockStartRow;
+        dReal *ptrDElement = d + blockStartRow * d_stride;
+        for (unsigned columnCounter = blockIndex != blockCount - 1 ? blockSize : factorizationRow - blockStartRow; ; ) // every block spans blockSize columns except the last one, which takes what remains before factorizationRow
+        {
+            dReal p1, q1, p2, q2, dd;
+            // Column 0 of this pass: read p, scale it by d into q, store q back into A and accumulate the products
+            p1 = ptrAElement[0];
+            if (a_rows >= 2)
+            {
+                p2 = ptrAElement[rowSkip];
+            }
+            dd = ptrDElement[0 * d_stride];
+            q1 = p1 * dd;
+            if (a_rows >= 2)
+            {
+                q2 = p2 * dd;
+            }
+            ptrAElement[0] = q1;
+            if (a_rows >= 2)
+            {
+                ptrAElement[rowSkip] = q2;
+            }
+            sameZ[0] += p1 * q1;
+            if (a_rows >= 2)
+            {
+                sameZ[1] += p2 * q2;
+                mixedZ[0] += p2 * q1;
+            }
+            // Column 1 of this pass: same steps as above
+            p1 = ptrAElement[1];
+            if (a_rows >= 2)
+            {
+                p2 = ptrAElement[1 + rowSkip];
+            }
+            dd = ptrDElement[1 * d_stride];
+            q1 = p1 * dd;
+            if (a_rows >= 2)
+            {
+                q2 = p2 * dd;
+            }
+            ptrAElement[1] = q1;
+            if (a_rows >= 2)
+            {
+                ptrAElement[1 + rowSkip] = q2;
+            }
+            sameZ[0] += p1 * q1;
+            if (a_rows >= 2)
+            {
+                sameZ[1] += p2 * q2;
+                mixedZ[0] += p2 * q1;
+            }
+            // With more than 6 columns left, 4 further columns are processed in this pass (6 in total); otherwise advance by 2 and stop at zero
+            if (columnCounter > 6)
+            {
+                columnCounter -= 6;
+
+                ptrAElement += 6;
+                ptrDElement += 6 * d_stride;
+                // The pointers have already been advanced by 6, so columns 2..5 of this pass are addressed with offsets -4..-1
+                p1 = ptrAElement[-4];
+                if (a_rows >= 2)
+                {
+                    p2 = ptrAElement[-4 + rowSkip];
+                }
+                dd = ptrDElement[-4 * (int)d_stride]; // the cast keeps the negative offset signed, as d_stride itself is unsigned
+                q1 = p1 * dd;
+                if (a_rows >= 2)
+                {
+                    q2 = p2 * dd;
+                }
+                ptrAElement[-4] = q1;
+                if (a_rows >= 2)
+                {
+                    ptrAElement[-4 + rowSkip] = q2;
+                }
+                sameZ[0] += p1 * q1;
+                if (a_rows >= 2)
+                {
+                    sameZ[1] += p2 * q2;
+                    mixedZ[0] += p2 * q1;
+                }
+
+                p1 = ptrAElement[-3];
+                if (a_rows >= 2)
+                {
+                    p2 = ptrAElement[-3 + rowSkip];
+                }
+                dd = ptrDElement[-3 * (int)d_stride];
+                q1 = p1 * dd;
+                if (a_rows >= 2)
+                {
+                    q2 = p2 * dd;
+                }
+                ptrAElement[-3] = q1;
+                if (a_rows >= 2)
+                {
+                    ptrAElement[-3 + rowSkip] = q2;
+                }
+                sameZ[0] += p1 * q1;
+                if (a_rows >= 2)
+                {
+                    sameZ[1] += p2 * q2;
+                    mixedZ[0] += p2 * q1;
+                }
+
+                p1 = ptrAElement[-2];
+                if (a_rows >= 2)
+                {
+                    p2 = ptrAElement[-2 + rowSkip];
+                }
+                dd = ptrDElement[-2 * (int)d_stride];
+                q1 = p1 * dd;
+                if (a_rows >= 2)
+                {
+                    q2 = p2 * dd;
+                }
+                ptrAElement[-2] = q1;
+                if (a_rows >= 2)
+                {
+                    ptrAElement[-2 + rowSkip] = q2;
+                }
+                sameZ[0] += p1 * q1;
+                if (a_rows >= 2)
+                {
+                    sameZ[1] += p2 * q2;
+                    mixedZ[0] += p2 * q1;
+                }
+
+                p1 = ptrAElement[-1];
+                if (a_rows >= 2)
+                {
+                    p2 = ptrAElement[-1 + rowSkip];
+                }
+                dd = ptrDElement[-1 * (int)d_stride];
+                q1 = p1 * dd;
+                if (a_rows >= 2)
+                {
+                    q2 = p2 * dd;
+                }
+                ptrAElement[-1] = q1;
+                if (a_rows >= 2)
+                {
+                    ptrAElement[-1 + rowSkip] = q2;
+                }
+                sameZ[0] += p1 * q1;
+                if (a_rows >= 2)
+                {
+                    sameZ[1] += p2 * q2;
+                    mixedZ[0] += p2 * q1;
+                }
+            }
+            else
+            {
+                ptrAElement += 2;
+                ptrDElement += 2 * d_stride;
+
+                if ((columnCounter -= 2) == 0) // reaches zero only for an even starting count -- presumably guaranteed by the block size helper; TODO confirm
+                {
+                    break;
+                }
+            }
+        }
+    }
+    // Publish this thread's partial sums, folding in the sum that was published before it (if any)
+    if (doneAnything)
+    {
+        unsigned partialSumThreadIndex;
+        for (bool exitLoop = false; !exitLoop; exitLoop = CooperativeAtomics::AtomicCompareExchangeUint32(&factorizationContext->m_sumThreadIndex, partialSumThreadIndex, ownThreadIndex + 1))
+        {
+            partialSumThreadIndex = factorizationContext->m_sumThreadIndex; // snapshot; the body is repeated if the compare-exchange in the loop header does not succeed against it
+            // A nonzero value is (thread index + 1) of the context holding the latest published sum; zero presumably means nothing has been published yet
+            if (partialSumThreadIndex != 0)
+            {
+                const FactorizationFactorizeL1StripeThreadContext &partialSumContext = factorizationContext->m_threadContexts[partialSumThreadIndex - 1];
+                factorizationContext->m_threadContexts[ownThreadIndex].assignDataSum<a_rows>(sameZ, mixedZ, partialSumContext);
+            }
+            else
+            {
+                factorizationContext->m_threadContexts[ownThreadIndex].assignDataAlone<a_rows>(sameZ, mixedZ);
+            }
+        }
+    }
+    // Count this thread out of the running ones; the thread that gets zero back finishes the job
+    unsigned threadExitIndex = CooperativeAtomics::AtomicDecrementUint32(&factorizationContext->m_threadsRunning);
+    dIASSERT(threadExitIndex + 1U != 0); // i.e. the returned value is not the all-ones result of decrementing below zero
+
+    if (threadExitIndex == 0)
+    {
+        // Let the last thread retrieve the sum and perform final computations
+        unsigned sumThreadIndex = factorizationContext->m_sumThreadIndex;
+        dIASSERT(sumThreadIndex != 0); // The rowIndex was asserted to be not zero, so at least one thread must have done something
+
+        const FactorizationFactorizeL1StripeThreadContext &sumContext = factorizationContext->m_threadContexts[sumThreadIndex - 1];
+        sumContext.retrieveData<a_rows>(sameZ, mixedZ); // the local accumulators are reused to receive the combined totals
+
+        dReal *ptrAElement = ARow + factorizationRow;
+        dReal *ptrDElement = d + factorizationRow * d_stride;
+
+        /* solve for diagonal 2 x 2 block at A(i,i) */
+        dReal Y11, Y21, Y22; // Y21 and Y22 are assigned and used only when a_rows >= 2
+        
+        Y11 = ptrAElement[0] - sameZ[0];
+        if (a_rows >= 2)
+        {
+            Y21 = ptrAElement[rowSkip] - mixedZ[0];
+            Y22 = ptrAElement[1 + rowSkip] - sameZ[1];
+        }
+
+        /* factorize 2 x 2 block Y, ptrDElement */
+        /* factorize row 1 */
+        dReal dd = dRecip(Y11);
+
+        ptrDElement[0 * d_stride] = dd;
+        dIASSERT(ptrDElement == d + (sizeint)factorizationRow * d_stride); // same offset recomputed in sizeint -- presumably a guard against overflow of the unsigned product above; confirm
+
+        if (a_rows >= 2)
+        {
+            /* factorize row 2 */
+            dReal q2 = Y21 * dd;
+            ptrAElement[rowSkip] = q2; // within A only the second-row element at column factorizationRow is overwritten here
+
+            dReal sum = Y21 * q2;
+            ptrDElement[1 * d_stride] = dRecip(Y22 - sum);
+        }
+    }
+}
+
+
+#endif // #ifndef _ODE_FASTLDLT_IMPL_H_
diff --git a/libs/ode-0.16.1/ode/src/fastldltsolve.cpp b/libs/ode-0.16.1/ode/src/fastldltsolve.cpp
new file mode 100644
index 0000000..ca1ff4d
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastldltsolve.cpp
@@ -0,0 +1,222 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* 
+ * LDLT solving related code of ThreadedEquationSolverLDLT 
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+
+#include "fastldltsolve_impl.h"
+
+
+/*static */ // Collects into summaryRequirementsDescriptor the resources needed to solve an LDLT system of up to rowCount rows with up to allowedThreadCount threads.
+void ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    unsigned stageBlockCountSifficiencyMask;
+    dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
+    unsigned limitedThreadCount = restrictSolvingLDLTAllowedThreadCount(threading, allowedThreadCount, rowCount, stageBlockCountSifficiencyMask);
+    // Nothing is requested for single-threaded solving; otherwise every stage whose mask bit is set gets its estimator invoked
+    if (limitedThreadCount > 1)
+    {
+        if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) != 0)
+        {
+            doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+        }
+
+        if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) != 0)
+        {
+            doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+        }
+        // A set bit is what makes doCooperativelySolveLDLTValidated run the transposed stage cooperatively, so that is when its resources are required
+        if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) != 0)
+        {
+            doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+        }
+    }
+}
+
+/*static */ // Solves the equation system for b from the L and d factors, cooperatively when more than one thread may be used.
+void ThreadedEquationSolverLDLT::cooperativelySolveLDLT(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(rowCount != 0);
+
+    // Filled in by the restriction call: one bit per solving stage that may be run cooperatively
+    unsigned cooperativeStageMask;
+    dxThreadingBase *threadingInstance = resourceContainer->getThreadingInstance();
+    const unsigned threadLimit = restrictSolvingLDLTAllowedThreadCount(threadingInstance, allowedThreadCount, rowCount, cooperativeStageMask);
+
+    if (threadLimit > 1)
+    {
+        doCooperativelySolveLDLTValidated(resourceContainer, threadLimit, cooperativeStageMask, L, d, b, rowCount, rowSkip);
+    }
+    else // No more than one thread may be used: run the plain sequential solver
+    {
+        solveEquationSystemWithLDLT<SLDLT_D_STRIDE, SLDLT_B_STRIDE>(L, d, b, rowCount, rowSkip);
+    }
+}
+
+/*static */ // Chooses the thread count for LDLT solving and reports, one bit per stage, which stages have enough blocks to be run cooperatively.
+unsigned ThreadedEquationSolverLDLT::restrictSolvingLDLTAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount, unsigned &out_stageBlockCountSifficiencyMask)
+{
+    unsigned limitedThreadCount = 1;
+    unsigned stageBlockCountSifficiencyMask = 0;
+    // These defaults (one thread, no cooperative stages) are what is returned when dCOOPERATIVE_ENABLED is off
+#if dCOOPERATIVE_ENABLED
+    {
+        const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
+        unsigned solvingStraightBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
+        dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+        // Flag the straight solving stage only if rowCount yields at least the minimum number of blocks
+        if (solvingStraightBlockCount >= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+        {
+            stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_STRAIGHT;
+        }
+    }
+
+    {
+        const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
+        unsigned scalingBlockCount = deriveScalingVectorBlockCount(rowCount, blockStep);
+        dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
+        // NOTE(review): unlike the other two stages the sanity check above passes MINIMUM - 1; presumably this helper takes a last block index - confirm
+        if (scalingBlockCount >= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+        {
+            stageBlockCountSifficiencyMask |= 1U << SLDLTS_SCALING_VECTOR;
+        }
+    }
+
+    {
+        const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+        unsigned solvingTransposedBlockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
+        dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+        // Flag the transposed solving stage only if rowCount yields at least the minimum number of blocks
+        if (solvingTransposedBlockCount >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+        {
+            stageBlockCountSifficiencyMask |= 1U << SLDLTS_SOLVING_TRANSPOSED;
+        }
+    }
+    // The threading object is consulted for a thread limit only if at least one stage was flagged
+    if (stageBlockCountSifficiencyMask != 0)
+    {
+        limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+    // The mask is stored on every path, so the caller's variable is always assigned
+    out_stageBlockCountSifficiencyMask = stageBlockCountSifficiencyMask;
+    return limitedThreadCount;
+}
+
+
+/*static */ // Runs the three solving stages on b in order; each stage is cooperative if its bit is set in the mask and sequential otherwise.
+void ThreadedEquationSolverLDLT::doCooperativelySolveLDLTValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, unsigned stageBlockCountSifficiencyMask, 
+    const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(allowedThreadCount > 1);
+    // Stage 1: straight solving with L
+    if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_STRAIGHT)) != 0)
+    {
+        dSASSERT(SLDLT_B_STRIDE + 0 == SL1S_B_STRIDE);
+
+        doCooperativelySolveL1StraightValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
+    }
+    else
+    {
+        solveL1Straight<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
+    }
+    // Stage 2: scaling of b with d
+    if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SCALING_VECTOR)) != 0)
+    {
+        dSASSERT(SLDLT_B_STRIDE + 0 == SV_A_STRIDE);
+        dSASSERT(SLDLT_D_STRIDE + 0 == SV_D_STRIDE);
+
+        doCooperativelyScaleVectorValidated(resourceContainer, allowedThreadCount, b, d, rowCount);
+    }
+    else
+    {
+        scaleLargeVector<SLDLT_B_STRIDE, SLDLT_D_STRIDE>(b, d, rowCount);
+    }
+    // Stage 3: transposed solving with L
+    if ((stageBlockCountSifficiencyMask & (1U << SLDLTS_SOLVING_TRANSPOSED)) != 0)
+    {
+        dSASSERT(SLDLT_B_STRIDE + 0 == SL1T_B_STRIDE);
+
+        doCooperativelySolveL1TransposedValidated(resourceContainer, allowedThreadCount, L, b, rowCount, rowSkip);
+    }
+    else
+    {
+        solveL1Transposed<SLDLT_B_STRIDE>(L, b, rowCount, rowSkip);
+    }
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+/*extern ODE_API */ // Public entry point: solves the equation system for b from the L and d factors, with both strides equal to 1.
+void dSolveLDLT(const dReal *L, const dReal *d, dReal *b, int n, int nskip)
+{
+    dAASSERT(n != 0);
+    if (n == 0)
+    {
+        return; // There is nothing to solve for an empty system
+    }
+
+    dAASSERT(L != NULL);
+    dAASSERT(d != NULL);
+    dAASSERT(b != NULL);
+    solveEquationSystemWithLDLT<1, 1>(L, d, b, n, nskip);
+}
+
+
+/*extern ODE_API */ // Public wrapper: forwards the estimation request to ThreadedEquationSolverLDLT.
+void dEstimateCooperativelySolveLDLTResourceRequirements(dResourceRequirementsID requirements,
+    unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+    dAASSERT(requirements != NULL);
+    // The public ID is cast to the internal descriptor type the solver works with
+    dxResourceRequirementDescriptor *const descriptor = (dxResourceRequirementDescriptor *)requirements;
+    ThreadedEquationSolverLDLT::estimateCooperativeSolvingLDLTResourceRequirements(descriptor, maximalAllowedThreadCount, maximalRowCount);
+}
+
+/*extern ODE_API */ // Public wrapper: forwards the cooperative LDLT solving request to ThreadedEquationSolverLDLT.
+void dCooperativelySolveLDLT(dResourceContainerID resources, unsigned allowedThreadCount, 
+    const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(resources != NULL);
+    // The public ID is cast to the internal container type the solver works with
+    dxRequiredResourceContainer *const container = (dxRequiredResourceContainer *)resources;
+    ThreadedEquationSolverLDLT::cooperativelySolveLDLT(container, allowedThreadCount, L, d, b, rowCount, rowSkip);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/fastldltsolve_impl.h b/libs/ode-0.16.1/ode/src/fastldltsolve_impl.h
new file mode 100644
index 0000000..ad6f393
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastldltsolve_impl.h
@@ -0,0 +1,49 @@
+
+
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_MATRIX_IMPL_H_
+#define _ODE_MATRIX_IMPL_H_
+
+
+#include "fastlsolve_impl.h"
+#include "fastltsolve_impl.h"
+#include "fastvecscale_impl.h"
+
+
+template<unsigned int d_stride, unsigned int b_stride> // The strides are forwarded as template arguments to the three passes below
+void solveEquationSystemWithLDLT(const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip)
+{   // Sequential solver: all arguments are asserted first, then b is processed by three passes
+    dAASSERT(L != NULL);
+    dAASSERT(d != NULL);
+    dAASSERT(b != NULL);
+    dAASSERT(rowCount > 0);
+    dAASSERT(rowSkip >= rowCount);
+    // The passes run strictly in this order on the same vector b: straight solving with L, scaling with d, transposed solving with L
+    solveL1Straight<b_stride>(L, b, rowCount, rowSkip);
+    scaleLargeVector<b_stride, d_stride>(b, d, rowCount);
+    solveL1Transposed<b_stride>(L, b, rowCount, rowSkip);
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/fastlsolve.cpp b/libs/ode-0.16.1/ode/src/fastlsolve.cpp
new file mode 100644
index 0000000..6f7e6a4
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastlsolve.cpp
@@ -0,0 +1,230 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * L1Straight Equation Solving Routines
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+#include "error.h"
+
+#include "fastlsolve_impl.h"
+
+
+/*static */ // Adds the needs of cooperative L1Straight solving to the summary descriptor, provided the solving would use more than one thread.
+void ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1StraightResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    dxThreadingBase *relatedThreading = summaryRequirementsDescriptor->getrelatedThreading();
+    const unsigned threadLimit = restrictSolvingL1StraightAllowedThreadCount(relatedThreading, allowedThreadCount, rowCount);
+    if (threadLimit <= 1)
+    {
+        return; // Solving limited to one thread requests nothing
+    }
+    doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, rowCount);
+}
+
+/*static */ // Performs the L1Straight solving on b, cooperatively when more than one thread may be used.
+void ThreadedEquationSolverLDLT::cooperativelySolveL1Straight(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(rowCount != 0);
+
+    dxThreadingBase *threadingInstance = resourceContainer->getThreadingInstance();
+    const unsigned threadLimit = restrictSolvingL1StraightAllowedThreadCount(threadingInstance, allowedThreadCount, rowCount);
+
+    if (threadLimit > 1)
+    {
+        doCooperativelySolveL1StraightValidated(resourceContainer, threadLimit, L, b, rowCount, rowSkip);
+    }
+    else // No more than one thread may be used: run the plain sequential solver
+    {
+        solveL1Straight<SL1S_B_STRIDE>(L, b, rowCount, rowSkip);
+    }
+}
+
+
+/*static */ // Returns the thread count to use for L1Straight solving of rowCount rows; 1 means the solving is to be done sequentially.
+unsigned ThreadedEquationSolverLDLT::restrictSolvingL1StraightAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
+{
+    unsigned limitedThreadCount = 1;
+    // This default of one thread is what is returned when dCOOPERATIVE_ENABLED is off
+#if dCOOPERATIVE_ENABLED
+    const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
+    unsigned solvingBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
+    dIASSERT(deriveSolvingL1StraightThreadCount(SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+    // The threading object is consulted for a thread limit only if rowCount yields at least the minimum number of blocks
+    if (solvingBlockCount >= SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+    {
+        limitedThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+
+    return limitedThreadCount;
+}
+
+/*static */ // Merges into the summary descriptor what cooperative L1Straight solving needs: work memory, simultaneous calls and a stock call wait.
+void ThreadedEquationSolverLDLT::doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor, 
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
+    unsigned solvingBlockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
+    dIASSERT(solvingBlockCount >= 1);
+
+    unsigned usedThreadCount = deriveSolvingL1StraightThreadCount(solvingBlockCount, allowedThreadCount);
+    dIASSERT(usedThreadCount > 1);
+    // One call for the completion plus one for each of the (usedThreadCount - 1) posted workers
+    unsigned callCount = 1 + (usedThreadCount - 1);
+
+    SolvingL1StraightMemoryEstimates memoryEstimates;
+    sizeint memorySize = estimateCooperativelySolvingL1StraightMemoryRequirement<blockStep>(rowCount, memoryEstimates);
+    const unsigned memoryAlignment = ALLOCATION_DEFAULT_ALIGNMENT;
+    unsigned requiredFeatures = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
+
+    summaryRequirementsDescriptor->mergeAnotherDescriptorIn(memorySize, memoryAlignment, callCount, requiredFeatures);
+}
+
+/*static */ // Performs the L1Straight solving on b with several threads, using the memory buffer, call wait and threading object of resourceContainer.
+void ThreadedEquationSolverLDLT::doCooperativelySolveL1StraightValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(allowedThreadCount > 1);
+    // Derive the block count from the row count and, from it, the number of threads that will take part
+    const unsigned int blockStep = SL1S_BLOCK_SIZE; // Required by the implementation
+    unsigned blockCount = deriveSolvingL1StraightBlockCount(rowCount, blockStep);
+    dIASSERT(blockCount >= 1);
+
+    unsigned threadCountToUse = deriveSolvingL1StraightThreadCount(blockCount, allowedThreadCount);
+    dIASSERT(threadCountToUse > 1);
+    // The call wait object is attached to the completion call below and waited on at the end
+    dCallWaitID completionWait = resourceContainer->getStockCallWait();
+    dAASSERT(completionWait != NULL);
+
+    atomicord32 blockCompletionProgress;
+    cellindexint *blockProgressDescriptors;
+    SolveL1StraightCellContext *cellContexts;
+    // No allocation is done here: the container's buffer is only checked for size, presence and alignment
+    SolvingL1StraightMemoryEstimates solvingMemoryEstimates;
+    sizeint solvingMemoryRequired = estimateCooperativelySolvingL1StraightMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
+    dIASSERT(solvingMemoryRequired <= resourceContainer->getMemoryBufferSize());
+
+    void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
+    dIASSERT(bufferAllocated != NULL);
+    dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
+    // blockProgressDescriptors and cellContexts receive their values from this call; the end location must stay within the estimated size
+    void *bufferCurrentLocation = bufferAllocated;
+    bufferCurrentLocation = markCooperativelySolvingL1StraightMemoryStructuresOut(bufferCurrentLocation, solvingMemoryEstimates, blockProgressDescriptors, cellContexts);
+    dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + solvingMemoryRequired);
+
+    initializeCooperativelySolveL1StraightMemoryStructures<blockStep>(rowCount, blockCompletionProgress, blockProgressDescriptors, cellContexts);
+
+    dCallReleaseeID calculationFinishReleasee;
+    SolveL1StraightWorkerContext workerContext; // The variable must exist in the outer scope
+
+    workerContext.init(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts);
+    // Post the completion call first (threadCountToUse - 1 is presumably its dependency count - confirm), then the group of threadCountToUse - 1 worker calls
+    dxThreadingBase *threading = resourceContainer->getThreadingInstance();
+    threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &solveL1Straight_completion_callback, NULL, 0, "SolveL1Straight Completion");
+    threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &solveL1Straight_worker_callback, &workerContext, "SolveL1Straight Work");
+    // The calling thread takes part in the solving as well, with threadCountToUse - 1 as its own thread index
+    participateSolvingL1Straight<blockStep, SL1S_B_STRIDE>(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts, threadCountToUse - 1);
+    // Wait on completionWait before returning; presumably this keeps workerContext and the other locals alive while workers still use them
+    threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "SolveL1Straight End Wait");
+}
+
+/*static */ // Threaded-call entry point of a worker: unpacks the context and runs the worker with the call instance index as its thread index.
+int ThreadedEquationSolverLDLT::solveL1Straight_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    SolveL1StraightWorkerContext &workerContext = *(SolveL1StraightWorkerContext *)callContext;
+    const unsigned workerThreadIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+    solveL1Straight_worker(workerContext, workerThreadIndex);
+
+    return 1;
+}
+
+/*static */ // Worker body: joins the shared L1Straight solving with the data stored in the worker context.
+void ThreadedEquationSolverLDLT::solveL1Straight_worker(SolveL1StraightWorkerContext &ref_context, unsigned ownThreadIndex)
+{
+    // The block size is fixed by the implementation; all other arguments come from the context
+    participateSolvingL1Straight<SL1S_BLOCK_SIZE, SL1S_B_STRIDE>(
+        ref_context.m_L, ref_context.m_b, ref_context.m_rowCount, ref_context.m_rowSkip, 
+        *ref_context.m_ptrBlockCompletionProgress, ref_context.m_blockProgressDescriptors, ref_context.m_cellContexts, ownThreadIndex);
+}
+
+/*static */ // Completion call of the L1Straight solving: ignores all its arguments and performs no work of its own.
+int ThreadedEquationSolverLDLT::solveL1Straight_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    return 1; // NOTE(review): presumably the success result expected by the threading interface - confirm
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+/*extern ODE_API */ // Public entry point: performs the L1Straight solving on B with a stride of 1.
+void dSolveL1(const dReal *L, dReal *B, int n, int lskip1)
+{
+    dAASSERT(n != 0);
+    if (n == 0)
+    {
+        return; // There is nothing to solve for an empty system
+    }
+
+    dAASSERT(L != NULL);
+    dAASSERT(B != NULL);
+    solveL1Straight<1>(L, B, n, lskip1);
+}
+
+
+/*extern ODE_API */ // Public wrapper: forwards the estimation request to ThreadedEquationSolverLDLT.
+void dEstimateCooperativelySolveL1StraightResourceRequirements(dResourceRequirementsID requirements,
+    unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+    dAASSERT(requirements != NULL);
+    // The public ID is cast to the internal descriptor type the solver works with
+    dxResourceRequirementDescriptor *const descriptor = (dxResourceRequirementDescriptor *)requirements;
+    ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1StraightResourceRequirements(descriptor, maximalAllowedThreadCount, maximalRowCount);
+}
+
+/*extern ODE_API */ // Public wrapper: forwards the cooperative L1Straight solving request to ThreadedEquationSolverLDLT.
+void dCooperativelySolveL1Straight(dResourceContainerID resources, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(resources != NULL);
+    // The public ID is cast to the internal container type the solver works with
+    dxRequiredResourceContainer *const container = (dxRequiredResourceContainer *)resources;
+    ThreadedEquationSolverLDLT::cooperativelySolveL1Straight(container, allowedThreadCount, L, b, rowCount, rowSkip);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/fastlsolve_impl.h b/libs/ode-0.16.1/ode/src/fastlsolve_impl.h
new file mode 100644
index 0000000..f14ada7
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastlsolve_impl.h
@@ -0,0 +1,1610 @@
+
+
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Code style improvements and optimizations by Oleh Derevenko ????-2019
+ * L1Straight cooperative solving code of ThreadedEquationSolverLDLT copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")  
+ */
+
+#ifndef _ODE_FASTLSOLVE_IMPL_H_
+#define _ODE_FASTLSOLVE_IMPL_H_
+
+
+/* solve L*X=B, with B containing 1 right hand side.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is lskip.
+ * B is an n*1 matrix that contains the right hand sides.
+ * B is stored by columns and its leading dimension is also lskip.
+ * B is overwritten with X.
+ * this processes blocks of 4*4.
+ * if this is in the factorizer source file, n must be a multiple of 4.
+ */
+
+template<unsigned int b_stride>
+void solveL1Straight (const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+
+    /* compute all 4 x 1 blocks of X */
+    unsigned blockStartRow = 0;
+    bool subsequentPass = false;
+    bool goForLoopX4 = rowCount >= 4;
+    const unsigned loopX4LastRow = goForLoopX4 ? rowCount - 4 : 0;
+    for (; goForLoopX4; subsequentPass = true, goForLoopX4 = (blockStartRow += 4) <= loopX4LastRow) 
+    {
+        /* declare variables - Z matrix, p and q vectors, etc */
+        const dReal *ptrLElement;
+        dReal *ptrBElement;
+
+        dReal Z11, Z21, Z31, Z41;
+
+        /* compute all 4 x 1 block of X, from rows i..i+4-1 */
+        if (subsequentPass)
+        {
+            ptrLElement = L + (1 + blockStartRow) * rowSkip;
+            ptrBElement = B;
+            /* set the Z matrix to 0 */
+            Z11 = 0; Z21 = 0; Z31 = 0; Z41 = 0;
+
+            /* the inner loop that computes outer products and adds them to Z */
+            for (unsigned columnCounter = blockStartRow; ; )
+            {
+                dReal q1, p1, p2, p3, p4;
+
+                /* load p and q values */
+                q1 = ptrBElement[0 * b_stride];
+                p1 = (ptrLElement - rowSkip)[0];
+                p2 = ptrLElement[0];
+                ptrLElement += rowSkip;
+                p3 = ptrLElement[0];
+                p4 = ptrLElement[0 + rowSkip];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z21 += p2 * q1;
+                Z31 += p3 * q1;
+                Z41 += p4 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[1 * b_stride];
+                p3 = ptrLElement[1];
+                p4 = ptrLElement[1 + rowSkip];
+                ptrLElement -= rowSkip;
+                p1 = (ptrLElement - rowSkip)[1];
+                p2 = ptrLElement[1];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z21 += p2 * q1;
+                Z31 += p3 * q1;
+                Z41 += p4 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[2 * b_stride];
+                p1 = (ptrLElement - rowSkip)[2];
+                p2 = ptrLElement[2];
+                ptrLElement += rowSkip;
+                p3 = ptrLElement[2];
+                p4 = ptrLElement[2 + rowSkip];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z21 += p2 * q1;
+                Z31 += p3 * q1;
+                Z41 += p4 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[3 * b_stride];
+                p3 = ptrLElement[3];
+                p4 = ptrLElement[3 + rowSkip];
+                ptrLElement -= rowSkip;
+                p1 = (ptrLElement - rowSkip)[3];
+                p2 = ptrLElement[3];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z21 += p2 * q1;
+                Z31 += p3 * q1;
+                Z41 += p4 * q1;
+
+                if (columnCounter > 12)
+                {
+                    columnCounter -= 12;
+
+                    /* advance pointers */
+                    ptrLElement += 12;
+                    ptrBElement += 12 * b_stride;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-8 * (int)b_stride];
+                    p1 = (ptrLElement - rowSkip)[-8];
+                    p2 = ptrLElement[-8];
+                    ptrLElement += rowSkip;
+                    p3 = ptrLElement[-8];
+                    p4 = ptrLElement[-8 + rowSkip];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-7 * (int)b_stride];
+                    p3 = ptrLElement[-7];
+                    p4 = ptrLElement[-7 + rowSkip];
+                    ptrLElement -= rowSkip;
+                    p1 = (ptrLElement - rowSkip)[-7];
+                    p2 = ptrLElement[-7];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-6 * (int)b_stride];
+                    p1 = (ptrLElement - rowSkip)[-6];
+                    p2 = ptrLElement[-6];
+                    ptrLElement += rowSkip;
+                    p3 = ptrLElement[-6];
+                    p4 = ptrLElement[-6 + rowSkip];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-5 * (int)b_stride];
+                    p3 = ptrLElement[-5];
+                    p4 = ptrLElement[-5 + rowSkip];
+                    ptrLElement -= rowSkip;
+                    p1 = (ptrLElement - rowSkip)[-5];
+                    p2 = ptrLElement[-5];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-4 * (int)b_stride];
+                    p1 = (ptrLElement - rowSkip)[-4];
+                    p2 = ptrLElement[-4];
+                    ptrLElement += rowSkip;
+                    p3 = ptrLElement[-4];
+                    p4 = ptrLElement[-4 + rowSkip];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-3 * (int)b_stride];
+                    p3 = ptrLElement[-3];
+                    p4 = ptrLElement[-3 + rowSkip];
+                    ptrLElement -= rowSkip;
+                    p1 = (ptrLElement - rowSkip)[-3];
+                    p2 = ptrLElement[-3];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-2 * (int)b_stride];
+                    p1 = (ptrLElement - rowSkip)[-2];
+                    p2 = ptrLElement[-2];
+                    ptrLElement += rowSkip;
+                    p3 = ptrLElement[-2];
+                    p4 = ptrLElement[-2 + rowSkip];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-1 * (int)b_stride];
+                    p3 = ptrLElement[-1];
+                    p4 = ptrLElement[-1 + rowSkip];
+                    ptrLElement -= rowSkip;
+                    p1 = (ptrLElement - rowSkip)[-1];
+                    p2 = ptrLElement[-1];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z21 += p2 * q1;
+                    Z31 += p3 * q1;
+                    Z41 += p4 * q1;
+                }
+                else
+                {
+                    /* advance pointers */
+                    ptrLElement += 4;
+                    ptrBElement += 4 * b_stride;
+
+                    if ((columnCounter -= 4) == 0)
+                    {
+                        break;
+                    }
+                }
+                /* end of inner loop */
+            }
+        }
+        else
+        {
+            ptrLElement = L + rowSkip/* + blockStartRow * rowSkip*/; dIASSERT(blockStartRow == 0);
+            ptrBElement = B;
+            /* set the Z matrix to 0 */
+            Z11 = 0; Z21 = 0; Z31 = 0; Z41 = 0;
+        }
+
+        /* finish computing the X(i) block */
+        dReal Y11, Y21, Y31, Y41;
+        {
+            Y11 = ptrBElement[0 * b_stride] - Z11;
+            ptrBElement[0 * b_stride] = Y11;
+        }
+        {
+            dReal p2 = ptrLElement[0];
+            Y21 = ptrBElement[1 * b_stride] - Z21 - p2 * Y11;
+            ptrBElement[1 * b_stride] = Y21;
+        }
+        ptrLElement += rowSkip;
+        {
+            dReal p3 = ptrLElement[0];
+            dReal p3_1 = ptrLElement[1];
+            Y31 = ptrBElement[2 * b_stride] - Z31 - p3 * Y11 - p3_1 * Y21;
+            ptrBElement[2 * b_stride] = Y31;
+        }
+        {
+            dReal p4 = ptrLElement[rowSkip];
+            dReal p4_1 = ptrLElement[1 + rowSkip];
+            dReal p4_2 = ptrLElement[2 + rowSkip];
+            Y41 = ptrBElement[3 * b_stride] - Z41 - p4 * Y11 - p4_1 * Y21 - p4_2 * Y31;
+            ptrBElement[3 * b_stride] = Y41;
+        }
+        /* end of outer loop */
+    }
+
+    /* compute rows at end that are not a multiple of block size */
+    for (; !subsequentPass || blockStartRow < rowCount; subsequentPass = true, ++blockStartRow) 
+    {
+        /* compute all 1 x 1 block of X, from rows i..i+1-1 */
+        dReal *ptrBElement;
+
+        dReal Z11, Z12;
+
+        if (subsequentPass)
+        {
+            ptrBElement = B;
+            /* set the Z matrix to 0 */
+            Z11 = 0; Z12 = 0;
+
+            const dReal *ptrLElement = L + blockStartRow * rowSkip;
+
+            /* the inner loop that computes outer products and adds them to Z */
+            unsigned columnCounter = blockStartRow;
+            for (bool exitLoop = columnCounter < 4; !exitLoop; exitLoop = false) 
+            {
+                dReal p1, p2, q1, q2;
+
+                /* load p and q values */
+                p1 = ptrLElement[0];
+                p2 = ptrLElement[1];
+                q1 = ptrBElement[0 * b_stride];
+                q2 = ptrBElement[1 * b_stride];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z12 += p2 * q2;
+
+                /* load p and q values */
+                p1 = ptrLElement[2];
+                p2 = ptrLElement[3];
+                q1 = ptrBElement[2 * b_stride];
+                q2 = ptrBElement[3 * b_stride];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z12 += p2 * q2;
+
+                if (columnCounter >= (12 + 4))
+                {
+                    columnCounter -= 12;
+
+                    /* advance pointers */
+                    ptrLElement += 12;
+                    ptrBElement += 12 * b_stride;
+
+                    /* load p and q values */
+                    p1 = ptrLElement[-8];
+                    p2 = ptrLElement[-7];
+                    q1 = ptrBElement[-8 * (int)b_stride];
+                    q2 = ptrBElement[-7 * (int)b_stride];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z12 += p2 * q2;
+
+                    /* load p and q values */
+                    p1 = ptrLElement[-6];
+                    p2 = ptrLElement[-5];
+                    q1 = ptrBElement[-6 * (int)b_stride];
+                    q2 = ptrBElement[-5 * (int)b_stride];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z12 += p2 * q2;
+
+                    /* load p and q values */
+                    p1 = ptrLElement[-4];
+                    p2 = ptrLElement[-3];
+                    q1 = ptrBElement[-4 * (int)b_stride];
+                    q2 = ptrBElement[-3 * (int)b_stride];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z12 += p2 * q2;
+
+                    /* load p and q values */
+                    p1 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    q1 = ptrBElement[-2 * (int)b_stride];
+                    q2 = ptrBElement[-1 * (int)b_stride];
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z11 += p1 * q1;
+                    Z12 += p2 * q2;
+                }
+                else
+                {
+                    /* advance pointers */
+                    ptrLElement += 4;
+                    ptrBElement += 4 * b_stride;
+
+                    if ((columnCounter -= 4) < 4)
+                    {
+                        break;
+                    }
+                }
+                /* end of inner loop */
+            }
+
+            /* compute left-over iterations */
+            if ((columnCounter & 2) != 0) 
+            {
+                dReal p1, p2, q1, q2;
+
+                /* load p and q values */
+                p1 = ptrLElement[0];
+                p2 = ptrLElement[1];
+                q1 = ptrBElement[0 * b_stride];
+                q2 = ptrBElement[1 * b_stride];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+                Z12 += p2 * q2;
+
+                /* advance pointers */
+                ptrLElement += 2;
+                ptrBElement += 2 * b_stride;
+            }
+
+            if ((columnCounter & 1) != 0)
+            {
+                dReal p1, q1;
+
+                /* load p and q values */
+                p1 = ptrLElement[0];
+                q1 = ptrBElement[0 * b_stride];
+
+                /* compute outer product and add it to the Z matrix */
+                Z11 += p1 * q1;
+
+                /* advance pointers */
+                // ptrLElement += 1; -- not needed any more
+                ptrBElement += 1 * b_stride;
+            }
+
+            /* finish computing the X(i) block */
+            dReal Y11 = ptrBElement[0 * b_stride] - (Z11 + Z12);
+            ptrBElement[0 * b_stride] = Y11;
+        }
+    }
+}
+
+
+/*
+ * Estimates the amount of memory needed for cooperatively solving an L1 straight system.
+ *
+ * The requirement consists of two parts, both derived from the number of solving blocks
+ * that corresponds to rowCount and block_step:
+ *  - an array of one cellindexint progress descriptor per block;
+ *  - an array of (CCI__MAX + 1) SolveL1StraightCellContext instances per block.
+ * Each part is individually passed through dEFFICIENT_SIZE.
+ *
+ * The two partial sizes are stored into ref_solvingMemoryEstimates via assignData()
+ * and their sum is returned.
+ */
+template<unsigned int block_step>
+/*static */
+sizeint ThreadedEquationSolverLDLT::estimateCooperativelySolvingL1StraightMemoryRequirement(unsigned rowCount, SolvingL1StraightMemoryEstimates &ref_solvingMemoryEstimates)
+{
+    const unsigned solvingBlockCount = deriveSolvingL1StraightBlockCount(rowCount, block_step);
+
+    /* storage for the per-block progress descriptors */
+    sizeint descriptorBytes = dEFFICIENT_SIZE(sizeof(cellindexint) * solvingBlockCount);
+    /* storage for the per-block cell contexts */
+    sizeint contextBytes = dEFFICIENT_SIZE(sizeof(SolveL1StraightCellContext) * (CCI__MAX + 1) * solvingBlockCount);
+
+    /* remember the individual sizes for the caller... */
+    ref_solvingMemoryEstimates.assignData(descriptorBytes, contextBytes);
+
+    /* ...and report the overall requirement */
+    return descriptorBytes + contextBytes;
+}
+
+/*
+ * Resets the shared structures used for cooperatively solving an L1 straight system.
+ *
+ * The block completion progress counter is assigned zero and the progress descriptors
+ * of all the solving blocks (their count being derived from rowCount and block_step)
+ * are zero-filled. The cell contexts are not touched here.
+ */
+template<unsigned int block_step>
+/*static */
+void ThreadedEquationSolverLDLT::initializeCooperativelySolveL1StraightMemoryStructures(unsigned rowCount, 
+    atomicord32 &out_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1StraightCellContext *dUNUSED(cellContexts))
+{
+    const unsigned solvingBlockCount = deriveSolvingL1StraightBlockCount(rowCount, block_step);
+    const size_t descriptorBytes = sizeof(cellindexint) * solvingBlockCount;
+
+    /* no blocks have been completed yet */
+    out_blockCompletionProgress = 0;
+    /* clear the descriptors of all the blocks */
+    memset(blockProgressDescriptors, 0, descriptorBytes);
+}
+
+template<unsigned int block_step, unsigned int b_stride>
+void ThreadedEquationSolverLDLT::participateSolvingL1Straight(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip, 
+    volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+    SolveL1StraightCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex)
+{
+    const unsigned lookaheadRange = 32;
+    const unsigned blockCount = deriveSolvingL1StraightBlockCount(rowCount, block_step), lastBlock = blockCount - 1;
+    /* compute rows at end that are not a multiple of block size */
+    const unsigned loopX1RowCount = rowCount % block_step;
+
+    BlockProcessingState blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+
+    unsigned completedBlocks = refBlockCompletionProgress;
+    unsigned currentBlock = completedBlocks;
+    dIASSERT(completedBlocks <= blockCount);
+
+    for (bool exitLoop = completedBlocks == blockCount; !exitLoop; exitLoop = false)
+    {
+        bool goForLockedBlockPrimaryCalculation = false, goForLockedBlockDuplicateCalculation = false;
+        bool goAssigningTheResult = false, stayWithinTheBlock = false;
+
+        dReal Z[block_step];
+        dReal Y[block_step];
+
+        dReal *ptrBElement;
+
+        CellContextInstance previousContextInstance;
+        unsigned completedColumnBlock;
+        bool partialBlock;
+
+        for (cellindexint testDescriptor = blockProgressDescriptors[currentBlock]; ; )
+        {
+            if (testDescriptor == INVALID_CELLDESCRIPTOR)
+            {
+                // Invalid descriptor is the indication that the row has been fully calculated
+                // Test if this was the last row and break out if so.
+                if (currentBlock + 1 == blockCount)
+                {
+                    exitLoop = true;
+                    break;
+                }
+
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+                break;
+            }
+
+            CooperativeAtomics::AtomicReadReorderBarrier();
+            // It is necessary to read the up-to-date completedBlocks value after the descriptor retrieval
+            // as otherwise the logic below breaks
+            completedBlocks = refBlockCompletionProgress;
+
+            if (!GET_CELLDESCRIPTOR_ISLOCKED(testDescriptor))
+            {
+                completedColumnBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedColumnBlock < currentBlock || (completedColumnBlock == currentBlock && currentBlock == 0)); // Otherwise, why would the calculation have had stopped if the final column is reachable???
+                dIASSERT(completedColumnBlock <= completedBlocks); // Since the descriptor is not locked
+
+                if (completedColumnBlock == completedBlocks && currentBlock != completedBlocks)
+                {
+                    dIASSERT(completedBlocks < currentBlock);
+                    break;
+                }
+
+                if (CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], testDescriptor, MARK_CELLDESCRIPTOR_LOCKED(testDescriptor)))
+                {
+                    if (completedColumnBlock != 0)
+                    {
+                        CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                        previousContextInstance = contextInstance;
+
+                        const SolveL1StraightCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                        sourceContext.loadPrecalculatedZs(Z);
+                    }
+                    else
+                    {
+                        previousContextInstance = CCI__MIN;
+                        SolveL1StraightCellContext::initializePrecalculatedZs(Z);
+                    }
+
+                    goForLockedBlockPrimaryCalculation = true;
+                    break;
+                }
+
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                testDescriptor = blockProgressDescriptors[currentBlock];
+            }
+            else
+            {
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                cellindexint verificativeDescriptor;
+                bool verificationFailure = false;
+
+                completedColumnBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedColumnBlock != currentBlock || currentBlock == 0); // There is no reason for computations to stop at the very end other than being the initial value at the very first block
+
+                if (completedColumnBlock != 0)
+                {
+                    CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                    const SolveL1StraightCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                    sourceContext.loadPrecalculatedZs(Z);
+                }
+                else
+                {
+                    SolveL1StraightCellContext::initializePrecalculatedZs(Z);
+                }
+
+                if (completedColumnBlock != 0 && completedColumnBlock <= currentBlock)
+                {
+                    // Make sure the descriptor is re-read after the precalculates
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+                }
+
+                if (completedColumnBlock <= currentBlock)
+                {
+                    verificativeDescriptor = blockProgressDescriptors[currentBlock];
+                    verificationFailure = verificativeDescriptor != testDescriptor;
+                }
+
+                if (!verificationFailure)
+                {
+                    dIASSERT(completedColumnBlock <= currentBlock + 1);
+
+                    goForLockedBlockDuplicateCalculation = true;
+                    break;
+                }
+
+                testDescriptor = verificativeDescriptor;
+            }
+        }
+
+        if (exitLoop)
+        {
+            break;
+        }
+
+        if (goForLockedBlockPrimaryCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            // Declare and assign the variables at the top to not interfere with any branching -- the compiler is going to eliminate them anyway.
+            bool handleComputationTakenOver = false, rowEndReached = false;
+
+            const dReal *ptrLElement;
+            unsigned finalColumnBlock;
+
+            /* check if this is not the partial block of fewer rows */
+            if (currentBlock != lastBlock || loopX1RowCount == 0)
+            {
+                partialBlock = false;
+
+                if (currentBlock != 0)
+                {
+                    ptrLElement = L + (sizeint)(1 + currentBlock * block_step) * rowSkip + completedColumnBlock * block_step;
+                    ptrBElement = B + (sizeint)(completedColumnBlock * block_step) * b_stride;
+
+                    /* the inner loop that computes outer products and adds them to Z */
+                    finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                    dIASSERT(completedColumnBlock != finalColumnBlock/* || currentBlock == 0*/);
+
+                    for (unsigned columnCounter = finalColumnBlock - completedColumnBlock; ; )
+                    {
+                        dReal q1, p1, p2, p3, p4;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * b_stride];
+                        p1 = (ptrLElement - rowSkip)[0];
+                        p2 = ptrLElement[0];
+                        ptrLElement += rowSkip;
+                        p3 = ptrLElement[0];
+                        p4 = ptrLElement[0 + rowSkip];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[1 * b_stride];
+                        p3 = ptrLElement[1];
+                        p4 = ptrLElement[1 + rowSkip];
+                        ptrLElement -= rowSkip;
+                        p1 = (ptrLElement - rowSkip)[1];
+                        p2 = ptrLElement[1];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[2 * b_stride];
+                        p1 = (ptrLElement - rowSkip)[2];
+                        p2 = ptrLElement[2];
+                        ptrLElement += rowSkip;
+                        p3 = ptrLElement[2];
+                        p4 = ptrLElement[2 + rowSkip];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[3 * b_stride];
+                        p3 = ptrLElement[3];
+                        p4 = ptrLElement[3 + rowSkip];
+                        ptrLElement -= rowSkip;
+                        p1 = (ptrLElement - rowSkip)[3];
+                        p2 = ptrLElement[3];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+                        dSASSERT(block_step == 4);
+
+                        if (columnCounter > 3)
+                        {
+                            columnCounter -= 3;
+
+                            ptrLElement += 3 * block_step;
+                            ptrBElement += 3 * block_step * b_stride;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-8 * (int)b_stride];
+                            p1 = (ptrLElement - rowSkip)[-8];
+                            p2 = ptrLElement[-8];
+                            ptrLElement += rowSkip;
+                            p3 = ptrLElement[-8];
+                            p4 = ptrLElement[-8 + rowSkip];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-7 * (int)b_stride];
+                            p3 = ptrLElement[-7];
+                            p4 = ptrLElement[-7 + rowSkip];
+                            ptrLElement -= rowSkip;
+                            p1 = (ptrLElement - rowSkip)[-7];
+                            p2 = ptrLElement[-7];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-6 * (int)b_stride];
+                            p1 = (ptrLElement - rowSkip)[-6];
+                            p2 = ptrLElement[-6];
+                            ptrLElement += rowSkip;
+                            p3 = ptrLElement[-6];
+                            p4 = ptrLElement[-6 + rowSkip];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-5 * (int)b_stride];
+                            p3 = ptrLElement[-5];
+                            p4 = ptrLElement[-5 + rowSkip];
+                            ptrLElement -= rowSkip;
+                            p1 = (ptrLElement - rowSkip)[-5];
+                            p2 = ptrLElement[-5];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-4 * (int)b_stride];
+                            p1 = (ptrLElement - rowSkip)[-4];
+                            p2 = ptrLElement[-4];
+                            ptrLElement += rowSkip;
+                            p3 = ptrLElement[-4];
+                            p4 = ptrLElement[-4 + rowSkip];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-3 * (int)b_stride];
+                            p3 = ptrLElement[-3];
+                            p4 = ptrLElement[-3 + rowSkip];
+                            ptrLElement -= rowSkip;
+                            p1 = (ptrLElement - rowSkip)[-3];
+                            p2 = ptrLElement[-3];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-2 * (int)b_stride];
+                            p1 = (ptrLElement - rowSkip)[-2];
+                            p2 = ptrLElement[-2];
+                            ptrLElement += rowSkip;
+                            p3 = ptrLElement[-2];
+                            p4 = ptrLElement[-2 + rowSkip];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[-1 * (int)b_stride];
+                            p3 = ptrLElement[-1];
+                            p4 = ptrLElement[-1 + rowSkip];
+                            ptrLElement -= rowSkip;
+                            p1 = (ptrLElement - rowSkip)[-1];
+                            p2 = ptrLElement[-1];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z[0] += p1 * q1;
+                            Z[1] += p2 * q1;
+                            Z[2] += p3 * q1;
+                            Z[3] += p4 * q1;
+                            dSASSERT(block_step == 4);
+                        }
+                        else
+                        {
+                            ptrLElement += block_step;
+                            ptrBElement += block_step * b_stride;
+
+                            if (--columnCounter == 0)
+                            {
+                                if (finalColumnBlock == currentBlock)
+                                {
+                                    rowEndReached = true;
+                                    break;
+                                }
+
+                                // Take a look if any more columns have been completed...
+                                completedBlocks = refBlockCompletionProgress;
+                                dIASSERT(completedBlocks >= finalColumnBlock);
+
+                                if (completedBlocks == finalColumnBlock)
+                                {
+                                    break;
+                                }
+
+                                // ...continue if so.
+                                unsigned columnCompletedSoFar = finalColumnBlock;
+                                finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                                columnCounter = finalColumnBlock - columnCompletedSoFar;
+                            }
+                        }
+                        /* end of inner loop */
+                    }
+                }
+                else
+                {
+                    ptrLElement = L + (sizeint)(1/* + currentBlock * block_step*/) * rowSkip/* + completedColumnBlock * block_step*/;
+                    ptrBElement = B/* + (sizeint)(completedColumnBlock * block_step) * b_stride*/;
+                    dIASSERT(completedColumnBlock == 0);
+
+                    rowEndReached = true;
+                }
+            }
+            else
+            {
+                partialBlock = true;
+
+                if (currentBlock != 0)
+                {
+                    dReal tempZ[dMACRO_MAX(block_step - 1U, 1U)] = { REAL(0.0), };
+
+                    ptrLElement = L + (sizeint)(/*1 + */currentBlock * block_step) * rowSkip + completedColumnBlock * block_step;
+                    ptrBElement = B + (sizeint)(completedColumnBlock * block_step) * b_stride;
+
+                    /* the inner loop that computes outer products and adds them to Z */
+                    finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                    dIASSERT(completedColumnBlock != finalColumnBlock/* || currentBlock == 0*/);
+
+                    for (unsigned partialRow = 0, columnCompletedSoFar = completedColumnBlock; ; )
+                    {
+                        dReal Z1 = 0, Z2 = 0, Z3 = 0, Z4 = 0;
+
+                        for (unsigned columnCounter = finalColumnBlock - columnCompletedSoFar; ; )
+                        {
+                            dReal q1, q2, q3, q4, p1, p2, p3, p4;
+
+                            /* load p and q values */
+                            q1 = ptrBElement[0 * b_stride];
+                            q2 = ptrBElement[1 * b_stride];
+                            q3 = ptrBElement[2 * b_stride];
+                            q4 = ptrBElement[3 * b_stride];
+                            p1 = ptrLElement[0];
+                            p2 = ptrLElement[1];
+                            p3 = ptrLElement[2];
+                            p4 = ptrLElement[3];
+
+                            /* compute outer product and add it to the Z matrix */
+                            Z1 += p1 * q1;
+                            Z2 += p2 * q2;
+                            Z3 += p3 * q3;
+                            Z4 += p4 * q4;
+                            dSASSERT(block_step == 4);
+
+                            if (columnCounter > 3)
+                            {
+                                columnCounter -= 3;
+
+                                ptrLElement += 3 * block_step;
+                                ptrBElement += 3 * block_step * b_stride;
+                            
+                                /* load p and q values */
+                                q1 = ptrBElement[-8 * (int)b_stride];
+                                q2 = ptrBElement[-7 * (int)b_stride];
+                                q3 = ptrBElement[-6 * (int)b_stride];
+                                q4 = ptrBElement[-5 * (int)b_stride];
+                                p1 = ptrLElement[-8];
+                                p2 = ptrLElement[-7];
+                                p3 = ptrLElement[-6];
+                                p4 = ptrLElement[-5];
+
+                                /* compute outer product and add it to the Z matrix */
+                                Z1 += p1 * q1;
+                                Z2 += p2 * q2;
+                                Z3 += p3 * q3;
+                                Z4 += p4 * q4;
+
+                                /* load p and q values */
+                                q1 = ptrBElement[-4 * (int)b_stride];
+                                q2 = ptrBElement[-3 * (int)b_stride];
+                                q3 = ptrBElement[-2 * (int)b_stride];
+                                q4 = ptrBElement[-1 * (int)b_stride];
+                                p1 = ptrLElement[-4];
+                                p2 = ptrLElement[-3];
+                                p3 = ptrLElement[-2];
+                                p4 = ptrLElement[-1];
+
+                                /* compute outer product and add it to the Z matrix */
+                                Z1 += p1 * q1;
+                                Z2 += p2 * q2;
+                                Z3 += p3 * q3;
+                                Z4 += p4 * q4;
+                                dSASSERT(block_step == 4);
+                            }
+                            else
+                            {
+                                ptrLElement += block_step;
+                                ptrBElement += block_step * b_stride;
+
+                                if (--columnCounter == 0)
+                                {
+                                    break;
+                                }
+                            }
+                            /* end of inner loop */
+                        }
+
+                        tempZ[partialRow] += Z1 + Z2 + Z3 + Z4;
+
+                        if (++partialRow == loopX1RowCount)
+                        {
+                            // Here switch is used to avoid accessing Z by parametrized index. 
+                            // So far all the accesses were performed by explicit constants
+                            // what lets the compiler treat Z elements as individual variables 
+                            // rather than array elements.
+                            Z[0] += tempZ[0];
+
+                            if (loopX1RowCount >= 2)
+                            {
+                                Z[1] += tempZ[1];
+
+                                if (loopX1RowCount > 2)
+                                {
+                                    Z[2] += tempZ[2];
+                                }
+                            }
+                            dSASSERT(block_step == 4);
+
+                            if (finalColumnBlock == currentBlock)
+                            {
+                                if (loopX1RowCount > 2)
+                                {
+                                    // Correct the LElement so that it points to the second row
+                                    //
+                                    // Note, that if there is just one partial row, it does not matter that
+                                    // the LElement will remain pointing at the first row, 
+                                    // since the former is not going to be used in that case.
+                                    ptrLElement -= /*(sizeint)*/rowSkip/* * (loopX1RowCount - 2)*/; dIASSERT(loopX1RowCount == 3);
+                                }
+                                dSASSERT(block_step == 4);
+
+                                rowEndReached = true;
+                                break;
+                            }
+
+                            // Take a look if any more columns have been completed...
+                            completedBlocks = refBlockCompletionProgress;
+                            dIASSERT(completedBlocks >= finalColumnBlock);
+
+                            if (completedBlocks == finalColumnBlock)
+                            {
+                                break;
+                            }
+
+                            std::fill(tempZ, tempZ + loopX1RowCount, REAL(0.0));
+                            partialRow = 0;
+
+                            // Correct the LElement pointer to continue at the first partial row
+                            ptrLElement -= (sizeint)rowSkip * (loopX1RowCount - 1);
+
+                            // ...continue if so.
+                            columnCompletedSoFar = finalColumnBlock;
+                            finalColumnBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                        }
+                        else
+                        {
+                            ptrLElement += rowSkip - (finalColumnBlock - columnCompletedSoFar) * block_step;
+                            ptrBElement -= (sizeint)((finalColumnBlock - columnCompletedSoFar) * block_step) * b_stride;
+                        }
+                        /* end of loop by individual rows */
+                    }
+                }
+                else
+                {
+                    ptrLElement = L + (sizeint)(1/* + currentBlock * block_step*/) * rowSkip/* + completedColumnBlock * block_step*/;
+                    ptrBElement = B/* + (sizeint)(completedColumnBlock * block_step) * b_stride*/;
+                    dIASSERT(completedColumnBlock == 0);
+
+                    rowEndReached = true;
+                }
+            }
+
+            if (rowEndReached)
+            {
+                // Check whether there is still a need to proceed or if the computation has been taken over by another thread
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedColumnBlock, previousContextInstance, true);
+
+                if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                {
+                    /* finish computing the X(i) block */
+                    if (!partialBlock)
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        dReal p2 = ptrLElement[0];
+                        Y[1] = ptrBElement[1 * b_stride] - Z[1] - p2 * Y[0];
+
+                        ptrLElement += rowSkip;
+
+                        dReal p3 = ptrLElement[0];
+                        dReal p3_1 = ptrLElement[1];
+                        Y[2] = ptrBElement[2 * b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+
+                        dReal p4 = ptrLElement[rowSkip];
+                        dReal p4_1 = ptrLElement[1 + rowSkip];
+                        dReal p4_2 = ptrLElement[2 + rowSkip];
+                        Y[3] = ptrBElement[3 * b_stride] - Z[3] - p4 * Y[0] - p4_1 * Y[1] - p4_2 * Y[2];
+                        dSASSERT(block_step == 4);
+                    }
+                    else
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        if (loopX1RowCount >= 2)
+                        {
+                            dReal p2 = ptrLElement[0];
+                            Y[1] = ptrBElement[1 * b_stride] - Z[1] - p2 * Y[0];
+
+                            if (loopX1RowCount > 2)
+                            {
+                                dReal p3 = ptrLElement[0 + rowSkip];
+                                dReal p3_1 = ptrLElement[1 + rowSkip];
+                                Y[2] = ptrBElement[2 * b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+                            }
+                        }
+                        dSASSERT(block_step == 4);
+                    }
+
+                    // Use atomic memory barrier to make sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+
+                    // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                    // and the computation result is valid.
+                    if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                    {
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        SolveL1StraightCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true);
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                        goAssigningTheResult = true;
+                    }
+                    else
+                    {
+                        // Otherwise, go on competing for copying the results
+                        handleComputationTakenOver = true;
+                    }
+                }
+                else
+                {
+                    handleComputationTakenOver = true;
+                }
+            }
+            else
+            {
+                // If the final column has not been reached yet, store current values to the context.
+                // Select the other context instance as the previous one might be read by other threads.
+                CellContextInstance nextContextInstance = buildNextContextInstance(previousContextInstance);
+                SolveL1StraightCellContext &destinationContext = buildBlockContextRef(cellContexts, currentBlock, nextContextInstance);
+                destinationContext.storePrecalculatedZs(Z);
+
+                // Unlock the row until more columns can be used
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedColumnBlock, previousContextInstance, true);
+                cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(finalColumnBlock, nextContextInstance, false);
+                // The descriptor might have been updated by a competing thread
+                if (!CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor))
+                {
+                    // Adjust the ptrBElement to point to the result area...
+                    ptrBElement = B + (sizeint)(currentBlock * block_step) * b_stride;
+                    // ...and go on handling the case
+                    handleComputationTakenOver = true;
+                }
+            }
+
+            if (handleComputationTakenOver)
+            {
+                cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+                // This can only happen if the row was (has become) the uppermost not fully completed one
+                // and the competing thread is at final stage of calculation (i.e., it has reached the currentBlock column).
+                if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+                {
+                    // If not fully completed this must be the final stage of the result assignment into the matrix
+                    dIASSERT(existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true));
+
+                    // Go on competing copying the result as anyway the block is the topmost not completed one
+                    // and since there was competition for it, there is no other work that can be done right now.
+                    const SolveL1StraightCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                    resultContext.loadPrecalculatedZs(Y);
+
+                    goAssigningTheResult = true;
+                }
+                else 
+                {
+                    // everything is over -- just go handling next blocks
+                }
+            }
+        }
+        else if (goForLockedBlockDuplicateCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            bool skipToHandlingSubsequentRows = false, skiptoCopyingResult = false;
+
+            /* declare variables */
+            const dReal *ptrLElement;
+
+            if (completedColumnBlock < currentBlock)
+            {
+                /* check if this is not the partial block of fewer rows */
+                if (currentBlock != lastBlock || loopX1RowCount == 0)
+                {
+                    partialBlock = false;
+
+                    ptrLElement = L + (sizeint)(1 + currentBlock * block_step) * rowSkip + completedColumnBlock * block_step;
+                    ptrBElement = B + (sizeint)(completedColumnBlock * block_step) * b_stride;
+
+                    /* the inner loop that computes outer products and adds them to Z */
+                    unsigned finalColumnBlock = currentBlock;
+                    dIASSERT(currentBlock == completedBlocks); // Why would we be competing for a row otherwise?
+
+                    unsigned lastCompletedColumn = completedColumnBlock;
+                    unsigned columnCounter = finalColumnBlock - completedColumnBlock;
+                    for (bool exitInnerLoop = false; !exitInnerLoop; exitInnerLoop = --columnCounter == 0)
+                    {
+                        dReal q1, p1, p2, p3, p4;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * b_stride];
+                        p1 = (ptrLElement - rowSkip)[0];
+                        p2 = ptrLElement[0];
+                        ptrLElement += rowSkip;
+                        p3 = ptrLElement[0];
+                        p4 = ptrLElement[0 + rowSkip];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[1 * b_stride];
+                        p3 = ptrLElement[1];
+                        p4 = ptrLElement[1 + rowSkip];
+                        ptrLElement -= rowSkip;
+                        p1 = (ptrLElement - rowSkip)[1];
+                        p2 = ptrLElement[1];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[2 * b_stride];
+                        p1 = (ptrLElement - rowSkip)[2];
+                        p2 = ptrLElement[2];
+                        ptrLElement += rowSkip;
+                        p3 = ptrLElement[2];
+                        p4 = ptrLElement[2 + rowSkip];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[3 * b_stride];
+                        p3 = ptrLElement[3];
+                        p4 = ptrLElement[3 + rowSkip];
+                        ptrLElement -= rowSkip;
+                        p1 = (ptrLElement - rowSkip)[3];
+                        p2 = ptrLElement[3];
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[0] += p1 * q1;
+                        Z[1] += p2 * q1;
+                        Z[2] += p3 * q1;
+                        Z[3] += p4 * q1;
+                        dSASSERT(block_step == 4);
+
+                        // Check if the primary solver thread has not made any progress
+                        cellindexint descriptorVerification = blockProgressDescriptors[currentBlock];
+                        unsigned newCompletedColumn = GET_CELLDESCRIPTOR_COLUMNINDEX(descriptorVerification);
+
+                        if (newCompletedColumn != lastCompletedColumn)
+                        {
+                            // Check, this is the first change the current thread detects.
+                            // There is absolutely no reason in code for the computation to stop/resume twice 
+                            // while the current thread is competing.
+                            dIASSERT(lastCompletedColumn == completedColumnBlock);
+
+                            if (descriptorVerification == INVALID_CELLDESCRIPTOR)
+                            {
+                                skipToHandlingSubsequentRows = true;
+                                break;
+                            }
+
+                            if (newCompletedColumn == currentBlock + 1)
+                            {
+                                skiptoCopyingResult = true;
+                                break;
+                            }
+
+                            // Check if the current thread is behind
+                            if (newCompletedColumn > finalColumnBlock - columnCounter)
+                            {
+                                // If so, go starting over one more time
+                                blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                                stayWithinTheBlock = true;
+                                skipToHandlingSubsequentRows = true;
+                                break;
+                            }
+
+                            // If current thread is ahead, just save new completed column for further comparisons and go on calculating
+                            lastCompletedColumn = newCompletedColumn;
+                        }
+
+                        /* advance pointers */
+                        ptrLElement += block_step;
+                        ptrBElement += block_step * b_stride;
+                        /* end of inner loop */
+                    }
+                }
+                else
+                {
+                    partialBlock = true;
+
+                    dReal tempZ[dMACRO_MAX(block_step - 1U, 1U)] = { REAL(0.0), };
+
+                    ptrLElement = L + (sizeint)(/*1 + */currentBlock * block_step) * rowSkip + completedColumnBlock * block_step;
+                    ptrBElement = B + (sizeint)(completedColumnBlock * block_step) * b_stride;
+
+                    /* the inner loop that computes outer products and adds them to Z */
+                    unsigned finalColumnBlock = currentBlock;
+                    dIASSERT(currentBlock == completedBlocks); // Why would we be competing for a row otherwise?
+
+                    unsigned lastCompletedColumn = completedColumnBlock;
+                    for (unsigned columnCounter = finalColumnBlock - completedColumnBlock; ; )
+                    {
+                        dReal q1, q2, q3, q4;
+
+                        /* load q values */
+                        q1 = ptrBElement[0 * b_stride];
+                        q2 = ptrBElement[1 * b_stride];
+                        q3 = ptrBElement[2 * b_stride];
+                        q4 = ptrBElement[3 * b_stride];
+
+                        for (unsigned partialRow = 0; ; )
+                        {
+                            dReal p1, p2, p3, p4;
+
+                            /* load p values */
+                            p1 = ptrLElement[0];
+                            p2 = ptrLElement[1];
+                            p3 = ptrLElement[2];
+                            p4 = ptrLElement[3];
+
+                            /* compute outer product and add it to the Z matrix */
+                            tempZ[partialRow] += p1 * q1 + p2 * q2 + p3 * q3 + p4 * q4;
+                            dSASSERT(block_step == 4);
+
+                            if (++partialRow == loopX1RowCount)
+                            {
+                                break;
+                            }
+
+                            ptrLElement += rowSkip;
+                        }
+
+                        // Check if the primary solver thread has not made any progress
+                        cellindexint descriptorVerification = blockProgressDescriptors[currentBlock];
+                        unsigned newCompletedColumn = GET_CELLDESCRIPTOR_COLUMNINDEX(descriptorVerification);
+
+                        if (newCompletedColumn != lastCompletedColumn)
+                        {
+                            // Check, this is the first change the current thread detects.
+                            // There is absolutely no reason in code for the computation to stop/resume twice 
+                            // while the current thread is competing.
+                            dIASSERT(lastCompletedColumn == completedColumnBlock);
+
+                            if (descriptorVerification == INVALID_CELLDESCRIPTOR)
+                            {
+                                skipToHandlingSubsequentRows = true;
+                                break;
+                            }
+
+                            if (newCompletedColumn == currentBlock + 1)
+                            {
+                                skiptoCopyingResult = true;
+                                break;
+                            }
+
+                            // Check if the current thread is behind
+                            if (newCompletedColumn > finalColumnBlock - columnCounter)
+                            {
+                                // If so, go starting over one more time
+                                blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                                stayWithinTheBlock = true;
+                                skipToHandlingSubsequentRows = true;
+                                break;
+                            }
+
+                            // If current thread is ahead, just save new completed column for further comparisons and go on calculating
+                            lastCompletedColumn = newCompletedColumn;
+                        }
+
+                        ptrLElement += block_step;
+                        ptrBElement += block_step * b_stride;
+
+                        if (--columnCounter == 0)
+                        {
+                            // Here switch is used to avoid accessing Z by parametrized index. 
+                            // So far all the accesses were performed by explicit constants
+                            // what lets the compiler treat Z elements as individual variables 
+                            // rather than array elements.
+                            Z[0] += tempZ[0];
+
+                            if (loopX1RowCount >= 2)
+                            {
+                                Z[1] += tempZ[1];
+
+                                if (loopX1RowCount > 2)
+                                {
+                                    Z[2] += tempZ[2];
+
+                                    // Correct the LElement so that it points to the second row
+                                    //
+                                    // Note, that if there is just one partial row, it does not matter that 
+                                    // the LElement will remain pointing at the first row, 
+                                    // since the former is not going to be used in that case.
+                                    ptrLElement -= /*(sizeint)*/rowSkip/* * (loopX1RowCount - 2)*/; dIASSERT(loopX1RowCount == 3);
+                                }
+                            }
+                            dSASSERT(block_step == 4);
+
+                            break;
+                        }
+
+                        /* advance pointers */
+                        ptrLElement -= (sizeint)rowSkip * (loopX1RowCount - 1);
+                        /* end of inner loop */
+                    }
+                }
+            }
+            else if (completedColumnBlock > currentBlock)
+            {
+                dIASSERT(completedColumnBlock == currentBlock + 1);
+
+                partialBlock = currentBlock == lastBlock && loopX1RowCount != 0;
+
+                skiptoCopyingResult = true;
+            }
+            else
+            {
+                dIASSERT(currentBlock == 0); // Execution can get here within the very first block only
+
+                partialBlock = rowCount < block_step;
+
+                /* assign the pointers appropriately and go on computing the results */
+                ptrLElement = L + (sizeint)(1/* + currentBlock * block_step*/) * rowSkip/* + completedColumnBlock * block_step*/;
+                ptrBElement = B/* + (sizeint)(completedColumnBlock * block_step) * b_stride*/;
+            }
+
+            if (!skipToHandlingSubsequentRows)
+            {
+                if (!skiptoCopyingResult)
+                {
+                    if (!partialBlock)
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        dReal p2 = ptrLElement[0];
+                        Y[1] = ptrBElement[1 * b_stride] - Z[1] - p2 * Y[0];
+
+                        ptrLElement += rowSkip;
+
+                        dReal p3 = ptrLElement[0];
+                        dReal p3_1 = ptrLElement[1];
+                        Y[2] = ptrBElement[2 * b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+
+                        dReal p4 = ptrLElement[rowSkip];
+                        dReal p4_1 = ptrLElement[1 + rowSkip];
+                        dReal p4_2 = ptrLElement[2 + rowSkip];
+                        Y[3] = ptrBElement[3 * b_stride] - Z[3] - p4 * Y[0] - p4_1 * Y[1] - p4_2 * Y[2];
+                        dSASSERT(block_step == 4);
+                    }
+                    else
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        if (loopX1RowCount >= 2)
+                        {
+                            dReal p2 = ptrLElement[0];
+                            Y[1] = ptrBElement[1 * b_stride] - Z[1] - p2 * Y[0];
+
+                            if (loopX1RowCount > 2)
+                            {
+                                dReal p3 = ptrLElement[0 + rowSkip];
+                                dReal p3_1 = ptrLElement[1 + rowSkip];
+                                Y[2] = ptrBElement[2 * b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+                            }
+                        }
+                        dSASSERT(block_step == 4);
+                    }
+
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+
+                    // The read reorder barrier above makes sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+
+                    if (existingDescriptor == INVALID_CELLDESCRIPTOR)
+                    {
+                        // Everything is over -- proceed to subsequent rows
+                        skipToHandlingSubsequentRows = true;
+                    }
+                    else if (existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true))
+                    {
+                        // The values computed above may not be valid. Copy the values already in the result context.
+                        skiptoCopyingResult = true;
+                    }
+                    else
+                    {
+                        // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                        // and the computation result is valid.
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true); // put the computation at the top so that the evaluation result from the expression above is reused
+
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        SolveL1StraightCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], existingDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                    }
+                }
+
+                if (!skipToHandlingSubsequentRows)
+                {
+                    if (skiptoCopyingResult)
+                    {
+                        // Extract the result values stored in the result context
+                        const SolveL1StraightCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.loadPrecalculatedZs(Y);
+
+                        ptrBElement = B + (sizeint)(currentBlock * block_step) * b_stride;
+                    }
+
+                    goAssigningTheResult = true;
+                }
+            }
+        }
+
+        if (goAssigningTheResult)
+        {
+            cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+            // Check if the assignment has not been completed yet
+            if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+            {
+                // Assign the computation results to the B vector
+                if (!partialBlock)
+                {
+                    ptrBElement[0 * b_stride] = Y[0];
+                    ptrBElement[1 * b_stride] = Y[1];
+                    ptrBElement[2 * b_stride] = Y[2];
+                    ptrBElement[3 * b_stride] = Y[3];
+                    dSASSERT(block_step == 4);
+                }
+                else
+                {
+                    ptrBElement[0 * b_stride] = Y[0];
+
+                    if (loopX1RowCount >= 2)
+                    {
+                        ptrBElement[1 * b_stride] = Y[1];
+
+                        if (loopX1RowCount > 2)
+                        {
+                            ptrBElement[2 * b_stride] = Y[2];
+                        }
+                    }
+                    dSASSERT(block_step == 4);
+                }
+
+                ThrsafeIncrementIntUpToLimit(&refBlockCompletionProgress, currentBlock + 1);
+                dIASSERT(refBlockCompletionProgress >= currentBlock + 1);
+
+                // And assign the completed status no matter what
+                CooperativeAtomics::AtomicStoreCellindexint(&blockProgressDescriptors[currentBlock], INVALID_CELLDESCRIPTOR);
+            }
+            else 
+            {
+                // everything is over -- just go handling next blocks
+            }
+        }
+
+        if (!stayWithinTheBlock)
+        {
+            completedBlocks = refBlockCompletionProgress;
+
+            if (completedBlocks == blockCount)
+            {
+                break;
+            }
+
+            currentBlock += 1;
+
+            bool lookaheadBoundaryReached = false;
+
+            if (currentBlock == blockCount || completedBlocks == 0)
+            {
+                lookaheadBoundaryReached = true;
+            }
+            else if (currentBlock >= completedBlocks + lookaheadRange)
+            {
+                lookaheadBoundaryReached = blockProcessingState > BPS_NO_BLOCKS_PROCESSED;
+            }
+            else if (currentBlock < completedBlocks)
+            {
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+
+                currentBlock = completedBlocks;
+            }
+
+            if (lookaheadBoundaryReached)
+            {
+                dIASSERT(blockProcessingState != BPS_COMPETING_FOR_A_BLOCK); // Why did not we compete???
+
+                // If no row has been processed in the previous pass, compete for the next row to avoid cycling uselessly
+                if (blockProcessingState <= BPS_NO_BLOCKS_PROCESSED)
+                {
+                    // Abandon job if too few blocks remain
+                    if (blockCount - completedBlocks <= ownThreadIndex)
+                    {
+                        break;
+                    }
+
+                    blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                }
+                else
+                {
+                    // If there was some progress, just continue to the next pass
+                    blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+                }
+
+                currentBlock = completedBlocks;
+            }
+        }
+    }
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/fastltsolve.cpp b/libs/ode-0.16.1/ode/src/fastltsolve.cpp
new file mode 100644
index 0000000..e9c7ec5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastltsolve.cpp
@@ -0,0 +1,229 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * L1Transposed Equation Solving Routines
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+#include "error.h"
+
+#include "fastltsolve_impl.h"
+
+
+// Merges the resources needed by a later cooperativelySolveL1Transposed() call
+// into summaryRequirementsDescriptor.
+//
+// summaryRequirementsDescriptor -- descriptor to merge the requirements into; also
+//                                  supplies the threading object used to restrict
+//                                  the thread count
+// allowedThreadCount            -- caller-imposed limit on the threads to be used
+// rowCount                      -- number of rows of the system to be solved
+//
+// Nothing is merged when the restricted thread count is 1 or less, since
+// cooperativelySolveL1Transposed() then takes the single-threaded path.
+/*static */
+void ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    dxThreadingBase *threading = summaryRequirementsDescriptor->getrelatedThreading();
+    unsigned limitedThreadCount = restrictSolvingL1TransposedAllowedThreadCount(threading, allowedThreadCount, rowCount);
+
+    if (limitedThreadCount > 1)
+    {
+        // Hand over the restricted count (not the raw caller limit): this is the value
+        // cooperativelySolveL1Transposed() passes to its own "Validated" helper, and it is
+        // the only one for which the helper's "threadCountToUse > 1" assertion is backed
+        // by the check above.
+        doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(summaryRequirementsDescriptor, limitedThreadCount, rowCount);
+    }
+}
+
+// Solves the L1-transposed system for right hand side b, choosing between the
+// cooperative (multi-threaded) and the plain single-threaded implementation.
+//
+// resourceContainer  -- provides the threading object and the pre-allocated resources
+// allowedThreadCount -- caller-imposed limit on the threads to be used
+// L, rowSkip         -- the matrix and its row stride
+// b                  -- right hand side, accessed with SL1T_B_STRIDE
+// rowCount           -- number of rows; must not be zero
+/*static */
+void ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+
+    dxThreadingBase *const poolThreading = resourceContainer->getThreadingInstance();
+    const unsigned usableThreadCount = restrictSolvingL1TransposedAllowedThreadCount(poolThreading, allowedThreadCount, rowCount);
+
+    if (usableThreadCount > 1)
+    {
+        // More than one thread is available and worthwhile -- go cooperative
+        doCooperativelySolveL1TransposedValidated(resourceContainer, usableThreadCount, L, b, rowCount, rowSkip);
+        return;
+    }
+
+    // Otherwise solve right here in the calling thread
+    solveL1Transposed<SL1T_B_STRIDE>(L, b, rowCount, rowSkip);
+}
+
+
+// Returns the number of threads that may take part in solving a system of rowCount rows.
+//
+// The result is 1 unless cooperative processing is compiled in (dCOOPERATIVE_ENABLED)
+// and the system splits into at least SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM blocks;
+// in that case the threading object is asked to limit allowedThreadCount.
+/*static */
+unsigned ThreadedEquationSolverLDLT::restrictSolvingL1TransposedAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount)
+{
+#if dCOOPERATIVE_ENABLED
+    const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+    const unsigned blockTotal = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
+    dIASSERT(deriveSolvingL1TransposedThreadCount(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM, 2) > 1);
+
+    if (blockTotal >= SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+    {
+        return threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+
+    // Too few blocks (or no cooperative support) -- single thread only
+    return 1;
+}
+
+// Computes the memory, alignment, simultaneous call count and feature flags needed by
+// the cooperative solve of rowCount rows and merges them into summaryRequirementsDescriptor.
+// The caller is expected to have checked that more than one thread is going to be used.
+/*static */
+void ThreadedEquationSolverLDLT::doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned rowCount)
+{
+    const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+
+    unsigned totalBlocks = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
+    dIASSERT(totalBlocks >= 1);
+
+    unsigned participantCount = deriveSolvingL1TransposedThreadCount(totalBlocks, allowedThreadCount);
+    dIASSERT(participantCount > 1);
+
+    // Memory for the block progress descriptors and the cell contexts
+    SolvingL1TransposedMemoryEstimates memoryEstimates;
+    sizeint memoryBytesNeeded = estimateCooperativelySolvingL1TransposedMemoryRequirement<blockStep>(rowCount, memoryEstimates);
+    const unsigned memoryAlignment = ALLOCATION_DEFAULT_ALIGNMENT;
+
+    // One completion call plus a work call for each participant other than the caller
+    unsigned callsInFlight = 1 + (participantCount - 1);
+    unsigned requiredFeatures = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
+
+    summaryRequirementsDescriptor->mergeAnotherDescriptorIn(memoryBytesNeeded, memoryAlignment, callsInFlight, requiredFeatures);
+}
+
+// Performs the cooperative (multi-threaded) L1-transposed solve.
+//
+// resourceContainer  -- supplies the stock call wait, the work memory buffer and the threading object
+// allowedThreadCount -- already restricted thread count; must be greater than 1
+// L, rowSkip         -- the matrix and its row stride
+// b                  -- right hand side, accessed with SL1T_B_STRIDE
+// rowCount           -- number of rows
+//
+// The calling thread posts (threadCountToUse - 1) worker calls, takes part in the
+// solving itself and then waits until the completion call has been released.
+/*static */
+void ThreadedEquationSolverLDLT::doCooperativelySolveL1TransposedValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(allowedThreadCount > 1);
+
+    const unsigned int blockStep = SL1T_BLOCK_SIZE; // Required by the implementation
+    unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, blockStep);
+    dIASSERT(blockCount >= 1);
+
+    unsigned threadCountToUse = deriveSolvingL1TransposedThreadCount(blockCount, allowedThreadCount);
+    dIASSERT(threadCountToUse > 1);
+
+    dCallWaitID completionWait = resourceContainer->getStockCallWait();
+    dAASSERT(completionWait != NULL);
+
+    // Shared state of the solve: the progress counter lives on this stack frame,
+    // the descriptors and the cell contexts are carved out of the container's buffer below
+    atomicord32 blockCompletionProgress;
+    cellindexint *blockProgressDescriptors;
+    SolveL1TransposedCellContext *cellContexts;
+
+    // The buffer must be at least as large as what the estimation reports for this rowCount
+    SolvingL1TransposedMemoryEstimates solvingMemoryEstimates;
+    sizeint solvingMemoryRequired = estimateCooperativelySolvingL1TransposedMemoryRequirement<blockStep>(rowCount, solvingMemoryEstimates);
+    dIASSERT(solvingMemoryRequired <= resourceContainer->getMemoryBufferSize());
+
+    void *bufferAllocated = resourceContainer->getMemoryBufferPointer();
+    dIASSERT(bufferAllocated != NULL);
+    dIASSERT(dALIGN_PTR(bufferAllocated, ALLOCATION_DEFAULT_ALIGNMENT) == bufferAllocated);
+
+    // Assign blockProgressDescriptors and cellContexts their places within the buffer
+    void *bufferCurrentLocation = bufferAllocated;
+    bufferCurrentLocation = markCooperativelySolvingL1TransposedMemoryStructuresOut(bufferCurrentLocation, solvingMemoryEstimates, blockProgressDescriptors, cellContexts);
+    dIVERIFY(bufferCurrentLocation <= (uint8 *)bufferAllocated + solvingMemoryRequired);
+
+    initializeCooperativelySolveL1TransposedMemoryStructures<blockStep>(rowCount, blockCompletionProgress, blockProgressDescriptors, cellContexts);
+
+    dCallReleaseeID calculationFinishReleasee;
+    SolveL1TransposedWorkerContext workerContext; // The variable must exist in the outer scope
+
+    workerContext.init(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts);
+
+    // Post the completion call first (it is given a count of threadCountToUse - 1 and the wait object),
+    // then the group of threadCountToUse - 1 worker calls that reference it as their releasee
+    dxThreadingBase *threading = resourceContainer->getThreadingInstance();
+    threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &solveL1Transposed_completion_callback, NULL, 0, "SolveL1Transposed Completion");
+    threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &solveL1Transposed_worker_callback, &workerContext, "SolveL1Transposed Work");
+
+    // The calling thread participates as well, using threadCountToUse - 1 as its thread index
+    participateSolvingL1Transposed<blockStep, SL1T_B_STRIDE>(L, b, rowCount, rowSkip, blockCompletionProgress, blockProgressDescriptors, cellContexts, threadCountToUse - 1);
+
+    // Do not return before the wait is signaled: workerContext and blockCompletionProgress
+    // are locals of this function and are referenced by the posted calls
+    threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "SolveL1Transposed End Wait");
+}
+
+// Threaded call entry point for a solving worker.
+// callContext is the SolveL1TransposedWorkerContext passed when the calls group was posted;
+// callInstanceIndex is forwarded to the worker as its thread index. Always returns 1.
+/*static */
+int ThreadedEquationSolverLDLT::solveL1Transposed_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    SolveL1TransposedWorkerContext &workerContext = *(SolveL1TransposedWorkerContext *)callContext;
+    const unsigned workerIndex = dCAST_TO_SMALLER(unsigned, callInstanceIndex);
+
+    solveL1Transposed_worker(workerContext, workerIndex);
+
+    return 1;
+}
+
+// Worker body: unpacks the shared context and joins the cooperative solving
+// under the given thread index.
+/*static */
+void ThreadedEquationSolverLDLT::solveL1Transposed_worker(SolveL1TransposedWorkerContext &ref_context, unsigned ownThreadIndex)
+{
+    const unsigned blockStep = SL1T_BLOCK_SIZE;
+
+    // Problem definition
+    const dReal *matrixL = ref_context.m_L;
+    dReal *vectorB = ref_context.m_b;
+    unsigned rows = ref_context.m_rowCount;
+    unsigned skip = ref_context.m_rowSkip;
+
+    // State shared by all the participating threads
+    volatile atomicord32 &sharedProgress = *ref_context.m_ptrBlockCompletionProgress;
+    volatile cellindexint *sharedDescriptors = ref_context.m_blockProgressDescriptors;
+    SolveL1TransposedCellContext *sharedContexts = ref_context.m_cellContexts;
+
+    participateSolvingL1Transposed<blockStep, SL1T_B_STRIDE>(matrixL, vectorB, rows, skip, 
+        sharedProgress, sharedDescriptors, sharedContexts, ownThreadIndex);
+}
+
+// Callback of the completion call posted by doCooperativelySolveL1TransposedValidated().
+// It performs no work and ignores all of its arguments; the call exists for the wait
+// object that is attached to it when it is posted. Always returns 1.
+/*static */
+int ThreadedEquationSolverLDLT::solveL1Transposed_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    return 1;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+// Single-threaded public entry point for the L1-transposed solve.
+//
+// L        -- the matrix, stored by rows with a row stride of rowSkip
+// B        -- right hand side with consecutive elements (stride 1); overwritten with the solution
+// rowCount -- number of rows; must be positive
+// rowSkip  -- row stride of L
+//
+// The call is a no-op (after asserting in builds with argument assertions enabled)
+// when rowCount is not positive.
+/*extern ODE_API */
+void dSolveL1T(const dReal *L, dReal *B, int rowCount, int rowSkip)
+{
+    dAASSERT(rowCount > 0);
+
+    // The solver takes unsigned sizes: a negative count would silently turn into
+    // a huge row count there, so reject everything that is not positive
+    if (rowCount > 0)
+    {
+        dAASSERT(L != NULL);
+        dAASSERT(B != NULL);
+
+        solveL1Transposed<1>(L, B, (unsigned)rowCount, (unsigned)rowSkip);
+    }
+}
+
+
+// Public wrapper: unwraps the opaque requirements handle and forwards to
+// ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements().
+// requirements must not be NULL.
+/*extern ODE_API */
+void dEstimateCooperativelySolveL1TransposedResourceRequirements(dResourceRequirementsID requirements, 
+    unsigned maximalAllowedThreadCount, unsigned maximalRowCount)
+{
+    dAASSERT(requirements != NULL);
+
+    ThreadedEquationSolverLDLT::estimateCooperativeSolvingL1TransposedResourceRequirements(
+        (dxResourceRequirementDescriptor *)requirements, maximalAllowedThreadCount, maximalRowCount);
+}
+
+// Public wrapper: unwraps the opaque resource container handle and forwards to
+// ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed().
+// resources must not be NULL.
+/*extern ODE_API */
+void dCooperativelySolveL1Transposed(dResourceContainerID resources, unsigned allowedThreadCount, 
+    const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip)
+{
+    dAASSERT(resources != NULL);
+
+    ThreadedEquationSolverLDLT::cooperativelySolveL1Transposed(
+        (dxRequiredResourceContainer *)resources, allowedThreadCount, L, b, rowCount, rowSkip);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/fastltsolve_impl.h b/libs/ode-0.16.1/ode/src/fastltsolve_impl.h
new file mode 100644
index 0000000..ca30d9c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastltsolve_impl.h
@@ -0,0 +1,1440 @@
+
+
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Code style improvements and optimizations by Oleh Derevenko ????-2019
+ * L1Transposed cooperative solving code of ThreadedEquationSolverLDLT copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")  
+ */
+
+
+#ifndef _ODE_FASTLTSOLVE_IMPL_H_
+#define _ODE_FASTLTSOLVE_IMPL_H_
+
+
+/* Solve L^T * x = b, with b containing 1 right hand side.
+ * L is an n*n lower triangular matrix with ones on the diagonal.
+ * L is stored by rows and its leading dimension is rowSkip.
+ * B is an n*1 matrix that contains the right hand side; its consecutive
+ * entries are b_stride elements apart.
+ * B is overwritten with x.
+ * rowCount (n) must not be zero.
+ * The rows are solved from the bottom up: first the trailing
+ * rowCount % 4 rows, then full blocks of 4 rows.
+ */
+
+template<unsigned int b_stride>
+void solveL1Transposed(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip)
+{
+    dIASSERT(rowCount != 0);
+
+    /* special handling for L and B because we're solving L1 *transpose* */
+    /* both pointers address the very last row: the bottom diagonal element of L and the last entry of B */
+    const dReal *lastLElement = L + (sizeint)(rowCount - 1) * (rowSkip + 1);
+    dReal *lastBElement = B + (sizeint)(rowCount - 1) * b_stride;
+
+    /* compute rows at end that are not a multiple of block size */
+    const unsigned loopX1RowCount = rowCount % 4;
+    
+    /* blockStartRow is the number of bottom rows that have already been solved */
+    unsigned blockStartRow = loopX1RowCount;
+    bool subsequentPass  = false;
+
+    /* compute rightmost bottom X(i) block */
+    if (loopX1RowCount != 0)
+    {
+        /* some rows get solved here, so every 4-row block below has solved rows to account for */
+        subsequentPass = true;
+
+        const dReal *ptrLElement = lastLElement;
+        dReal *ptrBElement = lastBElement;
+
+        /* the last row depends on nothing else (unit diagonal), its B entry is the solution already */
+        dReal Y11 = ptrBElement[0 * b_stride]/* - Z11*/;
+        // ptrBElement[0 * b_stride] = Y11; -- unchanged
+
+        if (loopX1RowCount >= 2)
+        {
+            /* second to last row: subtract the contribution of the last row's solution */
+            dReal p2 = ptrLElement[-1];
+            dReal Y21 = ptrBElement[-1 * (int)b_stride]/* - Z21 */- p2 * Y11;
+            ptrBElement[-1 * (int)b_stride] = Y21;
+            
+            if (loopX1RowCount > 2)
+            {
+                /* third to last row: subtract the contributions of the two solutions below */
+                dReal p3 = ptrLElement[-2];
+                dReal p3_1 = (ptrLElement - rowSkip)[-2];
+                dReal Y31 = ptrBElement[-2 * (int)b_stride]/* - Z31 */- p3 * Y11 - p3_1 * Y21;
+                ptrBElement[-2 * (int)b_stride] = Y31;
+            }
+        }
+    }
+    
+    /* compute all 4 x 1 blocks of X */
+    /* the very first pass (when no trailing rows were solved above) is entered unconditionally */
+    for (; !subsequentPass || blockStartRow < rowCount; subsequentPass = true, blockStartRow += 4)
+    {
+        /* compute all 4 x 1 block of X, from rows i..i+4-1 */
+
+        /* declare variables - Z matrix, p and q vectors, etc */
+        const dReal *ptrLElement;
+        dReal *ptrBElement;
+
+        dReal Z41, Z31, Z21, Z11;
+
+        if (subsequentPass)
+        {
+            /* start at the last row of L, shifted left to the rightmost column of the current block */
+            ptrLElement = lastLElement - blockStartRow;
+            ptrBElement = lastBElement;
+
+            /* set the Z matrix to 0 */
+            Z41 = 0; Z31 = 0; Z21 = 0; Z11 = 0;
+
+            /* number of already solved rows that still have to be accumulated into Z */
+            unsigned rowCounter = blockStartRow;
+
+            /* peel a single row off if the count is odd... */
+            if (rowCounter % 2 != 0)
+            {
+                dReal q1, p4, p3, p2, p1;
+
+                /* load p and q values */
+                q1 = ptrBElement[0 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                ptrBElement -= 1 * b_stride;
+                rowCounter -= 1;
+            }
+
+            /* ...and two more rows if the remainder is not a multiple of 4, */
+            /* so that the loop below deals with whole groups of 4 rows only */
+            if (rowCounter % 4 != 0)
+            {
+                dReal q1, p4, p3, p2, p1;
+
+                /* load p and q values */
+                q1 = ptrBElement[0 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[-1 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                ptrBElement -= 2 * b_stride;
+                rowCounter -= 2;
+            }
+
+            /* the inner loop that computes outer products and adds them to Z */
+            /* it is skipped if nothing remains and is otherwise left via the break at its bottom */
+            for (bool exitLoop = rowCounter == 0; !exitLoop; exitLoop = false)
+            {
+                dReal q1, p4, p3, p2, p1;
+
+                /* load p and q values */
+                q1 = ptrBElement[0 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[-1 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[-2 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                /* load p and q values */
+                q1 = ptrBElement[-3 * (int)b_stride];
+                p4 = ptrLElement[-3];
+                p3 = ptrLElement[-2];
+                p2 = ptrLElement[-1];
+                p1 = ptrLElement[0];
+                ptrLElement -= rowSkip;
+
+                /* compute outer product and add it to the Z matrix */
+                Z41 += p4 * q1;
+                Z31 += p3 * q1;
+                Z21 += p2 * q1;
+                Z11 += p1 * q1;
+
+                if (rowCounter > 12)
+                {
+                    /* more than 12 rows remain: handle 8 more rows (12 in total) within this iteration */
+                    rowCounter -= 12;
+
+                    /* B has been moved back by all 12 rows already, hence the positive offsets 8..1 below */
+                    ptrBElement -= 12 * b_stride;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[8 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[7 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[6 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[5 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[4 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[3 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[2 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[1 * b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z41 += p4 * q1;
+                    Z31 += p3 * q1;
+                    Z21 += p2 * q1;
+                    Z11 += p1 * q1;
+                }
+                else
+                {
+                    /* 12 rows or fewer were remaining: account for the 4 rows done above and stop once none is left */
+                    ptrBElement -= 4 * b_stride;
+
+                    if ((rowCounter -= 4) == 0)
+                    {
+                        break;
+                    }
+                }
+                /* end of inner loop */
+            }
+        }
+        else
+        {
+            /* the very first block with no rows solved before it: there is nothing to accumulate */
+            ptrLElement = lastLElement/* - blockStartRow*/; dIASSERT(blockStartRow == 0);
+            ptrBElement = lastBElement;
+
+            /* set the Z matrix to 0 */
+            Z41 = 0; Z31 = 0; Z21 = 0; Z11 = 0;
+        }
+
+        /* finish computing the X(i) block */
+        /* ptrLElement and ptrBElement now address the diagonal element and the B entry of the block's bottom row; */
+        /* each row subtracts its accumulated Z and the contributions of the block rows solved just before it */
+        dReal Y11, Y21, Y31, Y41;
+        {
+            Y11 = ptrBElement[0 * b_stride] - Z11;
+            ptrBElement[0 * b_stride] = Y11;
+        }
+        {
+            dReal p2 = ptrLElement[-1];
+            Y21 = ptrBElement[-1 * (int)b_stride] - Z21 - p2 * Y11;
+            ptrBElement[-1 * (int)b_stride] = Y21;
+        }
+        {
+            dReal p3 = ptrLElement[-2];
+            dReal p3_1 = (ptrLElement - rowSkip)[-2];
+            Y31 = ptrBElement[-2 * (int)b_stride] - Z31 - p3 * Y11 - p3_1 * Y21;
+            ptrBElement[-2 * (int)b_stride] = Y31;
+        }
+        {
+            dReal p4 = ptrLElement[-3];
+            dReal p4_1 = (ptrLElement - rowSkip)[-3];
+            dReal p4_2 = (ptrLElement - rowSkip * 2)[-3];
+            Y41 = ptrBElement[-3 * (int)b_stride] - Z41 - p4 * Y11 - p4_1 * Y21 - p4_2 * Y31;
+            ptrBElement[-3 * (int)b_stride] = Y41;
+        }
+        /* end of outer loop */
+    }
+}
+
+
+
+/* Computes the work memory needed for cooperatively solving rowCount rows.
+ * The sizes of the two parts (the block progress descriptors and the cell contexts)
+ * are stored into ref_solvingMemoryEstimates; their sum is returned.
+ */
+template<unsigned int block_step>
+/*static */
+sizeint ThreadedEquationSolverLDLT::estimateCooperativelySolvingL1TransposedMemoryRequirement(unsigned rowCount, SolvingL1TransposedMemoryEstimates &ref_solvingMemoryEstimates)
+{
+    const unsigned totalBlocks = deriveSolvingL1TransposedBlockCount(rowCount, block_step);
+
+    /* one descriptor per block */
+    sizeint descriptorBytes = dEFFICIENT_SIZE(sizeof(cellindexint) * totalBlocks);
+    /* (CCI__MAX + 1) contexts per block */
+    sizeint contextBytes = dEFFICIENT_SIZE(sizeof(SolveL1TransposedCellContext) * (CCI__MAX + 1) * totalBlocks);
+
+    ref_solvingMemoryEstimates.assignData(descriptorBytes, contextBytes);
+
+    return descriptorBytes + contextBytes;
+}
+
+/* Puts the shared solving state into its initial condition: all the block progress
+ * descriptors are zero-filled and the completion progress counter is reset to 0.
+ * The cell contexts are not touched.
+ */
+template<unsigned int block_step>
+/*static */
+void ThreadedEquationSolverLDLT::initializeCooperativelySolveL1TransposedMemoryStructures(unsigned rowCount, 
+    atomicord32 &out_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1TransposedCellContext *dUNUSED(cellContexts))
+{
+    const unsigned descriptorCount = deriveSolvingL1TransposedBlockCount(rowCount, block_step);
+
+    memset(blockProgressDescriptors, 0, descriptorCount * sizeof(blockProgressDescriptors[0]));
+    out_blockCompletionProgress = 0;
+}
+
+template<unsigned int block_step, unsigned int b_stride>
+/*static */
+void ThreadedEquationSolverLDLT::participateSolvingL1Transposed(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip, 
+    volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+    SolveL1TransposedCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex)
+{
+    const unsigned lookaheadRange = 32;
+    const unsigned blockCount = deriveSolvingL1TransposedBlockCount(rowCount, block_step);
+    /* compute rows at end that are not a multiple of block size */
+    const unsigned loopX1RowCount = rowCount % block_step;
+
+    /* special handling for L and B because we're solving L1 *transpose* */
+    const dReal *lastLElement = L + (rowCount - 1) * ((sizeint)rowSkip + 1);
+    dReal *lastBElement = B + (rowCount - 1) * (sizeint)b_stride;
+
+    /* elements adjusted as if the last block was full block_step elements */
+    unsigned x1AdjustmentElements = (block_step - loopX1RowCount) % block_step;
+    const dReal *columnAdjustedLastLElement = lastLElement + x1AdjustmentElements;
+    const dReal *fullyAdjustedLastLElement = columnAdjustedLastLElement + (sizeint)rowSkip * x1AdjustmentElements;
+    dReal *adjustedLastBElement = lastBElement + b_stride * x1AdjustmentElements;
+
+    BlockProcessingState blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+
+    unsigned completedBlocks = refBlockCompletionProgress;
+    unsigned currentBlock = completedBlocks;
+    dIASSERT(completedBlocks <= blockCount);
+
+    for (bool exitLoop = completedBlocks == blockCount; !exitLoop; exitLoop = false)
+    {
+        bool goForLockedBlockPrimaryCalculation = false, goForLockedBlockDuplicateCalculation = false;
+        bool goAssigningTheResult = false, stayWithinTheBlock = false;
+
+        dReal Z[block_step];
+        dReal Y[block_step];
+
+        dReal *ptrBElement;
+
+        CellContextInstance previousContextInstance;
+        unsigned completedRowBlock;
+        bool partialBlock;
+
+        for (cellindexint testDescriptor = blockProgressDescriptors[currentBlock]; ; )
+        {
+            if (testDescriptor == INVALID_CELLDESCRIPTOR)
+            {
+                // Invalid descriptor is the indication that the row has been fully calculated
+                // Test if this was the last row and break out if so.
+                if (currentBlock + 1 == blockCount)
+                {
+                    exitLoop = true;
+                    break;
+                }
+
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+                break;
+            }
+
+            CooperativeAtomics::AtomicReadReorderBarrier();
+            // It is necessary to read the up-to-date completedBlocks value after the descriptor retrieval,
+            // as otherwise the logic below breaks
+            completedBlocks = refBlockCompletionProgress;
+
+            if (!GET_CELLDESCRIPTOR_ISLOCKED(testDescriptor))
+            {
+                completedRowBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedRowBlock < currentBlock || (completedRowBlock == currentBlock && currentBlock == 0)); // Otherwise, why would the calculation have had stopped if the final column is reachable???
+                dIASSERT(completedRowBlock <= completedBlocks); // Since the descriptor is not locked
+
+                if (completedRowBlock == completedBlocks && currentBlock != completedBlocks)
+                {
+                    dIASSERT(completedBlocks < currentBlock);
+                    break;
+                }
+
+                if (CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], testDescriptor, MARK_CELLDESCRIPTOR_LOCKED(testDescriptor)))
+                {
+                    if (completedRowBlock != 0)
+                    {
+                        CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                        previousContextInstance = contextInstance;
+
+                        const SolveL1TransposedCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                        sourceContext.loadPrecalculatedZs(Z);
+                    }
+                    else
+                    {
+                        previousContextInstance = CCI__MIN;
+                        SolveL1TransposedCellContext::initializePrecalculatedZs(Z);
+                    }
+
+                    goForLockedBlockPrimaryCalculation = true;
+                    break;
+                }
+
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                testDescriptor = blockProgressDescriptors[currentBlock];
+            }
+            else
+            {
+                if (blockProcessingState != BPS_COMPETING_FOR_A_BLOCK)
+                {
+                    break;
+                }
+
+                cellindexint verificativeDescriptor;
+                bool verificationFailure = false;
+
+                completedRowBlock = GET_CELLDESCRIPTOR_COLUMNINDEX(testDescriptor);
+                dIASSERT(completedRowBlock != currentBlock || currentBlock == 0); // There is no reason for computations to stop at the very end other than being the initial value at the very first block
+
+                if (completedRowBlock != 0)
+                {
+                    CellContextInstance contextInstance = GET_CELLDESCRIPTOR_CONTEXTINSTANCE(testDescriptor);
+                    const SolveL1TransposedCellContext &sourceContext = buildBlockContextRef(cellContexts, currentBlock, contextInstance);
+                    sourceContext.loadPrecalculatedZs(Z);
+                }
+                else
+                {
+                    SolveL1TransposedCellContext::initializePrecalculatedZs(Z);
+                }
+
+                if (completedRowBlock != 0 && completedRowBlock <= currentBlock)
+                {
+                    // Make sure the descriptor is re-read after the precalculates
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+                }
+
+                if (completedRowBlock <= currentBlock)
+                {
+                    verificativeDescriptor = blockProgressDescriptors[currentBlock];
+                    verificationFailure = verificativeDescriptor != testDescriptor;
+                }
+
+                if (!verificationFailure)
+                {
+                    dIASSERT(completedRowBlock <= currentBlock + 1);
+
+                    goForLockedBlockDuplicateCalculation = true;
+                    break;
+                }
+
+                testDescriptor = verificativeDescriptor;
+            }
+        }
+
+        if (exitLoop)
+        {
+            break;
+        }
+
+        if (goForLockedBlockPrimaryCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            // Declare and assign the variables at the top to not interfere with any branching -- the compiler is going to eliminate them anyway.
+            bool handleComputationTakenOver = false, columnEndReached = false;
+            
+            const dReal *ptrLElement;
+            unsigned finalRowBlock;
+
+            /* check if this is not the partial block of fewer rows */
+            if (currentBlock != 0 || loopX1RowCount == 0)
+            {
+                partialBlock = false;
+
+                ptrLElement = completedRowBlock != 0 
+                    ? fullyAdjustedLastLElement - currentBlock * block_step - (sizeint)(completedRowBlock * block_step) * rowSkip 
+                    : columnAdjustedLastLElement - currentBlock * block_step;
+                ptrBElement = completedRowBlock != 0 
+                    ? adjustedLastBElement - (sizeint)(completedRowBlock * block_step) * b_stride 
+                    : lastBElement;
+
+                finalRowBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                dIASSERT(finalRowBlock != completedRowBlock || finalRowBlock == 0);
+
+                unsigned rowCounter = finalRowBlock - completedRowBlock;
+                bool exitLoop = rowCounter == 0;
+
+                if (exitLoop)
+                {
+                    columnEndReached = true;
+                }
+                else if (completedRowBlock == 0 && currentBlock != 0 && loopX1RowCount != 0)
+                {
+                    if ((loopX1RowCount & 1) != 0)
+                    {
+                        dReal q1, p4, p3, p2, p1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        ptrBElement -= 1 * b_stride;
+                    }
+
+                    if ((loopX1RowCount & 2) != 0)
+                    {
+                        dReal q1, p4, p3, p2, p1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[-1 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        ptrBElement -= 2 * b_stride;
+                    }
+                    dSASSERT(block_step == 4);
+
+                    if (--rowCounter == 0)
+                    {
+                        do 
+                        {
+                            if (finalRowBlock == currentBlock)
+                            {
+                                columnEndReached = true;
+                                exitLoop = true;
+                                break;
+                            }
+
+                            // Take a look if any more columns have been completed...
+                            completedBlocks = refBlockCompletionProgress;
+                            dIASSERT(completedBlocks >= finalRowBlock);
+
+                            if (completedBlocks == finalRowBlock)
+                            {
+                                exitLoop = true;
+                                break;
+                            }
+
+                            // ...continue if so.
+                            unsigned rowCompletedSoFar = finalRowBlock;
+                            finalRowBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                            rowCounter = finalRowBlock - rowCompletedSoFar;
+                        }
+                        while (false);
+                    }
+                }
+
+                for (; !exitLoop; exitLoop = false)
+                {
+                    dReal q1, p4, p3, p2, p1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[0 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-1 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-2 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-3 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+                    dSASSERT(block_step == 4);
+
+                    if (rowCounter > 3)
+                    {
+                        rowCounter -= 3;
+
+                        ptrBElement -= 3 * block_step * b_stride;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[8 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[7 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[6 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[5 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[4 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[3 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[2 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[1 * b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+                        dSASSERT(block_step == 4);
+                    }
+                    else
+                    {
+                        ptrBElement -= block_step * b_stride;
+
+                        if (--rowCounter == 0)
+                        {
+                            if (finalRowBlock == currentBlock)
+                            {
+                                columnEndReached = true;
+                                break;
+                            }
+
+                            // Take a look if any more columns have been completed...
+                            completedBlocks = refBlockCompletionProgress;
+                            dIASSERT(completedBlocks >= finalRowBlock);
+
+                            if (completedBlocks == finalRowBlock)
+                            {
+                                break;
+                            }
+
+                            // ...continue if so.
+                            unsigned rowCompletedSoFar = finalRowBlock;
+                            finalRowBlock = dMACRO_MIN(currentBlock, completedBlocks);
+                            rowCounter = finalRowBlock - rowCompletedSoFar;
+                        }
+                    }
+                    /* end of inner loop */
+                }
+            }
+            else /* compute rightmost bottom X(i) block */
+            {
+                partialBlock = true;
+
+                ptrLElement = lastLElement;
+                ptrBElement = lastBElement;
+                dIASSERT(completedRowBlock == 0);
+
+                columnEndReached = true;
+            }
+
+            if (columnEndReached)
+            {
+                // Check whether there is still a need to proceed or if the computation has been taken over by another thread
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedRowBlock, previousContextInstance, true);
+
+                if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                {
+                    if (partialBlock)
+                    {
+                        Y[0] = ptrBElement[0 * b_stride]/* - Z[0]*/;
+
+                        if (loopX1RowCount >= 2)
+                        {
+                            dReal p2 = ptrLElement[-1];
+                            Y[1] = ptrBElement[-1 * (int)b_stride]/* - Z[1] */- p2 * Y[0];
+
+                            if (loopX1RowCount > 2)
+                            {
+                                dReal p3 = ptrLElement[-2];
+                                dReal p3_1 = (ptrLElement - rowSkip)[-2];
+                                Y[2] = ptrBElement[-2 * (int)b_stride]/* - Z[2] */- p3 * Y[0] - p3_1 * Y[1];
+                            }
+                        }
+
+                        dSASSERT(block_step == 4);
+                    }
+                    else
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        dReal p2 = ptrLElement[-1];
+                        Y[1] = ptrBElement[-1 * (int)b_stride] - Z[1] - p2 * Y[0];
+
+                        dReal p3 = ptrLElement[-2];
+                        dReal p3_1 = (ptrLElement - rowSkip)[-2];
+                        Y[2] = ptrBElement[-2 * (int)b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+
+                        dReal p4 = ptrLElement[-3];
+                        dReal p4_1 = (ptrLElement - rowSkip)[-3];
+                        dReal p4_2 = (ptrLElement - rowSkip * 2)[-3];
+                        Y[3] = ptrBElement[-3 * (int)b_stride] - Z[3] - p4 * Y[0] - p4_1 * Y[1] - p4_2 * Y[2];
+                        
+                        dSASSERT(block_step == 4);
+                    }
+
+                    // Use atomic memory barrier to make sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+
+                    // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                    // and the computation result is valid.
+                    if (blockProgressDescriptors[currentBlock] == oldDescriptor)
+                    {
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        SolveL1TransposedCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true);
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                        goAssigningTheResult = true;
+                    }
+                    else
+                    {
+                        // Otherwise, go on competing for copying the results
+                        handleComputationTakenOver = true;
+                    }
+                }
+                else
+                {
+                    handleComputationTakenOver = true;
+                }
+            }
+            else
+            {
+                // If the final column has not been reached yet, store current values to the context.
+                // Select the other context instance as the previous one might be read by other threads.
+                CellContextInstance nextContextInstance = buildNextContextInstance(previousContextInstance);
+                SolveL1TransposedCellContext &destinationContext = buildBlockContextRef(cellContexts, currentBlock, nextContextInstance);
+                destinationContext.storePrecalculatedZs(Z);
+
+                // Unlock the row until more columns can be used
+                cellindexint oldDescriptor = MAKE_CELLDESCRIPTOR(completedRowBlock, previousContextInstance, true);
+                cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(finalRowBlock, nextContextInstance, false);
+                // The descriptor might have been updated by a competing thread
+                if (!CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], oldDescriptor, newDescriptor))
+                {
+                    // Adjust the ptrBElement to point to the result area...
+                    ptrBElement = adjustedLastBElement - (sizeint)(currentBlock * block_step) * b_stride;
+                    dIASSERT(currentBlock != 0 || adjustedLastBElement == lastBElement);
+                    // ...and go on handling the case
+                    handleComputationTakenOver = true;
+                }
+            }
+
+            if (handleComputationTakenOver)
+            {
+                cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+                // This can only happen if the row was (has become) the uppermost not fully completed one
+                // and the competing thread is at final stage of calculation (i.e., it has reached the currentBlock column).
+                if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+                {
+                    // If not fully completed this must be the final stage of the result assignment into the matrix
+                    dIASSERT(existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true));
+
+                    // Keep competing to copy the result: the block is the topmost not yet completed one anyway,
+                    // and since there was competition for it, there is no other work that can be done right now.
+                    const SolveL1TransposedCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                    resultContext.loadPrecalculatedZs(Y);
+
+                    goAssigningTheResult = true;
+                }
+                else 
+                {
+                    // everything is over -- just go handling next blocks
+                }
+            }
+        }
+        else if (goForLockedBlockDuplicateCalculation)
+        {
+            blockProcessingState = BPS_SOME_BLOCKS_PROCESSED;
+
+            bool skipToHandlingSubsequentRows = false, skiptoCopyingResult = false;
+
+            /* declare variables */
+            const dReal *ptrLElement;
+
+            if (completedRowBlock < currentBlock)
+            {
+                partialBlock = false;
+
+                ptrLElement = completedRowBlock != 0 
+                    ? fullyAdjustedLastLElement - currentBlock * block_step - (sizeint)(completedRowBlock * block_step) * rowSkip
+                    : columnAdjustedLastLElement - currentBlock * block_step;
+                ptrBElement = completedRowBlock != 0 
+                    ? adjustedLastBElement - (sizeint)(completedRowBlock * block_step) * b_stride 
+                    : lastBElement;
+
+                unsigned finalRowBlock = currentBlock/*std::min(currentBlock, completedBlocks)*/;
+                dIASSERT(currentBlock == completedBlocks); // Why would we be competing for a row otherwise?
+
+                bool exitInnerLoop = false;
+                unsigned lastCompletedRow = completedRowBlock;
+                unsigned rowCounter = finalRowBlock - completedRowBlock;
+
+                if (completedRowBlock == 0/* && currentBlock != 0 */&& loopX1RowCount != 0)
+                {
+                    if ((loopX1RowCount & 1) != 0)
+                    {
+                        dReal q1, p4, p3, p2, p1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        ptrBElement -= 1 * b_stride;
+                    }
+
+                    if ((loopX1RowCount & 2) != 0)
+                    {
+                        dReal q1, p4, p3, p2, p1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[0 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        /* load p and q values */
+                        q1 = ptrBElement[-1 * (int)b_stride];
+                        p4 = ptrLElement[-3];
+                        p3 = ptrLElement[-2];
+                        p2 = ptrLElement[-1];
+                        p1 = ptrLElement[0];
+                        ptrLElement -= rowSkip;
+
+                        /* compute outer product and add it to the Z matrix */
+                        Z[3] += p4 * q1;
+                        Z[2] += p3 * q1;
+                        Z[1] += p2 * q1;
+                        Z[0] += p1 * q1;
+
+                        ptrBElement -= 2 * b_stride;
+                    }
+                    dSASSERT(block_step == 4);
+
+                    if (--rowCounter == 0)
+                    {
+                        exitInnerLoop = true;
+                    }
+                }
+
+                for (; !exitInnerLoop; exitInnerLoop = --rowCounter == 0)
+                {
+                    dReal q1, p4, p3, p2, p1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[0 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-1 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-2 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+
+                    /* load p and q values */
+                    q1 = ptrBElement[-3 * (int)b_stride];
+                    p4 = ptrLElement[-3];
+                    p3 = ptrLElement[-2];
+                    p2 = ptrLElement[-1];
+                    p1 = ptrLElement[0];
+                    ptrLElement -= rowSkip;
+
+                    /* compute outer product and add it to the Z matrix */
+                    Z[3] += p4 * q1;
+                    Z[2] += p3 * q1;
+                    Z[1] += p2 * q1;
+                    Z[0] += p1 * q1;
+                    dSASSERT(block_step == 4);
+
+                    // Check if the primary solver thread has not made any progress
+                    cellindexint descriptorVerification = blockProgressDescriptors[currentBlock];
+                    unsigned newCompletedRow = GET_CELLDESCRIPTOR_COLUMNINDEX(descriptorVerification);
+
+                    if (newCompletedRow != lastCompletedRow)
+                    {
+                        // Check that this is the first change the current thread detects.
+                        // There is absolutely no reason in code for the computation to stop/resume twice 
+                        // while the current thread is competing.
+                        dIASSERT(lastCompletedRow == completedRowBlock);
+
+                        if (descriptorVerification == INVALID_CELLDESCRIPTOR)
+                        {
+                            skipToHandlingSubsequentRows = true;
+                            break;
+                        }
+
+                        if (newCompletedRow == currentBlock + 1)
+                        {
+                            skiptoCopyingResult = true;
+                            break;
+                        }
+
+                        // Check if the current thread is behind
+                        if (newCompletedRow > finalRowBlock - rowCounter)
+                        {
+                            // If so, start over one more time
+                            blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                            stayWithinTheBlock = true;
+                            skipToHandlingSubsequentRows = true;
+                            break;
+                        }
+
+                        // If current thread is ahead, just save new completed column for further comparisons and go on calculating
+                        lastCompletedRow = newCompletedRow;
+                    }
+
+                    /* advance pointers */
+                    ptrBElement -= block_step * b_stride;
+                    /* end of inner loop */
+                }
+            }
+            else if (completedRowBlock > currentBlock)
+            {
+                dIASSERT(completedRowBlock == currentBlock + 1);
+
+                partialBlock = currentBlock == 0 && loopX1RowCount != 0;
+
+                skiptoCopyingResult = true;
+            }
+            else
+            {
+                dIASSERT(currentBlock == 0); // Execution can get here within the very first block only
+                
+                partialBlock = /*currentBlock == 0 && */loopX1RowCount != 0;
+
+                /* just assign the pointers appropriately and go on computing the results */
+                ptrLElement = lastLElement;
+                ptrBElement = lastBElement;
+            }
+
+            if (!skipToHandlingSubsequentRows)
+            {
+                if (!skiptoCopyingResult)
+                {
+                    if (partialBlock)
+                    {
+                        Y[0] = ptrBElement[0 * b_stride]/* - Z[0]*/;
+
+                        if (loopX1RowCount >= 2)
+                        {
+                            dReal p2 = ptrLElement[-1];
+                            Y[1] = ptrBElement[-1 * (int)b_stride]/* - Z[1] */- p2 * Y[0];
+
+                            if (loopX1RowCount > 2)
+                            {
+                                dReal p3 = ptrLElement[-2];
+                                dReal p3_1 = (ptrLElement - rowSkip)[-2];
+                                Y[2] = ptrBElement[-2 * (int)b_stride]/* - Z[2] */- p3 * Y[0] - p3_1 * Y[1];
+                            }
+                        }
+
+                        dSASSERT(block_step == 4);
+                    }
+                    else
+                    {
+                        Y[0] = ptrBElement[0 * b_stride] - Z[0];
+
+                        dReal p2 = ptrLElement[-1];
+                        Y[1] = ptrBElement[-1 * (int)b_stride] - Z[1] - p2 * Y[0];
+
+                        dReal p3 = ptrLElement[-2];
+                        dReal p3_1 = (ptrLElement - rowSkip)[-2];
+                        Y[2] = ptrBElement[-2 * (int)b_stride] - Z[2] - p3 * Y[0] - p3_1 * Y[1];
+
+                        dReal p4 = ptrLElement[-3];
+                        dReal p4_1 = (ptrLElement - rowSkip)[-3];
+                        dReal p4_2 = (ptrLElement - rowSkip * 2)[-3];
+                        Y[3] = ptrBElement[-3 * (int)b_stride] - Z[3] - p4 * Y[0] - p4_1 * Y[1] - p4_2 * Y[2];
+                        
+                        dSASSERT(block_step == 4);
+                    }
+
+                    // Use atomic memory barrier to make sure memory reads of ptrBElement[] and blockProgressDescriptors[] are not swapped
+                    CooperativeAtomics::AtomicReadReorderBarrier();
+
+                    cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+
+                    if (existingDescriptor == INVALID_CELLDESCRIPTOR)
+                    {
+                        // Everything is over -- proceed to subsequent rows
+                        skipToHandlingSubsequentRows = true;
+                    }
+                    else if (existingDescriptor == MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true))
+                    {
+                        // The values computed above may not be valid. Copy the values already in the result context.
+                        skiptoCopyingResult = true;
+                    }
+                    else
+                    {
+                        // The descriptor has not been altered yet - this means the ptrBElement[] values used above were not modified yet 
+                        // and the computation result is valid.
+                        cellindexint newDescriptor = MAKE_CELLDESCRIPTOR(currentBlock + 1, CCI__MIN, true); // put the computation at the top so that the evaluation result from the expression above is reused
+
+                        // Assign the results to the result context (possibly in parallel with other threads 
+                        // that could and ought to be assigning exactly the same values)
+                        SolveL1TransposedCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.storePrecalculatedZs(Y);
+
+                        // Assign the result assignment progress descriptor
+                        CooperativeAtomics::AtomicCompareExchangeCellindexint(&blockProgressDescriptors[currentBlock], existingDescriptor, newDescriptor); // the result is to be ignored
+
+                        // Whether succeeded or not, the result is valid, so go on trying to assign it to the matrix
+                    }
+                }
+
+                if (!skipToHandlingSubsequentRows)
+                {
+                    if (skiptoCopyingResult)
+                    {
+                        // Extract the result values stored in the result context
+                        const SolveL1TransposedCellContext &resultContext = buildResultContextRef(cellContexts, currentBlock, blockCount);
+                        resultContext.loadPrecalculatedZs(Y);
+
+                        ptrBElement = currentBlock != 0 ? adjustedLastBElement - (sizeint)(currentBlock * block_step) * b_stride : lastBElement;
+                    }
+
+                    goAssigningTheResult = true;
+                }
+            }
+        }
+
+        if (goAssigningTheResult)
+        {
+            cellindexint existingDescriptor = blockProgressDescriptors[currentBlock];
+            // Check if the assignment has not been completed yet
+            if (existingDescriptor != INVALID_CELLDESCRIPTOR)
+            {
+                // Assign the computation results to B vector
+                if (partialBlock)
+                {
+                    // ptrBElement[0 * b_stride] = Y[0]; -- unchanged
+
+                    if (loopX1RowCount >= 2)
+                    {
+                        ptrBElement[-1 * (int)b_stride] = Y[1];
+
+                        if (loopX1RowCount > 2)
+                        {
+                            ptrBElement[-2 * (int)b_stride] = Y[2];
+                        }
+                    }
+                    dSASSERT(block_step == 4);
+                }
+                else
+                {
+                    ptrBElement[0 * b_stride] = Y[0];
+                    ptrBElement[-1 * (int)b_stride] = Y[1];
+                    ptrBElement[-2 * (int)b_stride] = Y[2];
+                    ptrBElement[-3 * (int)b_stride] = Y[3];
+                    dSASSERT(block_step == 4);
+                }
+
+                ThrsafeIncrementIntUpToLimit(&refBlockCompletionProgress, currentBlock + 1);
+                dIASSERT(refBlockCompletionProgress >= currentBlock + 1);
+
+                // And assign the completed status no matter what
+                CooperativeAtomics::AtomicStoreCellindexint(&blockProgressDescriptors[currentBlock], INVALID_CELLDESCRIPTOR);
+            }
+            else 
+            {
+                // everything is over -- just go handling next blocks
+            }
+        }
+
+        if (!stayWithinTheBlock)
+        {
+            completedBlocks = refBlockCompletionProgress;
+
+            if (completedBlocks == blockCount)
+            {
+                break;
+            }
+
+            currentBlock += 1;
+
+            bool lookaheadBoundaryReached = false;
+
+            if (currentBlock == blockCount || completedBlocks == 0)
+            {
+                lookaheadBoundaryReached = true;
+            }
+            else if (currentBlock >= completedBlocks + lookaheadRange)
+            {
+                lookaheadBoundaryReached = blockProcessingState > BPS_NO_BLOCKS_PROCESSED;
+            }
+            else if (currentBlock < completedBlocks)
+            {
+                // Treat detected row advancement as a row processed
+                // blockProcessingState = BPS_SOME_BLOCKS_PROCESSED; <-- performs better without it
+
+                currentBlock = completedBlocks;
+            }
+
+            if (lookaheadBoundaryReached)
+            {
+                dIASSERT(blockProcessingState != BPS_COMPETING_FOR_A_BLOCK); // Why did not we compete???
+
+                // If no row has been processed in the previous pass, compete for the next row to avoid cycling uselessly
+                if (blockProcessingState <= BPS_NO_BLOCKS_PROCESSED)
+                {
+                    // Abandon job if too few blocks remain
+                    if (blockCount - completedBlocks <= ownThreadIndex)
+                    {
+                        break;
+                    }
+
+                    blockProcessingState = BPS_COMPETING_FOR_A_BLOCK;
+                }
+                else
+                {
+                    // If there was some progress, just continue to the next pass
+                    blockProcessingState = BPS_NO_BLOCKS_PROCESSED;
+                }
+
+                currentBlock = completedBlocks;
+            }
+        }
+    }
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/fastvecscale.cpp b/libs/ode-0.16.1/ode/src/fastvecscale.cpp
new file mode 100644
index 0000000..9927d89
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastvecscale.cpp
@@ -0,0 +1,204 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* 
+ * Vector scaling related code of ThreadedEquationSolverLDLT 
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#include <ode/common.h>
+#include <ode/matrix.h>
+#include <ode/matrix_coop.h>
+#include "config.h"
+#include "threaded_solver_ldlt.h"
+#include "threading_base.h"
+#include "resource_control.h"
+#include "error.h"
+
+#include "fastvecscale_impl.h"
+
+
+/*static */ // Adds the needs of a cooperative vector scaling run to the summary, but only if more than one thread would take part.
+void ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned elementCount)
+{
+    dxThreadingBase *const relatedThreading = summaryRequirementsDescriptor->getrelatedThreading();
+    const unsigned usableThreadCount = restrictScalingVectorAllowedThreadCount(relatedThreading, allowedThreadCount, elementCount);
+    // NOTE(review): the unrestricted allowedThreadCount is forwarded below, while cooperativelyScaleVector() forwards the restricted count -- presumably a deliberate upper bound; confirm
+    if (usableThreadCount > 1)
+    {
+        doEstimateCooperativeScalingVectorResourceRequirementsValidated(summaryRequirementsDescriptor, allowedThreadCount, elementCount);
+    }
+}
+
+/*static */ // Scales vectorData by scaleData: cooperatively when more than one thread may be used, on the calling thread alone otherwise.
+void ThreadedEquationSolverLDLT::cooperativelyScaleVector(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    dReal *vectorData, const dReal *scaleData, unsigned elementCount)
+{
+    dAASSERT(elementCount != 0);
+
+    // Find out how many threads may actually take part for a vector of this size
+    const unsigned usableThreadCount = restrictScalingVectorAllowedThreadCount(resourceContainer->getThreadingInstance(), allowedThreadCount, elementCount);
+
+    if (usableThreadCount > 1)
+    {
+        doCooperativelyScaleVectorValidated(resourceContainer, usableThreadCount, vectorData, scaleData, elementCount);
+    }
+    else
+    {
+        scaleLargeVector<SV_A_STRIDE, SV_D_STRIDE>(vectorData, scaleData, elementCount);
+    }
+}
+
+/*static */ // Tells how many threads may scale a vector of elementCount elements; 1 means "stay single-threaded".
+unsigned ThreadedEquationSolverLDLT::restrictScalingVectorAllowedThreadCount(
+    dxThreadingBase *threading, unsigned allowedThreadCount, unsigned elementCount)
+{
+    unsigned usableThreadCount = 1; // the answer whenever cooperation is compiled out or the vector has too few blocks
+
+#if dCOOPERATIVE_ENABLED
+    // Sanity check of the constants: at (minimum block count - 1) and two allowed threads, more than one thread must be derived
+    dIASSERT(deriveScalingVectorThreadCount(SV_COOPERATIVE_BLOCK_COUNT_MINIMUM - 1, 2) > 1);
+    const unsigned int elementsPerBlock = SV_BLOCK_SIZE; // Required by the implementation
+    const unsigned blockCount = deriveScalingVectorBlockCount(elementCount, elementsPerBlock);
+    if (blockCount >= SV_COOPERATIVE_BLOCK_COUNT_MINIMUM)
+    {
+        usableThreadCount = threading->calculateThreadingLimitedThreadCount(allowedThreadCount, true);
+    }
+#endif // #if dCOOPERATIVE_ENABLED
+
+    return usableThreadCount;
+}
+
+/*static */ // Merges the needs of one cooperative scaling run into the summary: no memory, only simultaneous calls and the stock call wait.
+void ThreadedEquationSolverLDLT::doEstimateCooperativeScalingVectorResourceRequirementsValidated(
+    dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+    unsigned allowedThreadCount, unsigned elementCount)
+{
+    // No scratch memory is requested, hence neither a size nor an alignment
+    sizeint memorySize = 0;
+    const unsigned memoryAlignment = 0;
+    // One completion call plus one worker call for every thread other than the calling one
+    unsigned callCount = 1 + (allowedThreadCount - 1);
+    unsigned requiredFeatures = dxResourceRequirementDescriptor::STOCK_CALLWAIT_REQUIRED;
+    summaryRequirementsDescriptor->mergeAnotherDescriptorIn(memorySize, memoryAlignment, callCount, requiredFeatures);
+}
+
+/*static */ // Multi-threaded scaling: posts a completion call and worker calls, takes part in the work itself, then waits for the completion.
+void ThreadedEquationSolverLDLT::doCooperativelyScaleVectorValidated(
+    dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+    dReal *vectorData, const dReal *scaleData, unsigned elementCount)
+{
+    dIASSERT(allowedThreadCount > 1);
+    // Split the vector into blocks and derive from the block count how many threads are worth using
+    const unsigned int blockStep = SV_BLOCK_SIZE; // Required by the implementation
+    unsigned scalingBlockCount = deriveScalingVectorBlockCount(elementCount, blockStep);
+    dIASSERT(scalingBlockCount > 0U);
+
+    unsigned threadCountToUse = deriveScalingVectorThreadCount(scalingBlockCount - 1, allowedThreadCount);
+    dIASSERT(threadCountToUse > 1);
+    // Wait object that is attached to the completion call and waited on at the end of this function
+    dCallWaitID completionWait = resourceContainer->getStockCallWait();
+    dAASSERT(completionWait != NULL);
+    // Shared counter through which all participating threads claim blocks (see participateScalingVector)
+    atomicord32 blockCompletionProgress;
+
+    initializeCooperativelyScaleVectorMemoryStructures(blockCompletionProgress);
+
+    dCallReleaseeID calculationFinishReleasee;
+    ScaleVectorWorkerContext workerContext; // The variable must exist in the outer scope
+
+    workerContext.init(vectorData, scaleData, elementCount, blockCompletionProgress);
+    // Post the completion call first, then (threadCountToUse - 1) worker calls that are given its releasee -- presumably each worker releases one dependency of it
+    dxThreadingBase *threading = resourceContainer->getThreadingInstance();
+    threading->PostThreadedCall(NULL, &calculationFinishReleasee, threadCountToUse - 1, NULL, completionWait, &scaleVector_completion_callback, NULL, 0, "ScaleVector Completion");
+    threading->PostThreadedCallsGroup(NULL, threadCountToUse - 1, calculationFinishReleasee, &scaleVector_worker_callback, &workerContext, "ScaleVector Work");
+    // The calling thread works on blocks too, using the same counter as the workers
+    participateScalingVector<blockStep, SV_A_STRIDE, SV_D_STRIDE>(vectorData, scaleData, elementCount, blockCompletionProgress);
+    // Do not return before the completion call has run: workerContext and blockCompletionProgress live on this stack frame
+    threading->WaitThreadedCallExclusively(NULL, completionWait, NULL, "ScaleVector End Wait");
+}
+
+
+/*static */ // Threaded-call entry point of a worker: recovers the typed context and runs the scaling work on it.
+int ThreadedEquationSolverLDLT::scaleVector_worker_callback(void *callContext, dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    // callContext is the ScaleVectorWorkerContext address that was passed along when the worker calls were posted
+    ScaleVectorWorkerContext &workerContext = *static_cast<ScaleVectorWorkerContext *>(callContext);
+    scaleVector_worker(workerContext);
+
+    return 1;
+}
+
+/*static */ // Worker-side body: joins the shared block-by-block scaling that the context describes.
+void ThreadedEquationSolverLDLT::scaleVector_worker(ScaleVectorWorkerContext &ref_context)
+{
+    const unsigned elementsPerBlock = SV_BLOCK_SIZE; // same block size as the initiating thread uses for its own participation
+    participateScalingVector<elementsPerBlock, SV_A_STRIDE, SV_D_STRIDE>(
+        ref_context.m_vectorData, ref_context.m_scaleData, ref_context.m_elementCount, *ref_context.m_ptrBlockCompletionProgress);
+}
+
+/*static */ // Target of the completion call posted by doCooperativelyScaleVectorValidated(); it performs no work of its own.
+int ThreadedEquationSolverLDLT::scaleVector_completion_callback(void *dUNUSED(callContext), dcallindex_t dUNUSED(callInstanceIndex), dCallReleaseeID dUNUSED(callThisReleasee))
+{
+    return 1;
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+/*extern ODE_API */ // Multiplies a[i] by d[i] in place for i in [0, n); n must not be negative.
+void dScaleVector(dReal *a, const dReal *d, int n)
+{
+    dAASSERT(n >= 0); // checked here while n is still signed: after the conversion a negative n would be a huge element count
+    scaleLargeVector<1, 1>(a, d, (unsigned)n);
+}
+
+/*extern ODE_API_DEPRECATED ODE_API */ // Deprecated entry point performing the same computation as dScaleVector(): a[i] *= d[i] for i in [0, n).
+void dVectorScale(dReal *a, const dReal *d, int n)
+{
+    dAASSERT(n >= 0); // checked here while n is still signed: after the conversion a negative n would be a huge element count
+    scaleLargeVector<1, 1>(a, d, (unsigned)n);
+}
+
+/*extern ODE_API */ // Public wrapper: validates the handle and forwards the estimate request to ThreadedEquationSolverLDLT.
+void dEstimateCooperativelyScaleVectorResourceRequirements(dResourceRequirementsID requirements,
+    unsigned maximalAllowedThreadCount, unsigned maximalElementCount)
+{
+    dAASSERT(requirements != NULL);
+    // The ID is treated as a pointer to the internal requirement descriptor
+    ThreadedEquationSolverLDLT::estimateCooperativeScalingVectorResourceRequirements(
+        (dxResourceRequirementDescriptor *)requirements, maximalAllowedThreadCount, maximalElementCount);
+}
+
+/*extern ODE_API */ // Public wrapper: validates the handle and forwards the scaling request to ThreadedEquationSolverLDLT.
+void dCooperativelyScaleVector(dResourceContainerID resources, unsigned allowedThreadCount, 
+    dReal *dataVector, const dReal *scaleVector, unsigned elementCount)
+{
+    dAASSERT(resources != NULL);
+    // The ID is treated as a pointer to the internal resource container
+    ThreadedEquationSolverLDLT::cooperativelyScaleVector(
+        (dxRequiredResourceContainer *)resources, allowedThreadCount, dataVector, scaleVector, elementCount);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/fastvecscale_impl.h b/libs/ode-0.16.1/ode/src/fastvecscale_impl.h
new file mode 100644
index 0000000..c483fdd
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/fastvecscale_impl.h
@@ -0,0 +1,171 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Vector scaling function implementation
+ * Improvements and cooperative implementation copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")  
+ */
+
+#ifndef _ODE_FASTVECSCALE_IMPL_H_
+#define _ODE_FASTVECSCALE_IMPL_H_
+
+// Multiplies aStart[i * a_stride] by dStart[i * d_stride] in place for every i in [0, elementCount).
+// Elements are processed four at a time; the 1 to 3 leftover elements are finished by the switch at the end.
+template<unsigned int a_stride, unsigned int d_stride>
+void scaleLargeVector(dReal *aStart, const dReal *dStart, unsigned elementCount)
+{
+    // elementCount is unsigned, so a ">= 0" test on it is always true; callers holding a signed count must check it before converting
+    dAASSERT(aStart != NULL && dStart != NULL);
+    const unsigned step = 4;
+
+    dReal *ptrA = aStart;
+    const dReal *ptrD = dStart;
+    const dReal *const dStepsEnd = dStart + (sizeint)(elementCount & ~(step - 1)) * d_stride; // end of the part that is a whole multiple of step
+    for (; ptrD != dStepsEnd; ptrA += step * a_stride, ptrD += step * d_stride)
+    {
+        dReal a0 = ptrA[0], a1 = ptrA[1 * a_stride], a2 = ptrA[2 * a_stride], a3 = ptrA[3 * a_stride];
+        dReal d0 = ptrD[0], d1 = ptrD[1 * d_stride], d2 = ptrD[2 * d_stride], d3 = ptrD[3 * d_stride];
+        a0 *= d0;
+        a1 *= d1;
+        a2 *= d2;
+        a3 *= d3;
+        ptrA[0] = a0; ptrA[1 * a_stride] = a1; ptrA[2 * a_stride] = a2; ptrA[3 * a_stride] = a3;
+        dSASSERT(step == 4);
+    }
+    // Leftover elements: enter at the case matching their count and fall through down to case 1
+    switch (elementCount & (step - 1))
+    {
+        case 3:
+        {
+            dReal a2 = ptrA[2 * a_stride];
+            dReal d2 = ptrD[2 * d_stride];
+            ptrA[2 * a_stride] = a2 * d2;
+            // break; -- proceed to case 2
+        }
+
+        case 2:
+        {
+            dReal a1 = ptrA[1 * a_stride];
+            dReal d1 = ptrD[1 * d_stride];
+            ptrA[1 * a_stride] = a1 * d1;
+            // break; -- proceed to case 1
+        }
+
+        case 1:
+        {
+            dReal a0 = ptrA[0];
+            dReal d0 = ptrD[0];
+            ptrA[0] = a0 * d0;
+            break;
+        }
+    }
+    dSASSERT(step == 4);
+}
+
+// Cooperative scaling body run by every participating thread: claims blocks of block_step elements through the shared counter and multiplies a by d within each claimed block.
+template<unsigned int block_step, unsigned int a_stride, unsigned int d_stride>
+/*static */
+void ThreadedEquationSolverLDLT::participateScalingVector(dReal *ptrAStart, const dReal *ptrDStart, const unsigned elementCount,
+    volatile atomicord32 &refBlockCompletionProgress/*=0*/)
+{
+    dAASSERT(ptrAStart != NULL);
+    dAASSERT(ptrDStart != NULL);
+    // No sign check on elementCount: it is unsigned, so ">= 0" would be a tautology
+
+    const unsigned wrapSize = 4;
+    dSASSERT(block_step % wrapSize == 0);
+
+    const unsigned completeBlockCount = elementCount / block_step;
+    const unsigned trailingBlockElements = elementCount % block_step;
+    // Phase 1: complete blocks. ThrsafeIncrementIntUpToLimit() presumably returns the pre-increment counter value and never moves it past the limit
+    unsigned blockIndex;
+    while ((blockIndex = ThrsafeIncrementIntUpToLimit(&refBlockCompletionProgress, completeBlockCount)) != completeBlockCount)
+    {
+        dReal *ptrAElement = ptrAStart + (sizeint)(blockIndex * block_step) * a_stride;
+        const dReal *ptrDElement = ptrDStart + (sizeint)(blockIndex * block_step) * d_stride;
+        const dReal *const ptrDBlockEnd = ptrDElement + block_step * d_stride;
+        dSASSERT((sizeint)block_step * a_stride < UINT_MAX);
+        dSASSERT((sizeint)block_step * d_stride < UINT_MAX);
+        // block_step is a multiple of wrapSize, so the block is covered exactly by groups of four
+        for (; ptrDElement != ptrDBlockEnd; ptrAElement += wrapSize * a_stride, ptrDElement += wrapSize * d_stride)
+        {
+            dReal a0 = ptrAElement[0], a1 = ptrAElement[1 * a_stride], a2 = ptrAElement[2 * a_stride], a3 = ptrAElement[3 * a_stride];
+            dReal d0 = ptrDElement[0], d1 = ptrDElement[1 * d_stride], d2 = ptrDElement[2 * d_stride], d3 = ptrDElement[3 * d_stride];
+            a0 *= d0;
+            a1 *= d1;
+            a2 *= d2;
+            a3 *= d3;
+            ptrAElement[0] = a0; ptrAElement[1 * a_stride] = a1; ptrAElement[2 * a_stride] = a2; ptrAElement[3 * a_stride] = a3;
+            dSASSERT(wrapSize == 4);
+        }
+    }
+    // Phase 2: the incomplete trailing block, if any, is taken by the one thread that advances the counter beyond completeBlockCount
+    if (trailingBlockElements != 0 && (blockIndex = ThrsafeIncrementIntUpToLimit(&refBlockCompletionProgress, completeBlockCount + 1)) != completeBlockCount + 1)
+    {
+        dReal *ptrAElement = ptrAStart + (sizeint)(completeBlockCount * block_step) * a_stride;
+        const dReal *ptrDElement = ptrDStart + (sizeint)(completeBlockCount * block_step) * d_stride;
+        const dReal *const ptrDBlockEnd = ptrDElement + (trailingBlockElements & ~(wrapSize - 1)) * d_stride;
+        // Whole groups of four within the trailing block
+        for (; ptrDElement != ptrDBlockEnd; ptrAElement += wrapSize * a_stride, ptrDElement += wrapSize * d_stride)
+        {
+            dReal a0 = ptrAElement[0], a1 = ptrAElement[1 * a_stride], a2 = ptrAElement[2 * a_stride], a3 = ptrAElement[3 * a_stride];
+            dReal d0 = ptrDElement[0], d1 = ptrDElement[1 * d_stride], d2 = ptrDElement[2 * d_stride], d3 = ptrDElement[3 * d_stride];
+            a0 *= d0;
+            a1 *= d1;
+            a2 *= d2;
+            a3 *= d3;
+            ptrAElement[0] = a0; ptrAElement[1 * a_stride] = a1; ptrAElement[2 * a_stride] = a2; ptrAElement[3 * a_stride] = a3;
+            dSASSERT(wrapSize == 4);
+        }
+        // The last 1 to 3 elements: enter at the case matching their count and fall through down to case 1
+        switch (trailingBlockElements & (wrapSize - 1))
+        {
+            case 3:
+            {
+                dReal a2 = ptrAElement[2 * a_stride];
+                dReal d2 = ptrDElement[2 * d_stride];
+                ptrAElement[2 * a_stride] = a2 * d2;
+                // break; -- proceed to case 2
+            }
+
+            case 2:
+            {
+                dReal a1 = ptrAElement[1 * a_stride];
+                dReal d1 = ptrDElement[1 * d_stride];
+                ptrAElement[1 * a_stride] = a1 * d1;
+                // break; -- proceed to case 1
+            }
+
+            case 1:
+            {
+                dReal a0 = ptrAElement[0];
+                dReal d0 = ptrDElement[0];
+                ptrAElement[0] = a0 * d0;
+                break;
+            }
+        }
+        dSASSERT(wrapSize == 4);
+    }
+}
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.cpp b/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.cpp
new file mode 100644
index 0000000..0e107b0
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.cpp
@@ -0,0 +1,95 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/collision.h>
+#include "config.h"
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
+#include "gimpact_contact_export_helper.h"
+#include "error.h"
+
+
+/*static */ // Bisects [mindepth, maxdepth] to find a marginal depth that at least maxcontacts of the given depths are not below (exactly maxcontacts unless entries tie at the margin; assuming dNextAfter() behaves like the C nextafter()). pdepths serves as scratch storage: it is reordered and partly overwritten.
+dReal dxGImpactContactsExportHelper::FindContactsMarginalDepth(dReal *pdepths, unsigned contactcount, unsigned maxcontacts, dReal mindepth, dReal maxdepth)
+{
+    dReal result;
+    // Each pass partitions the current candidates around the midpoint depth and continues in the part that must contain the margin
+    while (true)
+    {
+        dReal firstdepth = REAL(0.5) * (mindepth + maxdepth); // pivot of this pass
+        dReal lowdepth = maxdepth, highdepth = mindepth; // become the smallest depth above and the largest depth below the pivot
+        // [0, marginindex) collects depths below the pivot, [marginindex, highindex) those above it; depths equal to the pivot are not kept
+        unsigned marginindex = 0;
+        unsigned highindex = marginindex;
+        dIASSERT(contactcount != 0);
+
+        for (unsigned i = 0; i < contactcount; i++)
+        {
+            dReal depth = pdepths[i];
+
+            if (depth < firstdepth)
+            {
+                dReal temp = pdepths[marginindex]; pdepths[highindex++] = temp; pdepths[marginindex++] = depth; // first "above" entry moves to the end of its range, the new "below" entry takes its slot
+                if (highdepth < depth) { highdepth = depth; }
+            }
+            else if (depth > firstdepth)
+            {
+                pdepths[highindex++] = depth;
+                if (depth < lowdepth) { lowdepth = depth; }
+            }
+        }
+        // Decide from the number of entries above the pivot where to continue, or whether the margin has been found
+        unsigned countabove = highindex - marginindex;
+        if (maxcontacts < countabove)
+        {
+            contactcount = countabove; // more entries above the pivot than wanted: continue among them only
+            pdepths += marginindex;
+            mindepth = lowdepth;
+        }
+        else if (maxcontacts == countabove)
+        {
+            result = dNextAfter(firstdepth, dInfinity); // exactly the wanted number lies strictly above the pivot; presumably the next representable value above it
+            break;
+        }
+        else
+        {
+            unsigned countbelow = marginindex;
+            if (maxcontacts <= contactcount - countbelow)
+            {
+                result = firstdepth; // entries equal to the pivot fill the remaining quota, so the pivot itself is the margin
+                break;
+            }
+            // Everything at or above the pivot is taken: subtract it from the quota and continue among the entries below the pivot
+            maxcontacts -= contactcount - countbelow;
+            contactcount = countbelow;
+            maxdepth = highdepth;
+        }
+    }
+
+    return result;
+}
+
+
+#endif // #if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+
diff --git a/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.h b/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.h
new file mode 100644
index 0000000..149ac91
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/gimpact_contact_export_helper.h
@@ -0,0 +1,177 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#ifndef _ODE_GIMPACT_CONTACT_EXPORT_HELPER_H_
+#define _ODE_GIMPACT_CONTACT_EXPORT_HELPER_H_
+
+
+#include "collision_kernel.h"
+#include "collision_util.h"
+#include "util.h"
+
+
+#ifndef ALLOCA
+#define ALLOCA(x) dALLOCA16(x)
+#endif
+
+
+struct dxGImpactContactsExportHelper
+{
+public:
+    // Exports the source contacts into Contacts, limited to (Flags & NUMC_MASK) entries, and returns the resulting contact count.
+    template<class dxGImpactContactAccessor>
+    static unsigned ExportMaxDepthGImpactContacts(dxGImpactContactAccessor &srccontacts, unsigned contactcount,
+        int Flags, dContactGeom* Contacts, int Stride)
+    {
+        const unsigned outputlimit = (unsigned)(Flags & NUMC_MASK);
+        if (contactcount <= outputlimit)
+        {
+            // Everything fits: the contacts are exported one-to-one
+            ExportFitContacts(srccontacts, contactcount, Flags, Contacts, Stride);
+            return contactcount;
+        }
+
+        // More source contacts than output entries: the export is filtered by depth
+        ExportExcesssiveContacts(srccontacts, contactcount, Flags, Contacts, Stride);
+        return outputlimit;
+    }
+
+private:
+    // Export path used when contactcount exceeds (Flags & NUMC_MASK)
+    template<class dxGImpactContactAccessor>
+    static void ExportExcesssiveContacts(dxGImpactContactAccessor &srccontacts, unsigned contactcount,
+        int Flags, dContactGeom* Contacts, int Stride);
+    // Export path used when contactcount does not exceed (Flags & NUMC_MASK)
+    template<class dxGImpactContactAccessor>
+    static void ExportFitContacts(dxGImpactContactAccessor &srccontacts, unsigned contactcount,
+        int Flags, dContactGeom* Contacts, int Stride);
+    // Marginal depth search: the accessor based entry point, followed by the worker operating on a plain depth array
+    template<class dxGImpactContactAccessor>
+    static dReal FindContactsMarginalDepth(dxGImpactContactAccessor &srccontacts, unsigned contactcount, unsigned maxcontacts);
+    static dReal FindContactsMarginalDepth(dReal *pdepths, unsigned contactcount, unsigned maxcontacts, dReal mindepth, dReal maxdepth);
+};
+
+
+template<class dxGImpactContactAccessor> 
+/*static */
+void dxGImpactContactsExportHelper::ExportExcesssiveContacts(dxGImpactContactAccessor &srccontacts, unsigned contactcount,
+    int Flags, dContactGeom* Contacts, int Stride)
+{
+    // Contacts deeper than the marginal depth are stored from slot 0 upwards. Contacts exactly at the marginal depth are
+    // stored from the last slot downwards while free slots remain between both regions; a later deeper contact may overwrite them.
+    const unsigned slotcount = (unsigned)(Flags & NUMC_MASK);
+    const dReal depthlimit = FindContactsMarginalDepth(srccontacts, contactcount, slotcount);
+
+    unsigned deepfill = 0;
+    unsigned marginalstart = slotcount;
+    for (unsigned srcindex = 0; srcindex < contactcount; ++srcindex)
+    {
+        const dReal srcdepth = srccontacts.RetrieveDepthByIndex(srcindex);
+
+        if (srcdepth > depthlimit)
+        {
+            dContactGeom *pslot = SAFECONTACT(Flags, Contacts, deepfill, Stride);
+            srccontacts.ExportContactGeomByIndex(pslot, srcindex);
+
+            ++deepfill;
+            if (deepfill == slotcount) { break; } // every slot holds a deeper contact now
+        }
+        else if (srcdepth == depthlimit && deepfill < marginalstart)
+        {
+            --marginalstart;
+            dContactGeom *pslot = SAFECONTACT(Flags, Contacts, marginalstart, Stride);
+            srccontacts.ExportContactGeomByIndex(pslot, srcindex);
+        }
+    }
+}
+
+template<class dxGImpactContactAccessor>
+/*static */
+void dxGImpactContactsExportHelper::ExportFitContacts(dxGImpactContactAccessor &srccontacts, unsigned contactcount,
+    int Flags, dContactGeom* Contacts, int Stride)
+{
+    // Source contact number k is exported to output entry number k
+    for (unsigned slot = 0; slot != contactcount; ++slot)
+    {
+        dContactGeom *ptarget = SAFECONTACT(Flags, Contacts, slot, Stride);
+        srccontacts.ExportContactGeomByIndex(ptarget, slot);
+    }
+}
+
+template<class dxGImpactContactAccessor>
+/*static */
+dReal dxGImpactContactsExportHelper::FindContactsMarginalDepth(dxGImpactContactAccessor &srccontacts, unsigned contactcount, unsigned maxcontacts)
+{
+    dReal result;
+    // The depth of contact 0 is the first pivot; all other depths are partitioned around it into a scratch array obtained via ALLOCA.
+    dReal *pdepths = (dReal *)ALLOCA(contactcount * sizeof(dReal));
+    unsigned marginindex = 0;
+    unsigned highindex = marginindex;
+
+    dReal firstdepth = srccontacts.RetrieveDepthByIndex(0);
+    dReal mindepth = firstdepth, maxdepth = firstdepth;
+    dIASSERT(contactcount > 1);
+    // Depths below the pivot end up in pdepths[0, marginindex), depths above it in pdepths[marginindex, highindex); depths equal to it are not stored.
+    for (unsigned i = 1; i < contactcount; i++)
+    {
+        dReal depth = srccontacts.RetrieveDepthByIndex(i);
+        // NOTE(review): while marginindex == highindex the element read into temp has not been written yet; the last statement overwrites it with depth.
+        if (depth < firstdepth)
+        {
+            dReal temp = pdepths[marginindex]; pdepths[highindex++] = temp; pdepths[marginindex++] = depth;
+            if (depth < mindepth) { mindepth = depth; } // smallest depth seen so far
+        }
+        else if (depth > firstdepth)
+        {
+            pdepths[highindex++] = depth;
+            if (maxdepth < depth) { maxdepth = depth; } // largest depth seen so far
+        }
+    }
+
+    unsigned countabove = highindex - marginindex;
+    if (maxcontacts < countabove)
+    {
+        result = FindContactsMarginalDepth(pdepths + marginindex, countabove, maxcontacts, firstdepth, maxdepth); // continue among the depths above the pivot
+    }
+    else if (maxcontacts == countabove)
+    {
+        result = dNextAfter(firstdepth, dInfinity); // exactly maxcontacts depths exceed the pivot
+    }
+    else
+    {
+        unsigned countbelow = marginindex;
+        if (maxcontacts <= contactcount - countbelow)
+        {
+            result = firstdepth; // depths above the pivot plus depths equal to it (contact 0 included) reach maxcontacts
+        }
+        else
+        {
+            result = FindContactsMarginalDepth(pdepths, countbelow, maxcontacts - (contactcount - countbelow), mindepth, firstdepth); // the rest is searched for below the pivot
+        }
+    }
+
+    return result;
+}
+
+
+#endif	//_ODE_GIMPACT_CONTACT_EXPORT_HELPER_H_
diff --git a/libs/ode-0.16.1/ode/src/gimpact_gim_contact_accessor.h b/libs/ode-0.16.1/ode/src/gimpact_gim_contact_accessor.h
new file mode 100644
index 0000000..2b252b4
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/gimpact_gim_contact_accessor.h
@@ -0,0 +1,62 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#ifndef _ODE_GIMPACT_GIM_CONTACT_ACCESSOR_H_
+#define _ODE_GIMPACT_GIM_CONTACT_ACCESSOR_H_
+
+
+struct dxGIMCContactAccessor
+{
+    // No side2 override: exported contacts take side2 from GIM_CONTACT::m_feature2
+    dxGIMCContactAccessor(GIM_CONTACT *ptrimeshcontacts, dGeomID g1, dGeomID g2) : m_ptrimeshcontacts(ptrimeshcontacts), m_g1(g1), m_g2(g2), m_gotside2ovr(false), m_side2ovr() {}
+    // With side2 override: every exported contact gets side2 == side2ovr
+    dxGIMCContactAccessor(GIM_CONTACT *ptrimeshcontacts, dGeomID g1, dGeomID g2, int side2ovr) : m_ptrimeshcontacts(ptrimeshcontacts), m_g1(g1), m_g2(g2), m_gotside2ovr(true), m_side2ovr(side2ovr) {}
+
+    dReal RetrieveDepthByIndex(unsigned index) const { return m_ptrimeshcontacts[index].m_depth; }
+
+    void ExportContactGeomByIndex(dContactGeom *pcontact, unsigned index) const
+    {
+        const GIM_CONTACT &source = m_ptrimeshcontacts[index];
+        for (int axis = 0; axis != 3; ++axis)
+        {
+            pcontact->pos[axis] = source.m_point[axis];
+            pcontact->normal[axis] = source.m_normal[axis];
+        }
+        pcontact->pos[3] = REAL(1.0);
+        pcontact->normal[3] = 0;
+
+        pcontact->depth = source.m_depth;
+        pcontact->g1 = m_g1;
+        pcontact->g2 = m_g2;
+        pcontact->side1 = source.m_feature1;
+        pcontact->side2 = m_gotside2ovr ? m_side2ovr : source.m_feature2;
+    }
+
+    const GIM_CONTACT *m_ptrimeshcontacts;
+    dGeomID         m_g1, m_g2;
+    bool            m_gotside2ovr;
+    int             m_side2ovr;
+};
+
+
+#endif	//_ODE_GIMPACT_GIM_CONTACT_ACCESSOR_H_
diff --git a/libs/ode-0.16.1/ode/src/gimpact_plane_contact_accessor.h b/libs/ode-0.16.1/ode/src/gimpact_plane_contact_accessor.h
new file mode 100644
index 0000000..035dcfd
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/gimpact_plane_contact_accessor.h
@@ -0,0 +1,62 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#ifndef _ODE_GIMPACT_PLANE_CONTACT_ACCESSOR_H_
+#define _ODE_GIMPACT_PLANE_CONTACT_ACCESSOR_H_
+
+
+struct dxPlaneContactAccessor
+{
+    // Only the two pointers and the geom ids are stored; the contact results and the plane are not copied
+    dxPlaneContactAccessor(const vec4f *planecontact_results, const dReal *plane, dGeomID g1, dGeomID g2) : m_planecontact_results(planecontact_results), m_plane(plane), m_g1(g1), m_g2(g2) {}
+
+    dReal RetrieveDepthByIndex(unsigned index) const { return m_planecontact_results[index][3]; }
+
+    // Position comes from the first three components of the source vector, the normal from the first three plane values
+    void ExportContactGeomByIndex(dContactGeom *pcontact, unsigned index) const
+    {
+        const vec4f &source = m_planecontact_results[index];
+
+        for (int axis = 0; axis != 3; ++axis)
+        {
+            pcontact->pos[axis] = source[axis];
+            pcontact->normal[axis] = m_plane[axis];
+        }
+        pcontact->pos[3] = REAL(1.0);
+        pcontact->normal[3] = 0;
+
+        // The fourth component of the source vector carries the depth
+        pcontact->depth = source[3];
+        pcontact->g1 = m_g1; // trimesh geom
+        pcontact->g2 = m_g2; // plane geom
+        pcontact->side1 = -1; // note: the triangle index is not available here, but OPCODE *does* do this properly
+        pcontact->side2 = -1;
+    }
+
+    const vec4f     *m_planecontact_results;
+    const dReal     *m_plane;
+    dGeomID         m_g1, m_g2;
+};
+
+
+#endif	//_ODE_GIMPACT_PLANE_CONTACT_ACCESSOR_H_
diff --git a/libs/ode-0.16.1/ode/src/heightfield.cpp b/libs/ode-0.16.1/ode/src/heightfield.cpp
new file mode 100644
index 0000000..71699db
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/heightfield.cpp
@@ -0,0 +1,1876 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// dHeightfield Collider
+//  Paul Cheyrou-Lagreze aka Tuan Kuranes 2006 Speed enhancements http://www.pop-3d.com
+//  Martijn Buijs 2006 http://home.planet.nl/~buijs512/
+// Based on Terrain & Cone contrib by:
+//  Benoit CHAPEROT 2003-2004 http://www.jstarlab.com
+//  Some code inspired by Magic Software
+
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+#include "heightfield.h"
+
+
+
+#if dTRIMESH_ENABLED
+#include "collision_trimesh_colliders.h"
+#endif // dTRIMESH_ENABLED
+
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A))
+#define dMAX(A,B)  ((A)>(B) ? (A) : (B))
+// NOTE: the two macros above and the three-way variants below may evaluate an argument more than once; avoid arguments with side effects.
+
+// Three-way MIN and MAX
+#define dMIN3(A,B,C)	( (A)<(B) ? dMIN((A),(C)) : dMIN((B),(C)) )
+#define dMAX3(A,B,C)	( (A)>(B) ? dMAX((A),(C)) : dMAX((B),(C)) )
+// dOPESIGN expands to the three statements "(a)[i] op1 op2 ((b)[i]);" for i = 0, 1, 2 (several statements: not usable as an unbraced if/else body)
+#define dOPESIGN(a, op1, op2,b) \
+    (a)[0] op1 op2 ((b)[0]); \
+    (a)[1] op1 op2 ((b)[1]); \
+    (a)[2] op1 op2 ((b)[2]);
+// Sets the ray position from MyPoint (via dVector3Copy -- argument order presumably source, destination; confirm), stores MyVector in R[2], R[6], R[10] as is and calls dGeomMoved
+#define dGeomRaySetNoNormalize(myRay, MyPoint, MyVector) {  \
+    \
+    dVector3Copy (MyPoint, (myRay).final_posr->pos);   \
+    (myRay).final_posr->R[2] = (MyVector)[0];       \
+    (myRay).final_posr->R[6] = (MyVector)[1];       \
+    (myRay).final_posr->R[10] = (MyVector)[2];      \
+    dGeomMoved (&myRay);                        \
+            }
+// Assigns the four values of MyPlaneDef to MyPlane->p[0..3] unchanged and then calls dGeomMoved(MyPlane)
+#define dGeomPlaneSetNoNormalize(MyPlane, MyPlaneDef) { \
+    \
+    (MyPlane)->p[0] = (MyPlaneDef)[0];  \
+    (MyPlane)->p[1] = (MyPlaneDef)[1];  \
+    (MyPlane)->p[2] = (MyPlaneDef)[2];  \
+    (MyPlane)->p[3] = (MyPlaneDef)[3];  \
+    dGeomMoved (MyPlane);           \
+                    }
+//////// Local Build Option ////////////////////////////////////////////////////
+
+// Uncomment this #define to use the (0,0) corner of the geom as the origin,
+// rather than the center. This was the way the original heightfield worked,
+// but as it does not match the way all other geometries work, it was changed
+// to use the center by default for consistency.
+
+// #define DHEIGHTFIELD_CORNER_ORIGIN
+
+
+// Uncomment this #define to add heightfield triangles edge colliding
+// Code is not guaranteed and I didn't find the need to add that as 
+// colliding planes triangles and edge triangles seems enough.
+// #define _HEIGHTFIELDEDGECOLLIDING
+
+
+//////// dxHeightfieldData /////////////////////////////////////////////////////////////
+
+// dxHeightfieldData constructor: every member in the initializer list starts as 0 / NULL and m_contacts is zero-filled
+dxHeightfieldData::dxHeightfieldData():
+    m_fWidth( 0 ),
+    m_fDepth( 0 ),
+    m_fSampleWidth( 0 ),
+    m_fSampleDepth( 0 ),
+    m_fSampleZXAspect( 0 ),
+    m_fInvSampleWidth( 0 ),
+    m_fInvSampleDepth( 0 ),
+
+    m_fHalfWidth( 0 ),
+    m_fHalfDepth( 0 ),
+
+    m_fMinHeight( 0 ),
+    m_fMaxHeight( 0 ),
+    m_fThickness( 0 ),
+    m_fScale( 0 ),
+    m_fOffset( 0 ),
+
+    m_nWidthSamples( 0 ),
+    m_nDepthSamples( 0 ),
+    m_bCopyHeightData( 0 ),
+    m_bWrapMode( 0 ), // 0 is the finite (non-wrapping) mode checked for in GetHeight()
+    m_nGetHeightMode( 0 ), // 0 is the callback mode in GetHeight() and ComputeHeightBounds()
+
+    m_pHeightData( NULL ),
+    m_pUserData( NULL ),
+
+    m_pGetHeightCallback( NULL )
+{
+    memset( m_contacts, 0, sizeof( m_contacts ) ); // byte-wise clear of the whole m_contacts member
+}
+
+// Stores the heightfield dimensions and sampling parameters and derives the cached per-sample values from them
+void dxHeightfieldData::SetData( int nWidthSamples, int nDepthSamples,
+                                dReal fWidth, dReal fDepth,
+                                dReal fScale, dReal fOffset, dReal fThickness,
+                                int bWrapMode )
+{
+    dIASSERT( fWidth > REAL( 0.0 ) );
+    dIASSERT( fDepth > REAL( 0.0 ) );
+    dIASSERT( nWidthSamples > 0 );
+    dIASSERT( nDepthSamples > 0 );
+
+    // Grid resolution: number of vertices per side
+    m_nWidthSamples = nWidthSamples;
+    m_nDepthSamples = nDepthSamples;
+
+    // Extents along x and z, together with their cached halves
+    m_fWidth = fWidth;
+    m_fDepth = fDepth;
+    m_fHalfWidth = fWidth / REAL( 2.0 );
+    m_fHalfDepth = fDepth / REAL( 2.0 );
+
+    // Height scale, height offset and thickness are stored exactly as passed in
+    m_fScale = fScale;
+    m_fOffset = fOffset;
+    m_fThickness = fThickness;
+
+    // Zero selects a finite terrain, any other value a repeated one
+    m_bWrapMode = bWrapMode;
+
+    // Spacing between neighbouring samples: n samples span (n - 1) intervals.
+    // NOTE(review): a sample count of 1 passes the assertions above but makes the divisor below zero.
+    const dReal fWidthIntervals = m_nWidthSamples - REAL( 1.0 );
+    const dReal fDepthIntervals = m_nDepthSamples - REAL( 1.0 );
+    m_fSampleWidth = m_fWidth / fWidthIntervals;
+    m_fSampleDepth = m_fDepth / fDepthIntervals;
+
+    // Values derived from the sample spacing
+    m_fSampleZXAspect = m_fSampleDepth / m_fSampleWidth;
+    m_fInvSampleWidth = REAL( 1.0 ) / m_fSampleWidth;
+    m_fInvSampleDepth = REAL( 1.0 ) / m_fSampleDepth;
+}
+
+
+// Helper for dxHeightfieldData::ComputeHeightBounds().
+// Scans sampleCount raw height samples of any arithmetic element type and stores the smallest and the largest
+// of them, converted to dReal, in minSample and maxSample. When sampleCount is not positive the outputs are left
+// at +dInfinity and -dInfinity respectively.
+template<typename SampleType>
+static void dxHeightfieldScanSampleRange( const SampleType *samples, int sampleCount, dReal &minSample, dReal &maxSample )
+{
+    minSample = dInfinity;
+    maxSample = -dInfinity;
+
+    // A NaN sample fails both comparisons and therefore changes neither bound
+    for ( int i = 0; i < sampleCount; i++ )
+    {
+        const dReal h = static_cast< dReal >( samples[i] );
+        if ( h < minSample ) minSample = h;
+        if ( h > maxSample ) maxSample = h;
+    }
+}
+
+// Recomputes the height bounds (m_fMinHeight and m_fMaxHeight) from the stored height samples.
+//
+// In callback mode (m_nGetHeightMode == 0) nothing is changed and the default or user specified bounds stay in
+// effect. In the array based modes the raw sample range is scanned, m_fScale and m_fOffset are applied to it and
+// m_fThickness is subtracted from the lower bound. With a negative m_fScale the scaled bounds are swapped so
+// that m_fMinHeight does not end up above m_fMaxHeight.
+//
+// NOTE(review): the sample count is an int product of the two dimensions; extremely large arrays could overflow it.
+void dxHeightfieldData::ComputeHeightBounds()
+{
+    if ( m_nGetHeightMode == 0 )
+    {
+        // callback: change nothing, keep using default or user specified bounds
+        return;
+    }
+
+    // Number of stored samples. Like before, the product is only evaluated once the callback mode has been
+    // ruled out.
+    const int sampleCount = m_nWidthSamples*m_nDepthSamples;
+
+    switch ( m_nGetHeightMode )
+    {
+
+        // byte
+    case 1:
+        dxHeightfieldScanSampleRange( (const unsigned char*)m_pHeightData, sampleCount, m_fMinHeight, m_fMaxHeight );
+        break;
+
+        // short
+    case 2:
+        dxHeightfieldScanSampleRange( (const short*)m_pHeightData, sampleCount, m_fMinHeight, m_fMaxHeight );
+        break;
+
+        // float
+    case 3:
+        dxHeightfieldScanSampleRange( (const float*)m_pHeightData, sampleCount, m_fMinHeight, m_fMaxHeight );
+        break;
+
+        // double
+    case 4:
+        dxHeightfieldScanSampleRange( (const double*)m_pHeightData, sampleCount, m_fMinHeight, m_fMaxHeight );
+        break;
+
+        // any other mode: nothing is scanned, the bounds currently stored are just scaled and offset below
+        // (unchanged behavior)
+    default:
+        break;
+
+    }
+
+    // scale
+    m_fMinHeight *= m_fScale;
+    m_fMaxHeight *= m_fScale;
+
+    // A negative scale mirrors the sample range: the scaled minimum is then the larger of the two values.
+    // Swap them, otherwise the bounds (and the AABB built from them in dxHeightfield::computeAABB) are inverted.
+    if ( m_fScale < REAL( 0.0 ) )
+    {
+        const dReal mirroredMin = m_fMaxHeight;
+        m_fMaxHeight = m_fMinHeight;
+        m_fMinHeight = mirroredMin;
+    }
+
+    // offset
+    m_fMinHeight += m_fOffset;
+    m_fMaxHeight += m_fOffset;
+
+    // add thickness: only the lower bound is extended
+    m_fMinHeight -= m_fThickness;
+}
+
+
+// Returns whether pos (only pos[0] and pos[2] are read) is over one triangle of a terrain cell; isABC selects which of the two tests is applied.
+bool dxHeightfieldData::IsOnHeightfield2 ( const HeightFieldVertex * const CellCorner, 
+                                          const dReal * const pos,  const bool isABC) const
+{
+    // WARNING!!!
+    // This function must be written in the way to make sure that every point on
+    // XZ plane falls in one and only one triangle. Keep that in mind if you 
+    // intend to change the code.
+    // Also remember about computational errors and possible mismatches in 
+    // values if they are calculated differently in different places in the code.
+    // Currently both the implementation has been optimized and effects of 
+    // computational errors have been eliminated.
+
+    dReal MaxX, MinX;
+    dReal MaxZ, MinZ;
+
+    if (isABC)
+    {
+        // point A: CellCorner supplies the lower x/z bounds, the upper bounds are derived from its sample coordinates
+        MinX = CellCorner->vertex[0];
+        if (pos[0] < MinX)
+            return false;
+
+        MaxX = (CellCorner->coords[0] + 1) * m_fSampleWidth;
+        if (pos[0] >= MaxX)
+            return false;
+
+        MinZ = CellCorner->vertex[2];
+        if (pos[2] < MinZ)
+            return false;
+
+        MaxZ = (CellCorner->coords[1] + 1) * m_fSampleDepth;
+        if (pos[2] >= MaxZ)
+            return false;
+
+        return (MaxZ - pos[2]) > (pos[0] - MinX) * m_fSampleZXAspect; // strict here, "<=" in the other branch: the same expression is tested with the complementary comparison
+    }
+    else
+    {
+        // point D: CellCorner supplies the upper x/z bounds, the lower bounds are derived from its sample coordinates
+        MaxX = CellCorner->vertex[0];
+        if (pos[0] >= MaxX)
+            return false;
+
+        MinX = (CellCorner->coords[0] - 1) * m_fSampleWidth;
+        if (pos[0] < MinX)
+            return false;
+
+        MaxZ = CellCorner->vertex[2];
+        if (pos[2] >= MaxZ)
+            return false;
+
+        MinZ = (CellCorner->coords[1] - 1) * m_fSampleDepth;
+        if (pos[2] < MinZ)
+            return false;
+
+        return (MaxZ - pos[2]) <= (pos[0] - MinX) * m_fSampleZXAspect;
+    }
+}
+
+
+// Returns the height at the given sample coordinates, with m_fScale and m_fOffset applied.
+//
+// Finite mode (m_bWrapMode == 0) clamps the coordinates to the sample grid. Wrap mode reduces them modulo
+// (sample count - 1), i.e. the pattern repeats after one cell less than there are samples.
+dReal dxHeightfieldData::GetHeight( int x, int z )
+{
+    dReal h=0;
+
+    if ( m_bWrapMode == 0 )
+    {
+        // Finite
+        if ( x < 0 ) x = 0;
+        if ( z < 0 ) z = 0;
+        if ( x > m_nWidthSamples - 1 ) x = m_nWidthSamples - 1;
+        if ( z > m_nDepthSamples - 1 ) z = m_nDepthSamples - 1;
+    }
+    else
+    {
+        // Infinite
+        const int nWrapX = m_nWidthSamples - 1;
+        const int nWrapZ = m_nDepthSamples - 1;
+
+        // A single sample along an axis gives a zero period and "% 0" is undefined behavior (an integer
+        // division fault on common targets), so such an axis always maps to its only sample.
+        if ( nWrapX != 0 ) { x %= nWrapX; if ( x < 0 ) x += nWrapX; }
+        else { x = 0; }
+
+        if ( nWrapZ != 0 ) { z %= nWrapZ; if ( z < 0 ) z += nWrapZ; }
+        else { z = 0; }
+    }
+
+    switch ( m_nGetHeightMode )
+    {
+        // callback (dReal)
+    case 0:
+        h = (*m_pGetHeightCallback)(m_pUserData, x, z);
+        break;
+
+        // byte
+    case 1:
+        h = ( (const unsigned char*)m_pHeightData )[x+(z * m_nWidthSamples)];
+        break;
+
+        // short
+    case 2:
+        h = ( (const short*)m_pHeightData )[x+(z * m_nWidthSamples)];
+        break;
+
+        // float
+    case 3:
+        h = ( (const float*)m_pHeightData )[x+(z * m_nWidthSamples)];
+        break;
+
+        // double
+    case 4:
+        h = (dReal)( ( (const double*)m_pHeightData )[x+(z * m_nWidthSamples)] );
+        break;
+    }
+
+    return (h * m_fScale) + m_fOffset;
+}
+
+
+// Returns the height at the given x,z position, interpolated linearly over the cell triangle containing it
+dReal dxHeightfieldData::GetHeight( dReal x, dReal z )
+{
+    // Index of the containing cell, still as floating point values
+    const dReal fCellX = dFloor( x * m_fInvSampleWidth );
+    const dReal fCellZ = dFloor( z * m_fInvSampleDepth );
+
+    // Position inside the cell as a fraction of the cell size (nominally within [0, 1])
+    const dReal fracX = ( x - ( fCellX * m_fSampleWidth ) ) * m_fInvSampleWidth;
+    const dReal fracZ = ( z - ( fCellZ * m_fSampleDepth ) ) * m_fInvSampleDepth;
+
+    const int cellX = int( fCellX );
+    const int cellZ = int( fCellZ );
+
+    dReal height;
+    if ( !( fracX + fracZ <= REAL( 1.0 ) ) )
+    {
+        // Triangle next to the (cellX + 1, cellZ + 1) corner
+        const dReal corner = GetHeight( cellX + 1, cellZ + 1 );
+
+        height = corner + ( GetHeight( cellX + 1, cellZ ) - corner ) * ( REAL(1.0) - fracZ ) +
+            ( GetHeight( cellX, cellZ + 1 ) - corner ) * ( REAL(1.0) - fracX );
+    }
+    else
+    {
+        // Triangle next to the (cellX, cellZ) corner; points exactly on the diagonal are handled here as well
+        const dReal corner = GetHeight( cellX, cellZ );
+
+        height = corner + ( GetHeight( cellX + 1, cellZ ) - corner ) * fracX
+            + ( GetHeight( cellX, cellZ + 1 ) - corner ) * fracZ;
+    }
+
+    return height;
+}
+
+
+// dxHeightfieldData destructor
+//
+// Height samples are released only if m_bCopyHeightData is set. The delete [] is issued through a pointer of
+// the element type that corresponds to m_nGetHeightMode.
+//
+// NOTE(review): the allocation of the copy is not visible here; the element types below are assumed to match it.
+dxHeightfieldData::~dxHeightfieldData()
+{
+    if ( !m_bCopyHeightData )
+    {
+        // copy flag not set: m_pHeightData is left alone
+        return;
+    }
+
+    // One case per storage mode; a mode that is not listed releases nothing
+    switch ( m_nGetHeightMode )
+    {
+
+        // callback
+    case 0:
+        // do nothing
+        break;
+
+        // byte
+    case 1:
+        dIASSERT( m_pHeightData );
+        delete [] (unsigned char*)m_pHeightData;
+        break;
+
+        // short
+    case 2:
+        dIASSERT( m_pHeightData );
+        delete [] (short*)m_pHeightData;
+        break;
+
+        // float
+    case 3:
+        dIASSERT( m_pHeightData );
+        delete [] (float*)m_pHeightData;
+        break;
+
+        // double
+    case 4:
+        dIASSERT( m_pHeightData );
+        delete [] (double*)m_pHeightData;
+        break;
+
+    }
+}
+
+
+//////// dxHeightfield /////////////////////////////////////////////////////////////////
+
+
+// dxHeightfield constructor: all temporary buffer pointers and sizes start at zero, then the geom class and the data handle are set
+dxHeightfield::dxHeightfield( dSpaceID space,
+                             dHeightfieldDataID data,
+                             int bPlaceable )			:
+    dxGeom( space, bPlaceable ),
+    tempPlaneBuffer(0),
+    tempPlaneInstances(0),
+    tempPlaneBufferSize(0),
+    tempTriangleBuffer(0),
+    tempTriangleBufferSize(0),
+    tempHeightBuffer(0),
+    tempHeightInstances(0),
+    tempHeightBufferSizeX(0),
+    tempHeightBufferSizeZ(0)
+{
+    type = dHeightfieldClass;
+    this->m_p_data = data; // only the handle is stored here; nothing is copied from data
+}
+
+
+// Computes the axis aligned bounding box (aabb[0..5] = min x, max x, min y, max y, min z, max z) from the heightfield extents, height bounds and wrap mode
+void dxHeightfield::computeAABB()
+{
+    const dxHeightfieldData *d = m_p_data;
+
+    if ( d->m_bWrapMode == 0 )
+    {
+        // Finite
+        if ( gflags & GEOM_PLACEABLE )
+        {
+            dReal dx[6], dy[6], dz[6];
+
+            // Y-axis: world space contributions of the lowest (dy[0..2]) and the highest (dy[3..5]) local height
+            if (d->m_fMinHeight != -dInfinity)
+            {
+                dy[0] = ( final_posr->R[ 1] * d->m_fMinHeight );
+                dy[1] = ( final_posr->R[ 5] * d->m_fMinHeight );
+                dy[2] = ( final_posr->R[ 9] * d->m_fMinHeight );
+            }
+            else
+            {
+                // Multiplication is performed to obtain infinity of correct sign
+                dy[0] = ( final_posr->R[ 1] ? final_posr->R[ 1] * -dInfinity : REAL(0.0) );
+                dy[1] = ( final_posr->R[ 5] ? final_posr->R[ 5] * -dInfinity : REAL(0.0) );
+                dy[2] = ( final_posr->R[ 9] ? final_posr->R[ 9] * -dInfinity : REAL(0.0) );
+            }
+
+            if (d->m_fMaxHeight != dInfinity)
+            {
+                dy[3] = ( final_posr->R[ 1] * d->m_fMaxHeight );
+                dy[4] = ( final_posr->R[ 5] * d->m_fMaxHeight );
+                dy[5] = ( final_posr->R[ 9] * d->m_fMaxHeight );
+            }
+            else
+            {
+                dy[3] = ( final_posr->R[ 1] ? final_posr->R[ 1] * dInfinity : REAL(0.0) );
+                dy[4] = ( final_posr->R[ 5] ? final_posr->R[ 5] * dInfinity : REAL(0.0) );
+                dy[5] = ( final_posr->R[ 9] ? final_posr->R[ 9] * dInfinity : REAL(0.0) );
+            }
+
+#ifdef DHEIGHTFIELD_CORNER_ORIGIN
+
+            // X-axis
+            dx[0] = 0;	dx[3] = ( final_posr->R[ 0] * d->m_fWidth );
+            dx[1] = 0;	dx[4] = ( final_posr->R[ 4] * d->m_fWidth );
+            dx[2] = 0;	dx[5] = ( final_posr->R[ 8] * d->m_fWidth );
+
+            // Z-axis
+            dz[0] = 0;	dz[3] = ( final_posr->R[ 2] * d->m_fDepth );
+            dz[1] = 0;	dz[4] = ( final_posr->R[ 6] * d->m_fDepth );
+            dz[2] = 0;	dz[5] = ( final_posr->R[10] * d->m_fDepth );
+
+#else // DHEIGHTFIELD_CORNER_ORIGIN
+
+            // X-axis
+            dx[0] = ( final_posr->R[ 0] * -d->m_fHalfWidth );
+            dx[1] = ( final_posr->R[ 4] * -d->m_fHalfWidth );
+            dx[2] = ( final_posr->R[ 8] * -d->m_fHalfWidth );
+            dx[3] = ( final_posr->R[ 0] * d->m_fHalfWidth );
+            dx[4] = ( final_posr->R[ 4] * d->m_fHalfWidth );
+            dx[5] = ( final_posr->R[ 8] * d->m_fHalfWidth );
+
+            // Z-axis
+            dz[0] = ( final_posr->R[ 2] * -d->m_fHalfDepth );
+            dz[1] = ( final_posr->R[ 6] * -d->m_fHalfDepth );
+            dz[2] = ( final_posr->R[10] * -d->m_fHalfDepth );
+            dz[3] = ( final_posr->R[ 2] * d->m_fHalfDepth );
+            dz[4] = ( final_posr->R[ 6] * d->m_fHalfDepth );
+            dz[5] = ( final_posr->R[10] * d->m_fHalfDepth );
+
+#endif // DHEIGHTFIELD_CORNER_ORIGIN
+
+            // A world coordinate of any point of the geom is pos plus one term per local axis, therefore the
+            // extreme values along a world axis are obtained by adding up the per-axis extremes. Picking just the
+            // single smallest/largest term instead (as a three-way MIN/MAX does) produces a box that is too small
+            // as soon as more than one local axis contributes to a world axis, i.e. for any rotation that is not
+            // a multiple of 90 degrees, and too large when the height range does not contain zero.
+
+            // X extents
+            aabb[0] = final_posr->pos[0] + dMIN( dx[0], dx[3] ) + dMIN( dy[0], dy[3] ) + dMIN( dz[0], dz[3] );
+            aabb[1] = final_posr->pos[0] + dMAX( dx[0], dx[3] ) + dMAX( dy[0], dy[3] ) + dMAX( dz[0], dz[3] );
+
+            // Y extents
+            aabb[2] = final_posr->pos[1] + dMIN( dx[1], dx[4] ) + dMIN( dy[1], dy[4] ) + dMIN( dz[1], dz[4] );
+            aabb[3] = final_posr->pos[1] + dMAX( dx[1], dx[4] ) + dMAX( dy[1], dy[4] ) + dMAX( dz[1], dz[4] );
+
+            // Z extents
+            aabb[4] = final_posr->pos[2] + dMIN( dx[2], dx[5] ) + dMIN( dy[2], dy[5] ) + dMIN( dz[2], dz[5] );
+            aabb[5] = final_posr->pos[2] + dMAX( dx[2], dx[5] ) + dMAX( dy[2], dy[5] ) + dMAX( dz[2], dz[5] );
+        }
+        else
+        {
+
+#ifdef DHEIGHTFIELD_CORNER_ORIGIN
+
+            aabb[0] = 0;					aabb[1] = d->m_fWidth;
+            aabb[2] = d->m_fMinHeight;		aabb[3] = d->m_fMaxHeight;
+            aabb[4] = 0;					aabb[5] = d->m_fDepth;
+
+#else // DHEIGHTFIELD_CORNER_ORIGIN
+
+            aabb[0] = -d->m_fHalfWidth;		aabb[1] = +d->m_fHalfWidth;
+            aabb[2] = d->m_fMinHeight;		aabb[3] = d->m_fMaxHeight;
+            aabb[4] = -d->m_fHalfDepth;		aabb[5] = +d->m_fHalfDepth;
+
+#endif // DHEIGHTFIELD_CORNER_ORIGIN
+
+        }
+    }
+    else
+    {
+        // Infinite
+        if ( gflags & GEOM_PLACEABLE )
+        {
+            aabb[0] = -dInfinity;			aabb[1] = +dInfinity;
+            aabb[2] = -dInfinity;			aabb[3] = +dInfinity;
+            aabb[4] = -dInfinity;			aabb[5] = +dInfinity;
+        }
+        else
+        {
+            aabb[0] = -dInfinity;			aabb[1] = +dInfinity;
+            aabb[2] = d->m_fMinHeight;		aabb[3] = d->m_fMaxHeight;
+            aabb[4] = -dInfinity;			aabb[5] = +dInfinity;
+        }
+    }
+
+}
+
+
+// dxHeightfield destructor
+// Releases the temporary triangle, plane and height buffers through the
+// corresponding reset*Buffer() helpers.
+dxHeightfield::~dxHeightfield()
+{
+    resetTriangleBuffer();
+    resetPlaneBuffer();
+    resetHeightBuffer();
+}
+
+void dxHeightfield::allocateTriangleBuffer(sizeint numTri)
+{
+    // The capacity actually used is whatever AlignBufferSize() returns for the
+    // requested triangle count. Any buffer already held is NOT released here;
+    // the caller in this file invokes resetTriangleBuffer() beforehand.
+    const sizeint capacity = AlignBufferSize(numTri, TEMP_TRIANGLE_BUFFER_ELEMENT_COUNT_ALIGNMENT);
+
+    tempTriangleBufferSize = capacity;
+    tempTriangleBuffer = new HeightFieldTriangle[capacity];
+}
+
+// Frees the temporary triangle buffer and returns the bookkeeping to the
+// "nothing allocated" state.
+void dxHeightfield::resetTriangleBuffer()
+{
+    delete[] tempTriangleBuffer;
+
+    // Clear the stale pointer and capacity: if the allocation that normally
+    // follows a reset fails, the destructor must not delete[] this block again.
+    tempTriangleBuffer = NULL;
+    tempTriangleBufferSize = 0;
+}
+
+void dxHeightfield::allocatePlaneBuffer(sizeint numTri)
+{
+    // Two parallel arrays are created: the plane objects themselves and a table
+    // of pointers to them. The pointer table is what sortPlanes() reorders.
+    // Existing buffers are not released here; callers reset first.
+    const sizeint capacity = AlignBufferSize(numTri, TEMP_PLANE_BUFFER_ELEMENT_COUNT_ALIGNMENT);
+
+    tempPlaneBufferSize = capacity;
+    tempPlaneBuffer = new HeightFieldPlane *[capacity];
+    tempPlaneInstances = new HeightFieldPlane[capacity];
+
+    // Initially slot i of the table refers to instance i.
+    for (sizeint slot = 0; slot < capacity; ++slot)
+    {
+        tempPlaneBuffer[slot] = tempPlaneInstances + slot;
+    }
+}
+
+// Frees the temporary plane instances and the pointer table that refers to
+// them, and returns the bookkeeping to the "nothing allocated" state.
+void dxHeightfield::resetPlaneBuffer()
+{
+    delete[] tempPlaneInstances;
+    delete[] tempPlaneBuffer;
+
+    // Clear the stale pointers and capacity: if either allocation that
+    // normally follows a reset fails, the destructor must not delete[] these
+    // blocks a second time.
+    tempPlaneInstances = NULL;
+    tempPlaneBuffer = NULL;
+    tempPlaneBufferSize = 0;
+}
+
+void dxHeightfield::allocateHeightBuffer(sizeint numX, sizeint numZ)
+{
+    // The vertex grid is stored as one contiguous block of rowCount * rowLength
+    // vertices plus a table of row pointers, so it can be addressed as
+    // tempHeightBuffer[x][z]. Existing buffers are not released here.
+    const sizeint rowCount = AlignBufferSize(numX, TEMP_HEIGHT_BUFFER_ELEMENT_COUNT_ALIGNMENT_X);
+    const sizeint rowLength = AlignBufferSize(numZ, TEMP_HEIGHT_BUFFER_ELEMENT_COUNT_ALIGNMENT_Z);
+
+    tempHeightBufferSizeX = rowCount;
+    tempHeightBufferSizeZ = rowLength;
+
+    tempHeightBuffer = new HeightFieldVertex *[rowCount];
+    tempHeightInstances = new HeightFieldVertex [rowCount * rowLength];
+
+    // Row r starts rowLength vertices after row r - 1.
+    for (sizeint row = 0; row < rowCount; ++row)
+    {
+        tempHeightBuffer[row] = tempHeightInstances + row * rowLength;
+    }
+}
+
+// Frees the temporary vertex grid and its row-pointer table, and returns the
+// bookkeeping to the "nothing allocated" state.
+void dxHeightfield::resetHeightBuffer()
+{
+    delete[] tempHeightInstances;
+    delete[] tempHeightBuffer;
+
+    // Clear the stale pointers and capacities: if either allocation that
+    // normally follows a reset fails, the destructor must not delete[] these
+    // blocks a second time.
+    tempHeightInstances = NULL;
+    tempHeightBuffer = NULL;
+    tempHeightBufferSizeX = 0;
+    tempHeightBufferSizeZ = 0;
+}
+//////// Heightfield data interface ////////////////////////////////////////////////////
+
+
+dHeightfieldDataID dGeomHeightfieldDataCreate()
+{
+    // Hands back a freshly default-constructed data object; in this file it is
+    // released again by dGeomHeightfieldDataDestroy().
+    dHeightfieldDataID created = new dxHeightfieldData();
+    return created;
+}
+
+
+void dGeomHeightfieldDataBuildCallback( dHeightfieldDataID d,
+                                       void* pUserData, dHeightfieldGetHeight* pCallback,
+                                       dReal width, dReal depth, int widthSamples, int depthSamples,
+                                       dReal scale, dReal offset, dReal thickness, int bWrap )
+{
+    // Sets up heightfield data whose heights come from a user callback
+    // (height mode 0) instead of a stored sample array.
+    dUASSERT( d, "argument not Heightfield data" );
+    dIASSERT( pCallback );
+    // Two samples per axis are the minimum that yields at least one cell.
+    dIASSERT( widthSamples >= 2 );
+    dIASSERT( depthSamples >= 2 );
+
+    // Remember the callback and the opaque pointer that accompanies it.
+    d->m_pGetHeightCallback = pCallback;
+    d->m_pUserData = pUserData;
+    d->m_nGetHeightMode = 0;
+
+    // Forward the grid description to the data object.
+    d->SetData( widthSamples, depthSamples, width, depth, scale, offset, thickness, bWrap );
+
+    // No samples are scanned in this mode, so the bounds start out unbounded;
+    // dGeomHeightfieldDataSetBounds() can narrow them.
+    d->m_fMaxHeight = dInfinity;
+    d->m_fMinHeight = -dInfinity;
+}
+
+
+// Builds heightfield data from an array of unsigned char samples (height mode 1).
+//   d               - data object to configure (must be non-null).
+//   pHeightData     - widthSamples * depthSamples source samples.
+//   bCopyHeightData - 0: only reference pHeightData; non-zero: keep a private copy.
+//   remaining args  - passed unchanged to dxHeightfieldData::SetData().
+// NOTE(review): a buffer owned from an earlier Build* call is not released in
+// this function - confirm whether that is handled elsewhere.
+void dGeomHeightfieldDataBuildByte( dHeightfieldDataID d,
+                                   const unsigned char *pHeightData, int bCopyHeightData,
+                                   dReal width, dReal depth, int widthSamples, int depthSamples,
+                                   dReal scale, dReal offset, dReal thickness, int bWrap )
+{
+    dUASSERT( d, "Argument not Heightfield data" );
+    dIASSERT( pHeightData );
+    dIASSERT( widthSamples >= 2 );	// Ensure we're making something with at least one cell.
+    dIASSERT( depthSamples >= 2 );
+
+    // set info
+    d->SetData( widthSamples, depthSamples, width, depth, scale, offset, thickness, bWrap );
+    d->m_nGetHeightMode = 1;
+    d->m_bCopyHeightData = bCopyHeightData;
+
+    if ( d->m_bCopyHeightData == 0 )
+    {
+        // Data is referenced only.
+        d->m_pHeightData = pHeightData;
+    }
+    else
+    {
+        // We own the height data, allocate storage.
+        // The element count is formed in sizeint so that a large grid cannot
+        // overflow the narrower type of the sample-count fields.
+        const sizeint numSamples = (sizeint)d->m_nWidthSamples * (sizeint)d->m_nDepthSamples;
+        unsigned char *pOwnedData = new unsigned char[ numSamples ];
+        dIASSERT( pOwnedData );
+
+        // Copy through the typed, writable pointer, then publish it.
+        memcpy( pOwnedData, pHeightData, sizeof( unsigned char ) * numSamples );
+        d->m_pHeightData = pOwnedData;
+    }
+
+    // Find height bounds
+    d->ComputeHeightBounds();
+}
+
+
+// Builds heightfield data from an array of short samples (height mode 2).
+//   d               - data object to configure (must be non-null).
+//   pHeightData     - widthSamples * depthSamples source samples.
+//   bCopyHeightData - 0: only reference pHeightData; non-zero: keep a private copy.
+//   remaining args  - passed unchanged to dxHeightfieldData::SetData().
+// NOTE(review): a buffer owned from an earlier Build* call is not released in
+// this function - confirm whether that is handled elsewhere.
+void dGeomHeightfieldDataBuildShort( dHeightfieldDataID d,
+                                    const short* pHeightData, int bCopyHeightData,
+                                    dReal width, dReal depth, int widthSamples, int depthSamples,
+                                    dReal scale, dReal offset, dReal thickness, int bWrap )
+{
+    dUASSERT( d, "Argument not Heightfield data" );
+    dIASSERT( pHeightData );
+    dIASSERT( widthSamples >= 2 );	// Ensure we're making something with at least one cell.
+    dIASSERT( depthSamples >= 2 );
+
+    // set info
+    d->SetData( widthSamples, depthSamples, width, depth, scale, offset, thickness, bWrap );
+    d->m_nGetHeightMode = 2;
+    d->m_bCopyHeightData = bCopyHeightData;
+
+    if ( d->m_bCopyHeightData == 0 )
+    {
+        // Data is referenced only.
+        d->m_pHeightData = pHeightData;
+    }
+    else
+    {
+        // We own the height data, allocate storage.
+        // The element count is formed in sizeint so that a large grid cannot
+        // overflow the narrower type of the sample-count fields.
+        const sizeint numSamples = (sizeint)d->m_nWidthSamples * (sizeint)d->m_nDepthSamples;
+        short *pOwnedData = new short[ numSamples ];
+        dIASSERT( pOwnedData );
+
+        // Copy through the typed, writable pointer, then publish it.
+        memcpy( pOwnedData, pHeightData, sizeof( short ) * numSamples );
+        d->m_pHeightData = pOwnedData;
+    }
+
+    // Find height bounds
+    d->ComputeHeightBounds();
+}
+
+
+// Builds heightfield data from an array of float samples (height mode 3).
+//   d               - data object to configure (must be non-null).
+//   pHeightData     - widthSamples * depthSamples source samples.
+//   bCopyHeightData - 0: only reference pHeightData; non-zero: keep a private copy.
+//   remaining args  - passed unchanged to dxHeightfieldData::SetData().
+// NOTE(review): a buffer owned from an earlier Build* call is not released in
+// this function - confirm whether that is handled elsewhere.
+void dGeomHeightfieldDataBuildSingle( dHeightfieldDataID d,
+                                     const float *pHeightData, int bCopyHeightData,
+                                     dReal width, dReal depth, int widthSamples, int depthSamples,
+                                     dReal scale, dReal offset, dReal thickness, int bWrap )
+{
+    dUASSERT( d, "Argument not Heightfield data" );
+    dIASSERT( pHeightData );
+    dIASSERT( widthSamples >= 2 );	// Ensure we're making something with at least one cell.
+    dIASSERT( depthSamples >= 2 );
+
+    // set info
+    d->SetData( widthSamples, depthSamples, width, depth, scale, offset, thickness, bWrap );
+    d->m_nGetHeightMode = 3;
+    d->m_bCopyHeightData = bCopyHeightData;
+
+    if ( d->m_bCopyHeightData == 0 )
+    {
+        // Data is referenced only.
+        d->m_pHeightData = pHeightData;
+    }
+    else
+    {
+        // We own the height data, allocate storage.
+        // The element count is formed in sizeint so that a large grid cannot
+        // overflow the narrower type of the sample-count fields.
+        const sizeint numSamples = (sizeint)d->m_nWidthSamples * (sizeint)d->m_nDepthSamples;
+        float *pOwnedData = new float[ numSamples ];
+        dIASSERT( pOwnedData );
+
+        // Copy through the typed, writable pointer, then publish it.
+        memcpy( pOwnedData, pHeightData, sizeof( float ) * numSamples );
+        d->m_pHeightData = pOwnedData;
+    }
+
+    // Find height bounds
+    d->ComputeHeightBounds();
+}
+
+// Builds heightfield data from an array of double samples (height mode 4).
+//   d               - data object to configure (must be non-null).
+//   pHeightData     - widthSamples * depthSamples source samples.
+//   bCopyHeightData - 0: only reference pHeightData; non-zero: keep a private copy.
+//   remaining args  - passed unchanged to dxHeightfieldData::SetData().
+// NOTE(review): a buffer owned from an earlier Build* call is not released in
+// this function - confirm whether that is handled elsewhere.
+void dGeomHeightfieldDataBuildDouble( dHeightfieldDataID d,
+                                     const double *pHeightData, int bCopyHeightData,
+                                     dReal width, dReal depth, int widthSamples, int depthSamples,
+                                     dReal scale, dReal offset, dReal thickness, int bWrap )
+{
+    dUASSERT( d, "Argument not Heightfield data" );
+    dIASSERT( pHeightData );
+    dIASSERT( widthSamples >= 2 );	// Ensure we're making something with at least one cell.
+    dIASSERT( depthSamples >= 2 );
+
+    // set info
+    d->SetData( widthSamples, depthSamples, width, depth, scale, offset, thickness, bWrap );
+    d->m_nGetHeightMode = 4;
+    d->m_bCopyHeightData = bCopyHeightData;
+
+    if ( d->m_bCopyHeightData == 0 )
+    {
+        // Data is referenced only.
+        d->m_pHeightData = pHeightData;
+    }
+    else
+    {
+        // We own the height data, allocate storage.
+        // The element count is formed in sizeint so that a large grid cannot
+        // overflow the narrower type of the sample-count fields.
+        const sizeint numSamples = (sizeint)d->m_nWidthSamples * (sizeint)d->m_nDepthSamples;
+        double *pOwnedData = new double[ numSamples ];
+        dIASSERT( pOwnedData );
+
+        // Copy through the typed, writable pointer, then publish it.
+        memcpy( pOwnedData, pHeightData, sizeof( double ) * numSamples );
+        d->m_pHeightData = pOwnedData;
+    }
+
+    // Find height bounds
+    d->ComputeHeightBounds();
+}
+
+
+
+
+void dGeomHeightfieldDataSetBounds( dHeightfieldDataID d, dReal minHeight, dReal maxHeight )
+{
+    // Stores caller-supplied height bounds. Both raw values are mapped through
+    // the data's scale and offset; the lower bound is additionally pushed down
+    // by the thickness.
+    dUASSERT(d, "Argument not Heightfield data");
+
+    const dReal heightScale = d->m_fScale;
+    const dReal heightOffset = d->m_fOffset;
+
+    d->m_fMaxHeight = ( maxHeight * heightScale ) + heightOffset;
+    d->m_fMinHeight = ( minHeight * heightScale ) + heightOffset - d->m_fThickness;
+}
+
+
+void dGeomHeightfieldDataDestroy( dHeightfieldDataID d )
+{
+    // Deletes a data object such as the one returned by
+    // dGeomHeightfieldDataCreate(). A null handle trips the user assertion.
+    dUASSERT(d, "argument not Heightfield data");
+
+    dxHeightfieldData * const doomed = d;
+    delete doomed;
+}
+
+
+//////// Heightfield geom interface ////////////////////////////////////////////////////
+
+
+dGeomID dCreateHeightfield( dSpaceID space, dHeightfieldDataID data, int bPlaceable )
+{
+    // Constructs a heightfield geom from the given space, data and placeable
+    // flag, and returns it as a generic geom handle.
+    dxHeightfield * const created = new dxHeightfield( space, data, bPlaceable );
+    return created;
+}
+
+
+void dGeomHeightfieldSetHeightfieldData( dGeomID g, dHeightfieldDataID d )
+{
+    // Points the geom at a different data object. No type check is made on g:
+    // the caller must pass a heightfield geom.
+    static_cast<dxHeightfield*>( g )->m_p_data = d;
+}
+
+
+dHeightfieldDataID dGeomHeightfieldGetHeightfieldData( dGeomID g )
+{
+    // Returns the data object currently attached to the geom. No type check is
+    // made on g: the caller must pass a heightfield geom.
+    return static_cast<dxHeightfield*>( g )->m_p_data;
+}
+
+//////// dxHeightfield /////////////////////////////////////////////////////////////////
+
+
+// Typedef for generic 'get point depth' function:
+// takes a geom plus a point (x, y, z) and returns a dReal.
+typedef dReal dGetDepthFn( dGeomID g, dReal x, dReal y, dReal z );
+
+
+// Debug helper that reports one contact through dMessage().
+// The expansion is not self-contained: it refers to the names x, z, pContact
+// and o2, which must be visible where the macro is used, and it passes o2 to
+// dGeomSphereGetRadius(). A is printed as the third integer of the triple.
+// The expansion already ends with ';'.
+#define DMESS(A)	\
+    dMessage(0,"Contact Plane (%d %d %d) %.5e %.5e (%.5e %.5e %.5e)(%.5e %.5e %.5e)).",	\
+    x,z,(A),	\
+    pContact->depth,	\
+    dGeomSphereGetRadius(o2),		\
+    pContact->pos[0],	\
+    pContact->pos[1],	\
+    pContact->pos[2],	\
+    pContact->normal[0],	\
+    pContact->normal[1],	\
+    pContact->normal[2]);
+
+// Triangle ordering predicate: true when A's maxAAAB is larger than B's by
+// more than dEpsilon.
+static inline bool DescendingTriangleSort(const HeightFieldTriangle * const A, const HeightFieldTriangle * const B)
+{
+    return (dEpsilon < (A->maxAAAB - B->maxAAAB));
+}
+// Plane ordering predicate: true when A's maxAAAB is larger than B's by more
+// than dEpsilon. sortPlanes() swaps two neighbours when this holds.
+static inline bool DescendingPlaneSort(const HeightFieldPlane * const A, const HeightFieldPlane * const B)
+{
+    return (dEpsilon < (A->maxAAAB - B->maxAAAB));
+}
+
+// Bubble-sorts the first numPlanes entries of tempPlaneBuffer in place.
+// Two neighbours are exchanged whenever DescendingPlaneSort() reports that the
+// earlier plane's maxAAAB exceeds the later one's by more than dEpsilon; only
+// the pointers in tempPlaneBuffer move, the plane objects stay where they are.
+void dxHeightfield::sortPlanes(const sizeint numPlanes)
+{
+    // Fewer than two planes are already ordered. Returning early also covers
+    // numPlanes == 0, where an unsigned "numPlanes - 1" bound would wrap
+    // around and walk far past the end of the buffer.
+    if (numPlanes < 2)
+        return;
+
+    bool has_swapped;
+    do
+    {
+        has_swapped = false;//reset flag
+        // "i + 1 < numPlanes" keeps the bound free of unsigned subtraction.
+        for (sizeint i = 0; i + 1 < numPlanes; i++)
+        {
+            //if they are in the wrong order
+            if (DescendingPlaneSort(tempPlaneBuffer[i], tempPlaneBuffer[i + 1]))
+            {
+                //exchange them
+                HeightFieldPlane * tempPlane = tempPlaneBuffer[i];
+                tempPlaneBuffer[i] = tempPlaneBuffer[i + 1];
+                tempPlaneBuffer[i + 1] = tempPlane;
+
+                //we have swapped at least once, list may not be sorted yet
+                has_swapped = true;
+            }
+        }
+    }    //if no swaps were made during this pass, the list has been sorted
+    while (has_swapped);
+}
+
+// Returns the length of what is left of (_point - _pt0) after subtracting
+// _Edge scaled by ((_point - _pt0) . _Edge) / _Edgelength.
+// NOTE(review): the division by _Edgelength happens once only; whether _Edge is
+// expected to be normalised, or _Edgelength to be a squared length, cannot be
+// determined from this function - confirm against the callers.
+static inline dReal DistancePointToLine(const dVector3 &_point,
+                                        const dVector3 &_pt0,
+                                        const dVector3 &_Edge,
+                                        const dReal _Edgelength)
+{
+    // Offset of the query point from the line's anchor point.
+    dVector3 offset;
+    dVector3Subtract(_point, _pt0, offset);
+
+    // Part of that offset attributed to the edge direction.
+    const dReal factor = dVector3Dot(offset, _Edge) / _Edgelength;
+    dVector3 along;
+    dVector3Copy (_Edge, along);
+    dVector3Scale(along, factor);
+
+    // Remove it and measure the remainder.
+    dVector3Subtract(offset, along, offset);
+    return dVector3Length(offset);
+}
+
+
+
+
+int dxHeightfield::dCollideHeightfieldZone( const int minX, const int maxX, const int minZ, const int maxZ, 
+                                           dxGeom* o2, const int numMaxContactsPossible,
+                                           int flags, dContactGeom* contact, 
+                                           int skip )
+{
+    dContactGeom *pContact = 0;
+    int  x, z;
+    // check if not above or inside terrain first
+    // while filling a heightmap partial temporary buffer
+    const unsigned int numX = (maxX - minX) + 1;
+    const unsigned int numZ = (maxZ - minZ) + 1;
+    const dReal minO2Height = o2->aabb[2];
+    const dReal maxO2Height = o2->aabb[3];
+    unsigned int x_local, z_local;
+    dReal maxY = - dInfinity;
+    dReal minY = dInfinity;
+    // localize and const for faster access
+    const dReal cfSampleWidth = m_p_data->m_fSampleWidth;
+    const dReal cfSampleDepth = m_p_data->m_fSampleDepth;
+    {
+        if (tempHeightBufferSizeX < numX || tempHeightBufferSizeZ < numZ)
+        {
+            resetHeightBuffer();
+            allocateHeightBuffer(numX, numZ);
+        }
+
+        dReal Xpos, Ypos;
+
+        for ( x = minX, x_local = 0; x_local < numX; x++, x_local++)
+        {
+            Xpos = x * cfSampleWidth; // Always calculate pos via multiplication to avoid computational error accumulation during multiple additions
+
+            const dReal c_Xpos = Xpos;
+            HeightFieldVertex *HeightFieldRow = tempHeightBuffer[x_local];
+            for ( z = minZ, z_local = 0; z_local < numZ; z++, z_local++)
+            {
+                Ypos = z * cfSampleDepth; // Always calculate pos via multiplication to avoid computational error accumulation during multiple additions
+
+                const dReal h = m_p_data->GetHeight(x, z);
+                HeightFieldRow[z_local].vertex[0] = c_Xpos;
+                HeightFieldRow[z_local].vertex[1] = h;
+                HeightFieldRow[z_local].vertex[2] = Ypos;
+                HeightFieldRow[z_local].coords[0] = x;
+                HeightFieldRow[z_local].coords[1] = z;
+
+                maxY = dMAX(maxY, h);
+                minY = dMIN(minY, h);
+            }
+        }
+        if (minO2Height - maxY > -dEpsilon )
+        {
+            //totally above heightfield
+            return 0;
+        }
+        if (minY - maxO2Height > -dEpsilon )
+        {
+            // totally under heightfield
+            pContact = CONTACT(contact, 0);
+
+            pContact->pos[0] = o2->final_posr->pos[0];
+            pContact->pos[1] = minY;
+            pContact->pos[2] = o2->final_posr->pos[2];
+
+            pContact->normal[0] = 0;
+            pContact->normal[1] = - 1;
+            pContact->normal[2] = 0;
+
+            pContact->depth =  minY - maxO2Height;
+
+            pContact->side1 = -1;
+            pContact->side2 = -1;
+
+            return 1;
+        }
+    }
+    // get All Planes that could collide against.
+    dColliderFn *geomRayNCollider=0;
+    dColliderFn *geomNPlaneCollider=0;
+    dGetDepthFn *geomNDepthGetter=0;
+
+    // int max_collisionContact = numMaxContactsPossible; -- not used
+    switch (o2->type)
+    {
+    case dRayClass:
+        geomRayNCollider		= NULL;
+        geomNPlaneCollider	    = dCollideRayPlane;
+        geomNDepthGetter		= NULL;
+        //max_collisionContact    = 1;
+        break;
+
+    case dSphereClass:
+        geomRayNCollider		= dCollideRaySphere;
+        geomNPlaneCollider  	= dCollideSpherePlane;
+        geomNDepthGetter		= dGeomSpherePointDepth;
+        //max_collisionContact    = 3;
+        break;
+
+    case dBoxClass:
+        geomRayNCollider		= dCollideRayBox;
+        geomNPlaneCollider	    = dCollideBoxPlane;
+        geomNDepthGetter		= dGeomBoxPointDepth;
+        //max_collisionContact    = 8;
+        break;
+
+    case dCapsuleClass:
+        geomRayNCollider		= dCollideRayCapsule;
+        geomNPlaneCollider  	= dCollideCapsulePlane;
+        geomNDepthGetter		= dGeomCapsulePointDepth;
+        // max_collisionContact    = 3;
+        break;
+
+    case dCylinderClass:
+        geomRayNCollider		= dCollideRayCylinder;
+        geomNPlaneCollider	    = dCollideCylinderPlane;
+        geomNDepthGetter		= NULL;// TODO: dGeomCCylinderPointDepth
+        //max_collisionContact    = 3;
+        break;
+
+    case dConvexClass:
+        geomRayNCollider		= dCollideRayConvex;
+        geomNPlaneCollider  	= dCollideConvexPlane;
+        geomNDepthGetter		= NULL;// TODO: dGeomConvexPointDepth;
+        //max_collisionContact    = 3;
+        break;
+
+#if dTRIMESH_ENABLED
+
+    case dTriMeshClass:
+        geomRayNCollider		= dCollideRayTrimesh;
+        geomNPlaneCollider	    = dCollideTrimeshPlane;
+        geomNDepthGetter		= NULL;// TODO: dGeomTrimeshPointDepth;
+        //max_collisionContact    = 3;
+        break;
+
+#endif // dTRIMESH_ENABLED
+
+    default:
+        dIASSERT(0);	// Shouldn't ever get here.
+        break;
+
+    }
+
+    dxPlane myplane(0,0,0,0,0);
+    dxPlane* sliding_plane = &myplane;
+    dReal triplane[4];
+    int i;
+
+    // check some trivial case.
+    // Vector Up plane
+    if (maxY - minY < dEpsilon)
+    {
+        // it's a single plane.
+        triplane[0] = 0;
+        triplane[1] = 1;
+        triplane[2] = 0;
+        triplane[3] =  minY;
+        dGeomPlaneSetNoNormalize (sliding_plane, triplane);
+        // find collision and compute contact points
+        const int numTerrainContacts = geomNPlaneCollider (o2, sliding_plane, flags, contact, skip);
+        dIASSERT(numTerrainContacts <= numMaxContactsPossible);
+        for (i = 0; i < numTerrainContacts; i++)
+        {
+            pContact = CONTACT(contact, i*skip);
+            dOPESIGN(pContact->normal, =, -, triplane);
+        }
+        return numTerrainContacts;
+    }
+
+    /* -- This block is invalid as per Martijn Buijs <buijs512@planet.nl>
+
+    The problem seems to be based on the erroneous assumption that if two of 
+    the four vertices of a 'grid' are at the same height, the entire grid can be
+    represented as a single plane. It works for an axis aligned slope, but fails
+    on all 4 grids of a 3x3 spike feature. Since the plane normal is constructed
+    from only 3 vertices (only one of the two triangles) this often results in 
+    discontinuities at the grid edges (causing small jumps when the contact 
+    point moves from one grid to another).
+
+    // unique plane
+    {
+    // check for very simple plane heightfield
+    dReal minXHeightDelta = dInfinity, maxXHeightDelta = - dInfinity;
+    dReal minZHeightDelta = dInfinity, maxZHeightDelta = - dInfinity;
+
+
+    dReal lastXHeight = tempHeightBuffer[0][0].vertex[1];
+    for ( x_local = 1; x_local < numX; x_local++)
+    {
+    HeightFieldVertex *HeightFieldRow = tempHeightBuffer[x_local];
+
+    const dReal deltaX = HeightFieldRow[0].vertex[1] - lastXHeight;
+
+    maxXHeightDelta = dMAX (maxXHeightDelta,  deltaX);
+    minXHeightDelta = dMIN (minXHeightDelta,  deltaX);
+
+    dReal lastZHeight = HeightFieldRow[0].vertex[1];
+    for ( z_local = 1; z_local < numZ; z_local++)
+    {
+    const dReal deltaZ = (HeightFieldRow[z_local].vertex[1] - lastZHeight);
+
+    maxZHeightDelta = dMAX (maxZHeightDelta,  deltaZ);
+    minZHeightDelta = dMIN (minZHeightDelta,  deltaZ);
+
+    }
+    }
+
+    if (maxZHeightDelta - minZHeightDelta < dEpsilon && 
+    maxXHeightDelta - minXHeightDelta < dEpsilon )
+    {
+    // it's a single plane.
+    const dVector3 &A = tempHeightBuffer[0][0].vertex;
+    const dVector3 &B = tempHeightBuffer[1][0].vertex;
+    const dVector3 &C = tempHeightBuffer[0][1].vertex;
+
+    // define 2 edges and a point that will define collision plane
+    {
+    dVector3 Edge1, Edge2; 
+    dVector3Subtract(C, A, Edge1);
+    dVector3Subtract(B, A, Edge2);
+    dVector3Cross(Edge1, Edge2, triplane);
+    }
+
+    // Define Plane
+    // Normalize plane normal
+    const dReal dinvlength = REAL(1.0) / dVector3Length(triplane);
+    triplane[0] *= dinvlength;
+    triplane[1] *= dinvlength;
+    triplane[2] *= dinvlength;
+    // get distance to origin from plane 
+    triplane[3] = dVector3Dot(triplane, A);
+
+    dGeomPlaneSetNoNormalize (sliding_plane, triplane);
+    // find collision and compute contact points
+    const int numTerrainContacts = geomNPlaneCollider (o2, sliding_plane, flags, contact, skip);
+    dIASSERT(numTerrainContacts <= numMaxContactsPossible);
+    for (i = 0; i < numTerrainContacts; i++)
+    {
+    pContact = CONTACT(contact, i*skip);
+    dOPESIGN(pContact->normal, =, -, triplane);
+    }
+    return numTerrainContacts;
+    }
+    }
+    */
+
+    int numTerrainContacts = 0;
+    dContactGeom *PlaneContact = m_p_data->m_contacts;
+
+    const unsigned int numTriMax = (maxX - minX) * (maxZ - minZ) * 2;
+    if (tempTriangleBufferSize < numTriMax)
+    {
+        resetTriangleBuffer();
+        allocateTriangleBuffer(numTriMax);
+    }
+
+    // Sorting triangle/plane  resulting from heightfield zone
+    // Perhaps that would be necessary in case of too much limited
+    // maximum contact point...
+    // or in complex mesh case (trimesh and convex)
+    // need some test or insights on this before enabling this.
+    const bool isContactNumPointsLimited = 
+        true;
+    // (numMaxContacts < 8)
+    //    || o2->type == dConvexClass
+    //    || o2->type == dTriMeshClass
+    //    || (numMaxContacts < (int)numTriMax)       
+
+
+
+    // if small heightfield triangle related to O2 colliding
+    // or no Triangle colliding at all.
+    bool needFurtherPasses = (o2->type == dTriMeshClass);
+    //compute Ratio between Triangle size and O2 aabb size
+    // no FurtherPasses are needed in ray class
+    if (o2->type != dRayClass  && needFurtherPasses == false)
+    {
+        const dReal xratio = (o2->aabb[1] - o2->aabb[0]) * m_p_data->m_fInvSampleWidth;
+        if (xratio > REAL(1.5))
+            needFurtherPasses = true;
+        else
+        {
+            const dReal zratio = (o2->aabb[5] - o2->aabb[4]) * m_p_data->m_fInvSampleDepth;
+            if (zratio > REAL(1.5))
+                needFurtherPasses = true;
+        }
+
+    }
+
+    unsigned int numTri = 0;
+    HeightFieldVertex *A, *B, *C, *D;
+    /*    (y is up)
+         A--------B-...x
+         |       /|
+         |      / |
+         |     /  |
+         |    /   |
+         |   /    |
+         |  /     |
+         | /      |
+         |/       |
+         C--------D   
+         .
+         .
+         .
+         z
+    */  
+    // keep only triangle that does intersect geom
+
+    const unsigned int maxX_local = maxX - minX;
+    const unsigned int maxZ_local = maxZ - minZ;
+
+    for ( x_local = 0; x_local < maxX_local; x_local++)
+    {
+        HeightFieldVertex *HeightFieldRow      = tempHeightBuffer[x_local];
+        HeightFieldVertex *HeightFieldNextRow  = tempHeightBuffer[x_local + 1];
+
+        // First A
+        C = &HeightFieldRow    [0];
+        // First B
+        D = &HeightFieldNextRow[0];
+
+        for ( z_local = 0; z_local < maxZ_local; z_local++)
+        {
+            A = C;
+            B = D;
+
+            C = &HeightFieldRow    [z_local + 1];
+            D = &HeightFieldNextRow[z_local + 1];
+
+            const dReal AHeight = A->vertex[1];
+            const dReal BHeight = B->vertex[1];
+            const dReal CHeight = C->vertex[1];
+            const dReal DHeight = D->vertex[1];
+
+            const bool isACollide = AHeight > minO2Height;
+            const bool isBCollide = BHeight > minO2Height;
+            const bool isCCollide = CHeight > minO2Height;
+            const bool isDCollide = DHeight > minO2Height;
+
+            A->state = !(isACollide);
+            B->state = !(isBCollide);
+            C->state = !(isCCollide);
+            D->state = !(isDCollide);
+
+            if (isACollide || isBCollide || isCCollide)
+            {
+                HeightFieldTriangle * const CurrTriUp = &tempTriangleBuffer[numTri++];
+
+                CurrTriUp->state = false;
+
+                // changing point order here implies to change it in isOnHeightField
+                CurrTriUp->vertices[0] = A;
+                CurrTriUp->vertices[1] = B;
+                CurrTriUp->vertices[2] = C;
+
+                if (isContactNumPointsLimited)
+                    CurrTriUp->setMinMax();
+                CurrTriUp->isUp = true;
+            }
+
+            if (isBCollide || isCCollide || isDCollide)
+            {
+                HeightFieldTriangle * const CurrTriDown = &tempTriangleBuffer[numTri++];
+
+                CurrTriDown->state = false;
+                // changing point order here implies to change it in isOnHeightField
+
+                CurrTriDown->vertices[0] = D;
+                CurrTriDown->vertices[1] = B;
+                CurrTriDown->vertices[2] = C;
+
+
+                if (isContactNumPointsLimited)
+                    CurrTriDown->setMinMax();
+                CurrTriDown->isUp = false;
+            }
+
+
+            if (needFurtherPasses &&
+                (isBCollide || isCCollide)
+                &&
+                (AHeight > CHeight &&
+                AHeight > BHeight &&
+                DHeight > CHeight &&
+                DHeight > BHeight))
+            {
+                // That means Edge BC is concave, therefore
+                // BC Edge and B and C vertices cannot collide
+
+                B->state = true;
+                C->state = true;
+            }
+            // should find a way to check other edges (AB, BD, CD) too for concavity
+        }
+    }
+
+    // at least one triangle should intersect geom
+    dIASSERT (numTri != 0);
+    // pass1: VS triangle as Planes
+    // Group Triangle by same plane definition
+    // as Terrain often has many triangles using same plane definition
+    // then collide against that list of triangles.
+    {
+
+        dVector3 Edge1, Edge2;
+        //compute all triangles normals.
+        for (unsigned int k = 0; k < numTri; k++)
+        {
+            HeightFieldTriangle * const itTriangle = &tempTriangleBuffer[k];
+
+            // define 2 edges and a point that will define collision plane
+            dVector3Subtract(itTriangle->vertices[2]->vertex, itTriangle->vertices[0]->vertex, Edge1);
+            dVector3Subtract(itTriangle->vertices[1]->vertex, itTriangle->vertices[0]->vertex, Edge2);
+
+            // find a perpendicular vector to the triangle
+            if  (itTriangle->isUp)
+                dVector3Cross(Edge1, Edge2, triplane);
+            else
+                dVector3Cross(Edge2, Edge1, triplane);
+
+            // Define Plane
+            // Normalize plane normal
+            const dReal dinvlength = REAL(1.0) / dVector3Length(triplane);
+            triplane[0] *= dinvlength;
+            triplane[1] *= dinvlength;
+            triplane[2] *= dinvlength;
+            // get distance to origin from plane 
+            triplane[3] = dVector3Dot(triplane, itTriangle->vertices[0]->vertex);
+
+            // saves normal for collision check (planes, triangles, vertices and edges.)
+            dVector3Copy(triplane, itTriangle->planeDef);
+            // saves distance for collision check (planes, triangles, vertices and edges.)
+            itTriangle->planeDef[3] = triplane[3];
+        }
+
+        // group triangles by planes sharing the same plane definition
+        if (tempPlaneBufferSize  < numTri)
+        {
+            resetPlaneBuffer();
+            allocatePlaneBuffer(numTri);
+        }
+
+        unsigned int numPlanes = 0;
+        for (unsigned int k = 0; k < numTri; k++)
+        {
+            HeightFieldTriangle * const tri_base = &tempTriangleBuffer[k];
+
+            if (tri_base->state == true)
+                continue;// already tested or added to plane list.
+
+            HeightFieldPlane * const currPlane = tempPlaneBuffer[numPlanes];
+            currPlane->resetTriangleListSize(numTri - k);
+            currPlane->addTriangle(tri_base);
+            // saves normal for collision check (planes, triangles, vertices and edges.)
+            dVector3Copy(tri_base->planeDef, currPlane->planeDef);
+            // saves distance for collision check (planes, triangles, vertices and edges.)
+            currPlane->planeDef[3]= tri_base->planeDef[3];
+
+            const dReal normx = tri_base->planeDef[0];
+            const dReal normy = tri_base->planeDef[1];
+            const dReal normz = tri_base->planeDef[2];
+            const dReal dist = tri_base->planeDef[3];
+
+            for (unsigned int m = k + 1; m < numTri; m++)
+            {
+
+                HeightFieldTriangle * const tri_test = &tempTriangleBuffer[m];
+                if (tri_test->state == true)
+                    continue;// already tested or added to plane list.
+
+                // normals and distance are the same.
+                if (
+                    dFabs(normy - tri_test->planeDef[1]) < dEpsilon &&  
+                    dFabs(dist  - tri_test->planeDef[3]) < dEpsilon &&
+                    dFabs(normx - tri_test->planeDef[0]) < dEpsilon && 
+                    dFabs(normz - tri_test->planeDef[2]) < dEpsilon
+                    )
+                {
+                    currPlane->addTriangle (tri_test);
+                    tri_test->state = true;
+                }
+            }
+
+            tri_base->state = true;
+            if (isContactNumPointsLimited)
+                currPlane->setMinMax();
+
+            numPlanes++;
+        }
+
+        // sort planes
+        if (isContactNumPointsLimited)
+            sortPlanes(numPlanes);
+
+#if !defined(NO_CONTACT_CULLING_BY_ISONHEIGHTFIELD2)
+        /*
+        Note by Oleh_Derevenko:
+        It seems to be incorrect to limit contact count by some particular value
+        since some of them (and even all of them) may be culled in following condition.
+        However I do not see an easy way to fix this.
+        If not that culling the flags modification should be changed here and
+        additionally repeated after some contacts have been generated (in "if (didCollide)").
+        The maximum of contacts in flags would then be set to minimum of contacts
+        remaining and HEIGHTFIELDMAXCONTACTPERCELL.
+        */
+        int planeTestFlags = (flags & ~NUMC_MASK) | HEIGHTFIELDMAXCONTACTPERCELL;
+        dIASSERT((HEIGHTFIELDMAXCONTACTPERCELL & ~NUMC_MASK) == 0);
+#else // if defined(NO_CONTACT_CULLING_BY_ISONHEIGHTFIELD2)
+        int numMaxContactsPerPlane = dMIN(numMaxContactsPossible - numTerrainContacts, HEIGHTFIELDMAXCONTACTPERCELL);
+        int planeTestFlags = (flags & ~NUMC_MASK) | numMaxContactsPerPlane;
+        dIASSERT((HEIGHTFIELDMAXCONTACTPERCELL & ~NUMC_MASK) == 0);
+#endif        
+
+        for (unsigned int k = 0; k < numPlanes; k++)
+        {
+            HeightFieldPlane * const itPlane = tempPlaneBuffer[k];
+
+            //set Geom
+            dGeomPlaneSetNoNormalize (sliding_plane,  itPlane->planeDef);
+            //dGeomPlaneSetParams (sliding_plane, triangle_Plane[0], triangle_Plane[1], triangle_Plane[2], triangle_Plane[3]);
+            // find collision and compute contact points
+            bool didCollide = false;
+            const int numPlaneContacts = geomNPlaneCollider (o2, sliding_plane, planeTestFlags, PlaneContact, sizeof(dContactGeom));
+            const sizeint planeTriListSize = itPlane->trianglelistCurrentSize;
+            for (i = 0; i < numPlaneContacts; i++)
+            {
+                dContactGeom *planeCurrContact = PlaneContact + i;
+                // Check if contact point found in plane is inside Triangle.
+                const dVector3 &pCPos = planeCurrContact->pos;
+                for (sizeint b = 0; planeTriListSize > b; b++)
+                {  
+                    if (m_p_data->IsOnHeightfield2 (itPlane->trianglelist[b]->vertices[0], 
+                        pCPos, 
+                        itPlane->trianglelist[b]->isUp))
+                    {
+                        pContact = CONTACT(contact, numTerrainContacts*skip);
+                        dVector3Copy(pCPos, pContact->pos);
+                        dOPESIGN(pContact->normal, =, -, itPlane->planeDef);
+                        pContact->depth = planeCurrContact->depth;
+                        pContact->side1 = planeCurrContact->side1;
+                        pContact->side2 = planeCurrContact->side2;
+                        numTerrainContacts++;
+                        if ( numTerrainContacts == numMaxContactsPossible )
+                            return numTerrainContacts;
+
+                        didCollide = true;
+                        break;
+                    }
+                }
+            }
+            if (didCollide)
+            {
+#if defined(NO_CONTACT_CULLING_BY_ISONHEIGHTFIELD2)
+                /* Note by Oleh_Derevenko:
+                This code is not used - see another note above
+                */
+                numMaxContactsPerPlane = dMIN(numMaxContactsPossible - numTerrainContacts, HEIGHTFIELDMAXCONTACTPERCELL);
+                planeTestFlags = (flags & ~NUMC_MASK) | numMaxContactsPerPlane;
+                dIASSERT((HEIGHTFIELDMAXCONTACTPERCELL & ~NUMC_MASK) == 0);
+#endif        
+                for (sizeint b = 0; planeTriListSize > b; b++)
+                {                      
+                    // flag Triangles Vertices as collided 
+                    // to prevent any collision test of those
+                    for (i = 0; i < 3; i++)
+                        itPlane->trianglelist[b]->vertices[i]->state = true;
+                }
+            }
+            else 
+            {
+                // flag triangle as not collided so that Vertices or Edge
+                // of that triangles will be checked.
+                for (sizeint b = 0; planeTriListSize > b; b++)
+                { 
+                    itPlane->trianglelist[b]->state = false;
+                }
+            }
+        }
+    }
+
+
+
+    // pass2: VS triangle vertices
+    if (needFurtherPasses)
+    {
+        dxRay tempRay(0, 1); 
+        dReal depth;
+        bool vertexCollided;
+
+        // Only one contact is necessary for ray test
+        int rayTestFlags = (flags & ~NUMC_MASK) | 1;
+        dIASSERT((1 & ~NUMC_MASK) == 0);
+        //
+        // Find Contact Penetration Depth of each vertices
+        //
+        for (unsigned int k = 0; k < numTri; k++)
+        {
+            const HeightFieldTriangle * const itTriangle = &tempTriangleBuffer[k];
+            if (itTriangle->state == true)
+                continue;// plane triangle did already collide.
+
+            for (sizeint i = 0; i < 3; i++)
+            {
+                HeightFieldVertex *vertex = itTriangle->vertices[i];
+                if (vertex->state == true)
+                    continue;// vertice did already collide.
+
+                vertexCollided = false;
+                const dVector3 &triVertex = vertex->vertex;
+                if ( geomNDepthGetter )
+                {
+                    depth = geomNDepthGetter( o2,
+                        triVertex[0], triVertex[1], triVertex[2] );
+                    if (depth > dEpsilon)
+                        vertexCollided = true;
+                }
+                else
+                {
+                    // We don't have a GetDepth function, so do a ray cast instead.
+                    // NOTE: This isn't ideal, and a GetDepth function should be
+                    // written for all geom classes.
+                    tempRay.length = (minO2Height - triVertex[1]) * REAL(1000.0);
+
+                    //dGeomRaySet( &tempRay, pContact->pos[0], pContact->pos[1], pContact->pos[2],
+                    //    - itTriangle->Normal[0], - itTriangle->Normal[1], - itTriangle->Normal[2] );
+                    dGeomRaySetNoNormalize(tempRay, triVertex, itTriangle->planeDef);
+
+                    if ( geomRayNCollider( &tempRay, o2, rayTestFlags, PlaneContact, sizeof( dContactGeom ) ) )
+                    {
+                        depth = PlaneContact[0].depth;
+                        vertexCollided = true;
+                    }
+                }
+                if (vertexCollided)
+                {
+                    pContact = CONTACT(contact, numTerrainContacts*skip);
+                    //create contact using vertices
+                    dVector3Copy (triVertex, pContact->pos);
+                    //create contact using Plane Normal
+                    dOPESIGN(pContact->normal, =, -, itTriangle->planeDef);
+
+                    pContact->depth = depth;
+                    pContact->side1 = -1;
+                    pContact->side2 = -1;
+
+                    numTerrainContacts++;
+                    if ( numTerrainContacts == numMaxContactsPossible ) 
+                        return numTerrainContacts;
+
+                    vertex->state = true;
+                }
+            }
+        }
+    }
+
+#ifdef _HEIGHTFIELDEDGECOLLIDING
+    // pass3: VS triangle Edges
+    if (needFurtherPasses)
+    {
+        dVector3 Edge;
+        dxRay edgeRay(0, 1);
+
+        int numMaxContactsPerTri = dMIN(numMaxContactsPossible - numTerrainContacts, HEIGHTFIELDMAXCONTACTPERCELL);
+        int triTestFlags = (flags & ~NUMC_MASK) | numMaxContactsPerTri;
+        dIASSERT((HEIGHTFIELDMAXCONTACTPERCELL & ~NUMC_MASK) == 0);
+
+        for (unsigned int k = 0; k < numTri; k++)
+        {
+            const HeightFieldTriangle * const itTriangle = &tempTriangleBuffer[k];
+
+            if (itTriangle->state == true)
+                continue;// plane did already collide.
+
+            for (sizeint m = 0; m < 3; m++)
+            {
+                const sizeint next = (m + 1) % 3;
+                HeightFieldVertex *vertex0 = itTriangle->vertices[m];
+                HeightFieldVertex *vertex1 = itTriangle->vertices[next];
+
+                // not concave or under the AABB 
+                // nor triangle already collided against vertices
+                if (vertex0->state == true && vertex1->state == true)
+                    continue;// plane did already collide.
+
+                dVector3Subtract(vertex1->vertex, vertex0->vertex, Edge);
+                edgeRay.length = dVector3Length (Edge);
+                dGeomRaySetNoNormalize(edgeRay, vertex1->vertex, Edge);
+                int prevTerrainContacts = numTerrainContacts;
+                pContact = CONTACT(contact, prevTerrainContacts*skip);
+                const int numCollision = geomRayNCollider(&edgeRay,o2,triTestFlags,pContact,skip);
+                dIASSERT(numCollision <= numMaxContactsPerTri);
+
+                if (numCollision)
+                {
+                    numTerrainContacts += numCollision;
+
+                    do
+                    {
+                        pContact = CONTACT(contact, prevTerrainContacts*skip);
+
+                        //create contact using Plane Normal
+                        dOPESIGN(pContact->normal, =, -, itTriangle->planeDef);
+
+                        pContact->depth = DistancePointToLine(pContact->pos, vertex1->vertex, Edge, edgeRay.length);
+                    }
+                    while (++prevTerrainContacts != numTerrainContacts);
+
+                    if ( numTerrainContacts == numMaxContactsPossible )
+                        return numTerrainContacts;
+
+                    numMaxContactsPerTri = dMIN(numMaxContactsPossible - numTerrainContacts, HEIGHTFIELDMAXCONTACTPERCELL);
+                    triTestFlags = (flags & ~NUMC_MASK) | numMaxContactsPerTri;
+                    dIASSERT((HEIGHTFIELDMAXCONTACTPERCELL & ~NUMC_MASK) == 0);
+                }
+            }
+
+            itTriangle->vertices[0]->state = true;
+            itTriangle->vertices[1]->state = true;
+            itTriangle->vertices[2]->state = true;
+        }
+    }
+#endif // _HEIGHTFIELDEDGECOLLIDING
+    return numTerrainContacts;
+}
+
+int dCollideHeightfield( dxGeom *o1, dxGeom *o2, int flags, dContactGeom* contact, int skip )
+{
+    dIASSERT( skip >= (int)sizeof(dContactGeom) );
+    dIASSERT( o1->type == dHeightfieldClass );
+    dIASSERT((flags & NUMC_MASK) >= 1);
+    // Collides heightfield o1 with geom o2 and returns the number of contacts written to 'contact'.
+    int i;
+
+    // if ((flags & NUMC_MASK) == 0) -- An assertion check is made on entry
+    //	{ flags = (flags & ~NUMC_MASK) | 1; dIASSERT((1 & ~NUMC_MASK) == 0); }
+
+    int numMaxTerrainContacts = (flags & NUMC_MASK); // contact budget requested by the caller
+
+    dxHeightfield *terrain = (dxHeightfield*) o1;
+
+    dVector3 posbak;
+    dMatrix3 Rbak;
+    dReal aabbbak[6];
+    int gflagsbak;
+    dVector3 pos0,pos1;
+    dMatrix3 R1;
+
+    int numTerrainContacts = 0;
+    int numTerrainOrigContacts = 0;
+
+    //@@ TODO: find a way to leave reComputeAABB false in the default case
+    // (DHEIGHTFIELD_CORNER_ORIGIN not defined and terrain not placeable).
+    // That would avoid the backup/restore of o2 below, saving some memory
+    // and time and avoiding some precision loss.
+#ifndef DHEIGHTFIELD_CORNER_ORIGIN
+    const bool reComputeAABB = true;
+#else
+    const bool reComputeAABB = ( terrain->gflags & GEOM_PLACEABLE ) ? true : false;
+#endif //DHEIGHTFIELD_CORNER_ORIGIN
+
+    //
+    // Transform O2 into Heightfield Space
+    //
+    if (reComputeAABB)
+    {
+        // Backup original o2 position, rotation, AABB and flags; all are restored at dCollideHeightfieldExit.
+        dVector3Copy( o2->final_posr->pos, posbak );
+        dMatrix3Copy( o2->final_posr->R, Rbak );
+        memcpy( aabbbak, o2->aabb, sizeof( dReal ) * 6 );
+        gflagsbak = o2->gflags;
+    }
+
+    if ( terrain->gflags & GEOM_PLACEABLE )
+    {
+        // Compute o2's pose relative to the terrain frame (pos1, R1).
+        dSubtractVectors3( pos0, o2->final_posr->pos, terrain->final_posr->pos );
+        dMultiply1_331( pos1, terrain->final_posr->R, pos0 );
+        dMultiply1_333( R1, terrain->final_posr->R, o2->final_posr->R );
+
+        // Overwrite o2's pose in place with the transformed position and rotation.
+        dVector3Copy( pos1, o2->final_posr->pos );
+        dMatrix3Copy( R1, o2->final_posr->R );
+    }
+
+#ifndef DHEIGHTFIELD_CORNER_ORIGIN
+    o2->final_posr->pos[ 0 ] += terrain->m_p_data->m_fHalfWidth; // shift so the bounds tests below can use [0, m_fWidth]
+    o2->final_posr->pos[ 2 ] += terrain->m_p_data->m_fHalfDepth; // shift so the bounds tests below can use [0, m_fDepth]
+#endif // DHEIGHTFIELD_CORNER_ORIGIN
+
+    // Rebuild o2's AABB for the pose it now has in heightfield space
+    if (reComputeAABB)
+        o2->computeAABB();
+
+    //
+    // Collide
+    //
+
+    // Check whether o2 overlaps the heightfield's X/Z extent,
+    // using o2's AABB:
+    //  aabb[6] is (minx, maxx, miny, maxy, minz, maxz)
+    const bool wrapped = terrain->m_p_data->m_bWrapMode != 0;
+
+    if ( !wrapped )
+    {
+        if (    o2->aabb[0] > terrain->m_p_data->m_fWidth //MinX is past the far X edge
+            ||  o2->aabb[4] > terrain->m_p_data->m_fDepth)//MinZ is past the far Z edge
+            goto dCollideHeightfieldExit;
+
+        if (    o2->aabb[1] < 0 //MaxX is before the near X edge
+            ||  o2->aabb[5] < 0)//MaxZ is before the near Z edge
+            goto dCollideHeightfieldExit;
+    }
+
+    { // Scoped block: these initialized locals must not be in scope at dCollideHeightfieldExit, which the gotos above jump to
+        const dReal fInvSampleWidth = terrain->m_p_data->m_fInvSampleWidth;
+        int nMinX = (int)dFloor(dNextAfter(o2->aabb[0] * fInvSampleWidth, -dInfinity));
+        int nMaxX = (int)dCeil(dNextAfter(o2->aabb[1] * fInvSampleWidth, dInfinity));
+        const dReal fInvSampleDepth = terrain->m_p_data->m_fInvSampleDepth;
+        int nMinZ = (int)dFloor(dNextAfter(o2->aabb[4] * fInvSampleDepth, -dInfinity));
+        int nMaxZ = (int)dCeil(dNextAfter(o2->aabb[5] * fInvSampleDepth, dInfinity));
+        // NOTE(review): dNextAfter presumably nudges each scaled bound outward before floor/ceil so min < max - confirm.
+        if ( !wrapped )
+        {
+            nMinX = dMAX( nMinX, 0 );
+            nMaxX = dMIN( nMaxX, terrain->m_p_data->m_nWidthSamples - 1 );
+            nMinZ = dMAX( nMinZ, 0 );
+            nMaxZ = dMIN( nMaxZ, terrain->m_p_data->m_nDepthSamples - 1 );
+
+            dIASSERT ((nMinX < nMaxX) && (nMinZ < nMaxZ));
+        }
+
+        numTerrainOrigContacts = numTerrainContacts;
+        numTerrainContacts += terrain->dCollideHeightfieldZone(
+            nMinX,nMaxX,nMinZ,nMaxZ,o2,numMaxTerrainContacts - numTerrainContacts,
+            flags,CONTACT(contact,numTerrainContacts*skip),skip	);
+        dIASSERT( numTerrainContacts <= numMaxTerrainContacts );
+    }
+
+    dContactGeom *pContact;
+    for ( i = numTerrainOrigContacts; i != numTerrainContacts; ++i )
+    {
+        pContact = CONTACT(contact,i*skip);
+        pContact->g1 = o1;
+        pContact->g2 = o2;
+        // pContact->side1 = -1; -- Oleh_Derevenko: sides must not be erased here; the ray/plane colliders set them during their tests
+        // pContact->side2 = -1;
+    }
+
+
+    //------------------------------------------------------------------------------
+
+dCollideHeightfieldExit:
+    // Also reached directly by the early-out gotos above, with numTerrainContacts still 0.
+    if (reComputeAABB)
+    {
+        // Restore o2 position, rotation, AABB and flags saved on entry
+        dVector3Copy( posbak, o2->final_posr->pos );
+        dMatrix3Copy( Rbak, o2->final_posr->R );
+        memcpy( o2->aabb, aabbbak, sizeof(dReal)*6 );
+        o2->gflags = gflagsbak;
+
+        //
+        // Transform Contacts to World Space
+        //
+        if ( terrain->gflags & GEOM_PLACEABLE )
+        {
+            for ( i = 0; i < numTerrainContacts; ++i )
+            {
+                pContact = CONTACT(contact,i*skip);
+                dCopyVector3( pos0, pContact->pos );
+
+#ifndef DHEIGHTFIELD_CORNER_ORIGIN
+                pos0[ 0 ] -= terrain->m_p_data->m_fHalfWidth; // undo the half-width shift applied to o2 on entry
+                pos0[ 2 ] -= terrain->m_p_data->m_fHalfDepth; // undo the half-depth shift applied to o2 on entry
+#endif // !DHEIGHTFIELD_CORNER_ORIGIN
+
+                dMultiply0_331( pContact->pos, terrain->final_posr->R, pos0 );
+
+                dAddVectors3( pContact->pos, pContact->pos, terrain->final_posr->pos );
+                dCopyVector3( pos0, pContact->normal );
+                // The normal is only rotated by the terrain rotation; no translation is added.
+                dMultiply0_331( pContact->normal, terrain->final_posr->R, pos0 );
+            }
+        }
+#ifndef DHEIGHTFIELD_CORNER_ORIGIN
+        else
+        {
+            for ( i = 0; i < numTerrainContacts; ++i )
+            {
+                pContact = CONTACT(contact,i*skip);
+                pContact->pos[ 0 ] -= terrain->m_p_data->m_fHalfWidth; // non-placeable terrain: only the origin shift is undone
+                pContact->pos[ 2 ] -= terrain->m_p_data->m_fHalfDepth;
+            }
+        }
+#endif // !DHEIGHTFIELD_CORNER_ORIGIN
+    }
+    // Return contact count (0 when an early-out was taken).
+    return numTerrainContacts;
+}
+
+
+
diff --git a/libs/ode-0.16.1/ode/src/heightfield.h b/libs/ode-0.16.1/ode/src/heightfield.h
new file mode 100644
index 0000000..9b27f34
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/heightfield.h
@@ -0,0 +1,245 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// dHeightfield Collider
+//  Martijn Buijs 2006 http://home.planet.nl/~buijs512/
+// Based on Terrain & Cone contrib by:
+//  Benoit CHAPEROT 2003-2004 http://www.jstarlab.com
+
+#ifndef _DHEIGHTFIELD_H_
+#define _DHEIGHTFIELD_H_
+//------------------------------------------------------------------------------
+
+#include <ode/common.h>
+#include "collision_kernel.h"
+
+
+#define HEIGHTFIELDMAXCONTACTPERCELL 10
+
+
+class HeightFieldVertex;
+class HeightFieldEdge;
+class HeightFieldTriangle;
+
+//
+// dxHeightfieldData
+//
+// Heightfield Data structure
+//
+struct dxHeightfieldData
+{
+    dReal m_fWidth;				// World space heightfield dimension on X axis
+    dReal m_fDepth;				// World space heightfield dimension on Z axis
+    dReal m_fSampleWidth;		// Vertex spacing on X axis edge (== m_fWidth / (m_nWidthSamples-1))
+    dReal m_fSampleDepth;		// Vertex spacing on Z axis edge (== m_fDepth / (m_nDepthSamples-1))
+    dReal m_fSampleZXAspect;    // Relation of Z axis spacing to X axis spacing (== m_fSampleDepth / m_fSampleWidth)
+    dReal m_fInvSampleWidth;		// Cache of inverse vertex spacing on X axis (X coordinate * this == sample index)
+    dReal m_fInvSampleDepth;		// Cache of inverse vertex spacing on Z axis (Z coordinate * this == sample index)
+
+    dReal m_fHalfWidth;			// Cache of half of m_fWidth
+    dReal m_fHalfDepth;			// Cache of half of m_fDepth
+
+    dReal m_fMinHeight;        // Min sample height value (scaled and offset)
+    dReal m_fMaxHeight;        // Max sample height value (scaled and offset)
+    dReal m_fThickness;        // Surface thickness (added to bottom AABB)
+    dReal m_fScale;            // Sample value multiplier
+    dReal m_fOffset;           // Vertical sample offset
+
+    int	m_nWidthSamples;       // Vertex count on X axis edge (number of samples)
+    int	m_nDepthSamples;       // Vertex count on Z axis edge (number of samples)
+    int m_bCopyHeightData;     // Do we own the sample data?
+    int	m_bWrapMode;           // Heightfield wrapping mode (0=finite, non-zero=infinite)
+    int m_nGetHeightMode;      // GetHeight mode ( 0=callback, 1=byte, 2=short, 3=float )
+
+    const void* m_pHeightData; // Sample data array
+    void* m_pUserData;         // Callback user data
+
+    dContactGeom            m_contacts[HEIGHTFIELDMAXCONTACTPERCELL]; // NOTE(review): presumably scratch contact storage - confirm in heightfield.cpp
+
+    dHeightfieldGetHeight* m_pGetHeightCallback;		// Callback pointer.
+
+    dxHeightfieldData();
+    ~dxHeightfieldData();
+
+    void SetData( int nWidthSamples, int nDepthSamples,
+        dReal fWidth, dReal fDepth,
+        dReal fScale, dReal fOffset,
+        dReal fThickness, int bWrapMode );
+
+    void ComputeHeightBounds();
+    // Called by the plane pass with a triangle's first vertex, a contact position and the triangle's isUp flag.
+    bool IsOnHeightfield2  ( const HeightFieldVertex * const CellCorner, 
+        const dReal * const pos,  const bool isABC) const;
+
+    dReal GetHeight(int x, int z);
+    dReal GetHeight(dReal x, dReal z);
+
+};
+
+typedef int HeightFieldVertexCoords[2];
+
+class HeightFieldVertex
+{
+public:
+    HeightFieldVertex() {}          // members are intentionally left uninitialized
+
+    dVector3 vertex;                // vertex position (components [0..2] are read by the collider)
+    HeightFieldVertexCoords coords; // pair of ints; NOTE(review): presumably the sample grid indices - confirm
+    bool state;                     // true once this vertex has already produced a collision
+};
+
+class HeightFieldEdge
+{
+public:
+    HeightFieldEdge() {}                // the vertex pointers are left uninitialized
+
+    HeightFieldVertex   *vertices[2];   // the two end points of the edge
+};
+
+class HeightFieldTriangle
+{
+public:
+    HeightFieldTriangle() {}
+    // Stores in maxAAAB the largest vertex[1] component among the three corner vertices.
+    inline void setMinMax()
+    {
+        const dReal firstPairMax = (vertices[1]->vertex[1] < vertices[0]->vertex[1]) ? vertices[0]->vertex[1] : vertices[1]->vertex[1];
+        maxAAAB = (firstPairMax < vertices[2]->vertex[1]) ? vertices[2]->vertex[1] : firstPairMax;
+    }
+
+    HeightFieldVertex   *vertices[3];   // corner vertices of the triangle
+    dReal               planeDef[4];    // plane of the triangle; its negation is used as the contact normal
+    dReal               maxAAAB;        // cached by setMinMax()
+
+    bool                isUp;           // forwarded as the isABC argument of IsOnHeightfield2
+    bool                state;          // true once the triangle's plane has already collided
+};
+
+class HeightFieldPlane
+{
+public:
+    HeightFieldPlane():
+        trianglelist(0),
+        trianglelistReservedSize(0),
+        trianglelistCurrentSize(0)
+    {}
+
+    // Releases the pointer array only; the triangles it refers to are not deleted here.
+    ~HeightFieldPlane()
+    {
+        delete [] trianglelist;
+    }
+
+    // Sets maxAAAB to the largest maxAAAB of the listed triangles; an empty list leaves it unchanged.
+    inline void setMinMax()
+    {
+        if (trianglelistCurrentSize == 0)
+            return;
+        maxAAAB = trianglelist[0]->maxAAAB;
+        for (sizeint k = 1; k < trianglelistCurrentSize; k++)
+            if (maxAAAB < trianglelist[k]->maxAAAB)
+                maxAAAB = trianglelist[k]->maxAAAB;
+    }
+
+    // Empties the list and guarantees room for at least newSize triangle pointers.
+    // Existing storage is reused when it is already large enough.
+    void resetTriangleListSize(const sizeint newSize)
+    {
+        if (trianglelistReservedSize < newSize)
+        {
+            // Allocate first: if new[] throws, the old array and its size stay valid (no dangling pointer).
+            HeightFieldTriangle **grown = new HeightFieldTriangle *[newSize];
+            delete [] trianglelist;
+            trianglelist = grown;
+            trianglelistReservedSize = newSize;
+        }
+        trianglelistCurrentSize = 0;
+    }
+
+    // Appends tri; capacity must have been reserved with resetTriangleListSize().
+    void addTriangle(HeightFieldTriangle *tri)
+    {
+        dIASSERT(trianglelistCurrentSize < trianglelistReservedSize);
+        trianglelist[trianglelistCurrentSize++] = tri;
+    }
+
+    HeightFieldTriangle **trianglelist;             // array of triangle pointers (array is owned, triangles are not)
+    sizeint             trianglelistReservedSize;   // allocated length of trianglelist
+    sizeint             trianglelistCurrentSize;    // number of entries currently in use
+    dReal   maxAAAB;                                // cached by setMinMax()
+    dReal   planeDef[4];                            // plane passed to dGeomPlaneSetNoNormalize by the collider
+};
+
+//
+// dxHeightfield
+//
+// Heightfield geom structure
+//
+struct dxHeightfield : public dxGeom
+{
+    dxHeightfieldData* m_p_data; // heightfield dimensions and sample data used by the collider
+
+    dxHeightfield( dSpaceID space, dHeightfieldDataID data, int bPlaceable );
+    ~dxHeightfield();
+
+    void computeAABB();
+    // Collides o2 against the sample range minX..maxX / minZ..maxZ; returns the number of contacts written (<= numMaxContacts).
+    int dCollideHeightfieldZone( const int minX, const int maxX, const int minZ, const int maxZ,  
+        dxGeom *o2, const int numMaxContacts,
+        int flags, dContactGeom *contact, int skip );
+    // Alignment values for temp buffer element counts; NOTE(review): presumably passed to AlignBufferSize - confirm.
+    enum
+    {
+        TEMP_PLANE_BUFFER_ELEMENT_COUNT_ALIGNMENT = 4,
+        TEMP_HEIGHT_BUFFER_ELEMENT_COUNT_ALIGNMENT_X = 4,
+        TEMP_HEIGHT_BUFFER_ELEMENT_COUNT_ALIGNMENT_Z = 4,
+        TEMP_TRIANGLE_BUFFER_ELEMENT_COUNT_ALIGNMENT = 1 // Triangles are easy to reallocate and hard to predict
+    };
+    // Rounds value up to a multiple of alignment; alignment must be a power of two (asserted).
+    static inline sizeint AlignBufferSize(sizeint value, sizeint alignment) { dIASSERT((alignment & (alignment - 1)) == 0); return (value + (alignment - 1)) & ~(alignment - 1); }
+
+    void  allocateTriangleBuffer(sizeint numTri);
+    void  resetTriangleBuffer();
+    void  allocatePlaneBuffer(sizeint numTri);
+    void  resetPlaneBuffer();
+    void  allocateHeightBuffer(sizeint numX, sizeint numZ);
+    void  resetHeightBuffer();
+
+    void  sortPlanes(const sizeint numPlanes); // called by the zone collider when the contact count is limited
+
+    HeightFieldPlane    **tempPlaneBuffer;      // plane pointers; the collider iterates tempPlaneBuffer[k]
+    HeightFieldPlane    *tempPlaneInstances;    // NOTE(review): presumably the storage tempPlaneBuffer points into - confirm
+    sizeint             tempPlaneBufferSize;
+
+    HeightFieldTriangle *tempTriangleBuffer;    // triangle array; the collider iterates &tempTriangleBuffer[k]
+    sizeint             tempTriangleBufferSize;
+
+    HeightFieldVertex   **tempHeightBuffer;
+    HeightFieldVertex   *tempHeightInstances;   // NOTE(review): presumably the storage tempHeightBuffer points into - confirm
+    sizeint             tempHeightBufferSizeX;
+    sizeint             tempHeightBufferSizeZ;
+
+};
+
+
+//------------------------------------------------------------------------------
+#endif //_DHEIGHTFIELD_H_
diff --git a/libs/ode-0.16.1/ode/src/joints/Makefile.am b/libs/ode-0.16.1/ode/src/joints/Makefile.am
new file mode 100644
index 0000000..194ef60
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/Makefile.am
@@ -0,0 +1,37 @@
+AM_CPPFLAGS = -I$(top_srcdir)/include \
+        -I$(top_builddir)/include \
+        -I$(top_srcdir)/ode/src \
+        -D__ODE__
+
+## With ENABLE_OU set, the ou/include directory is added to the include path.
+if ENABLE_OU
+
+AM_CPPFLAGS += -I$(top_srcdir)/ou/include
+
+
+endif
+
+## Not installed on its own: ode/src/Makefile.am adds joints/libjoints.la to libode_la_LIBADD.
+noinst_LTLIBRARIES = libjoints.la
+
+libjoints_la_SOURCES =  joints.h \
+                        joint.h joint.cpp \
+                        joint_internal.h \
+                        ball.h ball.cpp \
+                        dball.h dball.cpp \
+                        dhinge.h dhinge.cpp \
+                        transmission.h transmission.cpp \
+                        hinge.h hinge.cpp \
+                        slider.h slider.cpp \
+                        contact.h contact.cpp \
+                        universal.h universal.cpp \
+                        hinge2.h hinge2.cpp \
+                        fixed.h fixed.cpp \
+                        null.h null.cpp \
+                        amotor.h amotor.cpp \
+                        lmotor.h lmotor.cpp \
+                        plane2d.h plane2d.cpp \
+                        pu.h pu.cpp \
+                        pr.h pr.cpp \
+                        piston.h piston.cpp
+
diff --git a/libs/ode-0.16.1/ode/src/joints/Makefile.in b/libs/ode-0.16.1/ode/src/joints/Makefile.in
new file mode 100644
index 0000000..9e43f9f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/Makefile.in
@@ -0,0 +1,668 @@
+# Makefile.in generated by automake 1.15 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+  if test -z '$(MAKELEVEL)'; then \
+    false; \
+  elif test -n '$(MAKE_HOST)'; then \
+    true; \
+  elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+    true; \
+  else \
+    false; \
+  fi; \
+}
+am__make_running_with_option = \
+  case $${target_option-} in \
+      ?) ;; \
+      *) echo "am__make_running_with_option: internal error: invalid" \
+              "target option '$${target_option-}' specified" >&2; \
+         exit 1;; \
+  esac; \
+  has_opt=no; \
+  sane_makeflags=$$MAKEFLAGS; \
+  if $(am__is_gnu_make); then \
+    sane_makeflags=$$MFLAGS; \
+  else \
+    case $$MAKEFLAGS in \
+      *\\[\ \	]*) \
+        bs=\\; \
+        sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+          | sed "s/$$bs$$bs[$$bs $$bs	]*//g"`;; \
+    esac; \
+  fi; \
+  skip_next=no; \
+  strip_trailopt () \
+  { \
+    flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+  }; \
+  for flg in $$sane_makeflags; do \
+    test $$skip_next = yes && { skip_next=no; continue; }; \
+    case $$flg in \
+      *=*|--*) continue;; \
+        -*I) strip_trailopt 'I'; skip_next=yes;; \
+      -*I?*) strip_trailopt 'I';; \
+        -*O) strip_trailopt 'O'; skip_next=yes;; \
+      -*O?*) strip_trailopt 'O';; \
+        -*l) strip_trailopt 'l'; skip_next=yes;; \
+      -*l?*) strip_trailopt 'l';; \
+      -[dEDm]) skip_next=yes;; \
+      -[JT]) skip_next=yes;; \
+    esac; \
+    case $$flg in \
+      *$$target_option*) has_opt=yes; break;; \
+    esac; \
+  done; \
+  test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@ENABLE_OU_TRUE@am__append_1 = -I$(top_srcdir)/ou/include
+subdir = ode/src/joints
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
+	$(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+	$(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+	$(top_srcdir)/m4/pkg.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+	$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/ode/src/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libjoints_la_LIBADD =
+am_libjoints_la_OBJECTS = joint.lo ball.lo dball.lo dhinge.lo \
+	transmission.lo hinge.lo slider.lo contact.lo universal.lo \
+	hinge2.lo fixed.lo null.lo amotor.lo lmotor.lo plane2d.lo \
+	pu.lo pr.lo piston.lo
+libjoints_la_OBJECTS = $(am_libjoints_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo "  GEN     " $@;
+am__v_GEN_1 = 
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 = 
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/ode/src
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+	$(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo "  CXX     " $@;
+am__v_CXX_1 = 
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+	$(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo "  CXXLD   " $@;
+am__v_CXXLD_1 = 
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+	$(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo "  CC      " $@;
+am__v_CC_1 = 
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+	$(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo "  CCLD    " $@;
+am__v_CCLD_1 = 
+SOURCES = $(libjoints_la_SOURCES)
+DIST_SOURCES = $(libjoints_la_SOURCES)
+am__can_run_installinfo = \
+  case $$AM_UPDATE_INFO_DIR in \
+    n|no|NO) false;; \
+    *) (install-info --version) >/dev/null 2>&1;; \
+  esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates.  Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+  BEGIN { nonempty = 0; } \
+  { items[$$0] = 1; nonempty = 1; } \
+  END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Shell fragment that sets `unique` to the de-duplicated list of tagged files;
+# an entry that is not a file in the current directory gets $(srcdir)/ prepended.
+# Uniqueness matters because one source file may be listed in several _SOURCES variables.
+am__define_uniq_tagged_files = \
+  list='$(am__tagged_files)'; \
+  unique=`for i in $$list; do \
+    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+  done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CCD_CFLAGS = @CCD_CFLAGS@
+CCD_LIBS = @CCD_LIBS@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DOXYGEN = @DOXYGEN@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXTRA_LIBTOOL_LDFLAGS = @EXTRA_LIBTOOL_LDFLAGS@
+FGREP = @FGREP@
+GL_LIBS = @GL_LIBS@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBSTDCXX = @LIBSTDCXX@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ODE_PRECISION = @ODE_PRECISION@
+ODE_VERSION = @ODE_VERSION@
+ODE_VERSION_INFO = @ODE_VERSION_INFO@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+X11_CFLAGS = @X11_CFLAGS@
+X11_LIBS = @X11_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+ac_ct_WINDRES = @ac_ct_WINDRES@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+subdirs = @subdirs@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_builddir)/include \
+	-I$(top_srcdir)/ode/src -D__ODE__ $(am__append_1)
+noinst_LTLIBRARIES = libjoints.la
+libjoints_la_SOURCES = joints.h \
+                        joint.h joint.cpp \
+                        joint_internal.h \
+                        ball.h ball.cpp \
+                        dball.h dball.cpp \
+                        dhinge.h dhinge.cpp \
+                        transmission.h transmission.cpp \
+                        hinge.h hinge.cpp \
+                        slider.h slider.cpp \
+                        contact.h contact.cpp \
+                        universal.h universal.cpp \
+                        hinge2.h hinge2.cpp \
+                        fixed.h fixed.cpp \
+                        null.h null.cpp \
+                        amotor.h amotor.cpp \
+                        lmotor.h lmotor.cpp \
+                        plane2d.h plane2d.cpp \
+                        pu.h pu.cpp \
+                        pr.h pr.cpp \
+                        piston.h piston.cpp
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cpp .lo .o .obj
+$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am  $(am__configure_deps)
+	@for dep in $?; do \
+	  case '$(am__configure_deps)' in \
+	    *$$dep*) \
+	      ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+	        && { if test -f $@; then exit 0; else break; fi; }; \
+	      exit 1;; \
+	  esac; \
+	done; \
+	echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign ode/src/joints/Makefile'; \
+	$(am__cd) $(top_srcdir) && \
+	  $(AUTOMAKE) --foreign ode/src/joints/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	@case '$?' in \
+	  *config.status*) \
+	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+	  *) \
+	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+	esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure:  $(am__configure_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
+	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+	-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+	@list='$(noinst_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+
+libjoints.la: $(libjoints_la_OBJECTS) $(libjoints_la_DEPENDENCIES) $(EXTRA_libjoints_la_DEPENDENCIES) 
+	$(AM_V_CXXLD)$(CXXLINK)  $(libjoints_la_OBJECTS) $(libjoints_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+	-rm -f *.$(OBJEXT)
+
+distclean-compile:
+	-rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/amotor.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ball.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/contact.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dball.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dhinge.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fixed.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hinge.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hinge2.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/joint.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lmotor.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/null.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/piston.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/plane2d.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pr.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pu.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/slider.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/transmission.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/universal.Plo@am__quote@
+
+.cpp.o:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cpp.obj:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cpp.lo:
+@am__fastdepCXX_TRUE@	$(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	$(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@	DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@	$(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+	$(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	set x; \
+	here=`pwd`; \
+	$(am__define_uniq_tagged_files); \
+	shift; \
+	if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+	  test -n "$$unique" || unique=$$empty_fix; \
+	  if test $$# -gt 0; then \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      "$$@" $$unique; \
+	  else \
+	    $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+	      $$unique; \
+	  fi; \
+	fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+	$(am__define_uniq_tagged_files); \
+	test -z "$(CTAGS_ARGS)$$unique" \
+	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+	     $$unique
+
+GTAGS:
+	here=`$(am__cd) $(top_builddir) && pwd` \
+	  && $(am__cd) $(top_srcdir) \
+	  && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+	list='$(am__tagged_files)'; \
+	case "$(srcdir)" in \
+	  [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+	  *) sdir=$(subdir)/$(srcdir) ;; \
+	esac; \
+	for i in $$list; do \
+	  if test -f "$$i"; then \
+	    echo "$(subdir)/$$i"; \
+	  else \
+	    echo "$$sdir/$$i"; \
+	  fi; \
+	done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+	@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+	list='$(DISTFILES)'; \
+	  dist_files=`for file in $$list; do echo $$file; done | \
+	  sed -e "s|^$$srcdirstrip/||;t" \
+	      -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+	case $$dist_files in \
+	  */*) $(MKDIR_P) `echo "$$dist_files" | \
+			   sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+			   sort -u` ;; \
+	esac; \
+	for file in $$dist_files; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  if test -d $$d/$$file; then \
+	    dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+	    if test -d "$(distdir)/$$file"; then \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+	      find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+	    fi; \
+	    cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+	  else \
+	    test -f "$(distdir)/$$file" \
+	    || cp -p $$d/$$file "$(distdir)/$$file" \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	if test -z '$(STRIP)'; then \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	      install; \
+	else \
+	  $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	    install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+	    "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+	fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+	-test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+	mostlyclean-am
+
+distclean: distclean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+	distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+	-rm -rf ./$(DEPDIR)
+	-rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+	mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+	clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+	ctags-am distclean distclean-compile distclean-generic \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-data \
+	install-data-am install-dvi install-dvi-am install-exec \
+	install-exec-am install-html install-html-am install-info \
+	install-info-am install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/libs/ode-0.16.1/ode/src/joints/amotor.cpp b/libs/ode-0.16.1/ode/src/joints/amotor.cpp
new file mode 100644
index 0000000..aa30c76
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/amotor.cpp
@@ -0,0 +1,810 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "common.h"
+#include "amotor.h"
+#include "joint_internal.h"
+#include "odeou.h"
+
+
+/*extern */ // Sets how many axes the angular motor controls; the count is clamped to [dSA__MIN, dSA__MAX].
+void dJointSetAMotorNumAxes(dJointID j, int num)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(num, dSA__MIN, dSA__MAX + 1));
+    checktype(amotor, AMotor);
+
+    // Clamp in addition to asserting, so the forwarded count is always bounded.
+    const int axisCount = dCLAMP(num, dSA__MIN, dSA__MAX);
+    amotor->setNumAxes(axisCount);
+}
+
+/*extern */ // Sets axis `anum` of the motor from the vector (x, y, z) with body relativity `rel`.
+void dJointSetAMotorAxis(dJointID j, int anum, int rel/*=dJointBodyRelativity*/,
+    dReal x, dReal y, dReal z)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    dAASSERT(dIN_RANGE(rel, dJBR__MIN, dJBR__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+    amotor->setAxisValue(axisIndex, static_cast<dJointBodyRelativity>(rel), x, y, z);
+}
+
+/*extern */ // Stores `angle` as the current angle of axis `anum` (setAngleValue() honours it in dAMotorUser mode only).
+void dJointSetAMotorAngle(dJointID j, int anum, dReal angle)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+    amotor->setAngleValue(axisIndex, angle);
+}
+
+/*extern */ // Sets one limit/motor parameter; `parameter` packs the axis index (bits 8 and up) and the parameter id (low byte).
+void dJointSetAMotorParam(dJointID j, int parameter, dReal value)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    checktype(amotor, AMotor);
+
+    // Split the packed argument: the low byte is the parameter id, the rest is the axis index.
+    const int paramId = parameter & 0xff;
+    const int rawAxis = parameter >> 8;
+    dAASSERT(dIN_RANGE(rawAxis, dSA__MIN, dSA__MAX));
+
+    const int axisIndex = dCLAMP(rawAxis, dSA__MIN, dSA__MAX - 1);
+    amotor->setLimotParameter(axisIndex, paramId, value);
+}
+
+/*extern */ // Selects the motor operation mode; this file handles only dAMotorUser and dAMotorEuler.
+void dJointSetAMotorMode(dJointID j, int mode)
+{
+    dxJointAMotor* joint = (dxJointAMotor*)j;
+    dAASSERT(joint != NULL);
+    dAASSERT(mode == dAMotorUser || mode == dAMotorEuler); // fail at the call site, not in a later mode switch
+    checktype(joint, AMotor);
+    joint->setOperationMode(mode);
+}
+
+/*extern */ // Returns the number of axes reported by the angular motor.
+int dJointGetAMotorNumAxes(dJointID j)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    checktype(amotor, AMotor);
+    const int axisCount = amotor->getNumAxes();
+    return axisCount;
+}
+
+/*extern */ // Writes the effective direction of axis `anum` into `result`.
+void dJointGetAMotorAxis(dJointID j, int anum, dVector3 result)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+    amotor->getAxisValue(result, axisIndex);
+}
+
+/*extern */ // Returns the body relativity of axis `anum`, converted to int.
+int dJointGetAMotorAxisRel(dJointID j, int anum)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+
+    return amotor->getAxisBodyRelativity(axisIndex);
+}
+
+/*extern */ // Returns the angle value the joint reports for axis `anum`.
+dReal dJointGetAMotorAngle(dJointID j, int anum)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+
+    return amotor->getAngleValue(axisIndex);
+}
+
+/*extern */ // Returns the angle rate about axis `anum`, as computed by calculateAngleRate().
+dReal dJointGetAMotorAngleRate(dJointID j, int anum)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    checktype(amotor, AMotor);
+
+    // Keep the axis index inside [dSA__MIN, dSA__MAX - 1] before it is used.
+    const int axisIndex = dCLAMP(anum, dSA__MIN, dSA__MAX - 1);
+
+    return amotor->calculateAngleRate(axisIndex);
+}
+
+/*extern */ // Reads one limit/motor parameter; `parameter` packs the axis index (bits 8 and up) and the parameter id (low byte).
+dReal dJointGetAMotorParam(dJointID j, int parameter)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    checktype(amotor, AMotor);
+
+    // Split the packed argument: the low byte is the parameter id, the rest is the axis index.
+    const int paramId = parameter & 0xff;
+    const int rawAxis = parameter >> 8;
+    dAASSERT(dIN_RANGE(rawAxis, dSA__MIN, dSA__MAX));
+
+    // The index is clamped after the assert as well, so the forwarded value is always bounded.
+    const int axisIndex = dCLAMP(rawAxis, dSA__MIN, dSA__MAX - 1);
+    return amotor->getLimotParameter(axisIndex, paramId);
+}
+
+/*extern */ // Returns the operation mode reported by the angular motor.
+int dJointGetAMotorMode(dJointID j)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    checktype(amotor, AMotor);
+
+    // Forwarded straight from the joint object; nothing is translated here.
+    return amotor->getOperationMode();
+}
+
+/*extern */ // Forwards the three per-axis torques to dxJointAMotor::addTorques().
+void dJointAddAMotorTorques(dJointID j, dReal torque1, dReal torque2, dReal torque3)
+{
+    dxJointAMotor* const amotor = (dxJointAMotor*)j;
+    dAASSERT(amotor != NULL);
+    checktype(amotor, AMotor);
+
+    amotor->addTorques(torque1, torque2, torque3);
+}
+
+
+//****************************************************************************
+
+BEGIN_NAMESPACE_OU(); // the specialization below supplies one body relativity per space axis
+template<>
+const dJointBodyRelativity CEnumUnsortedElementArray<dSpaceAxis, dSA__MAX, dJointBodyRelativity, 0x160703D5>::m_aetElementArray[] =
+{
+    dJBR_BODY1, // dSA_X: relative to the first body
+    dJBR_GLOBAL, // dSA_Y: global
+    dJBR_BODY2, // dSA_Z: relative to the second body
+};
+END_NAMESPACE_OU();
+static const CEnumUnsortedElementArray<dSpaceAxis, dSA__MAX, dJointBodyRelativity, 0x160703D5> g_abrEulerAxisAllowedBodyRelativities; // consulted by the Euler-mode assert in setAxisValue()
+
+static inline // Maps a connected body to its Euler axis: the first body gives dSA_X, anything else dSA_Z.
+dSpaceAxis EncodeJointConnectedBodyEulerAxis(dJointConnectedBody cbBodyIndex)
+{
+    dSASSERT(dJCB__MAX == 2); // the two-way choice below relies on there being exactly two bodies
+    if (cbBodyIndex != dJCB_FIRST_BODY) { return dSA_Z; }
+    return dSA_X;
+}
+
+static inline // Given one of the two body-bound Euler axes (dSA_X or dSA_Z), returns the other one.
+dSpaceAxis EncodeOtherEulerAxis(dSpaceAxis saOneAxis)
+{
+    dIASSERT(saOneAxis == EncodeJointConnectedBodyEulerAxis(dJCB_FIRST_BODY) || saOneAxis == EncodeJointConnectedBodyEulerAxis(dJCB_SECOND_BODY));
+    dSASSERT(dJCB__MAX == 2);
+    const int otherAxis = dSA_X + dSA_Z - saOneAxis; // X + Z - axis swaps X and Z without a branch
+    return static_cast<dSpaceAxis>(otherAxis);
+}
+
+
+//****************************************************************************
+// angular motor
+
+dxJointAMotor::dxJointAMotor(dxWorld *w) :
+    dxJointAMotor_Parent(w),
+    m_mode(dAMotorUser), // a new motor starts in user mode
+    m_num(0) // with no axes configured
+{
+    std::fill(m_rel, m_rel + dARRAY_SIZE(m_rel), dJBR__DEFAULT); // every axis gets the default body relativity
+    { for (int i = 0; i != dARRAY_SIZE(m_axis); ++i) { dZeroVector3(m_axis[i]); } } // zero all stored axes
+    { for (int i = 0; i != dARRAY_SIZE(m_references); ++i) { dZeroVector3(m_references[i]); } } // zero all reference vectors
+    std::fill(m_angle, m_angle + dARRAY_SIZE(m_angle), REAL(0.0)); // all angles start at zero
+    { for (int i = 0; i != dARRAY_SIZE(m_limot); ++i) { m_limot[i].init(w); } } // initialize each per-axis limit/motor with the world
+}
+
+
+/*virtual */
+dxJointAMotor::~dxJointAMotor()
+{
+    // The virtual destructor; the body is intentionally empty
+}
+
+
+/*virtual */ // Reports the upper bound on constraint rows for this joint.
+void dxJointAMotor::getSureMaxInfo(SureMaxInfo* info)
+{
+    info->max_m = m_num; // getInfo1() counts at most one row per configured axis
+}
+
+/*virtual */ // Reports in info->m how many axes need a constraint row; info->nub is always 0.
+void dxJointAMotor::getInfo1(dxJoint::Info1 *info)
+{
+    info->nub = 0;
+    info->m = 0;
+
+    // Euler mode: refresh the global axes and the Euler angles computed from them
+    if (m_mode == dAMotorEuler)
+    {
+        dVector3 globalAxes[dSA__MAX];
+        computeGlobalAxes(globalAxes);
+        computeEulerAngles(globalAxes);
+    }
+
+    // count one row for every axis that is at a joint limit or powered (fmax > 0)
+    const unsigned axisCount = m_num;
+    for (unsigned axisIndex = 0; axisIndex < axisCount; ++axisIndex)
+    {
+        const bool atLimit = m_limot[axisIndex].testRotationalLimit(m_angle[axisIndex]);
+        if (atLimit || m_limot[axisIndex].fmax > 0)
+        {
+            ++info->m;
+        }
+    }
+}
+
+/*virtual */ // Emits the constraint rows: one addLimot() call per configured axis, in axis order.
+void dxJointAMotor::getInfo2(dReal worldFPS, dReal /*worldERP*/, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex) // findex is not referenced in this body
+{
+    // compute the axes (if not global)
+    dVector3 ax[dSA__MAX];
+    computeGlobalAxes(ax);
+
+    // in Euler angle mode we do not actually constrain the angular velocity
+    // along the axes axis[0] and axis[2] (although we do use axis[1]) :
+    //
+    //    to get   constrain w2-w1 along  ...not
+    //    ------   ---------------------  ------
+    //    d(angle[0])/dt = 0 ax[1] x ax[2]   ax[0]
+    //    d(angle[1])/dt = 0 ax[1]
+    //    d(angle[2])/dt = 0 ax[0] x ax[1]   ax[2]
+    //
+    // constraining w2-w1 along an axis 'a' means that a'*(w2-w1)=0.
+    // to prove the result for angle[0], write the expression for angle[0] from
+    // getInfo1 then take the derivative. to prove this for angle[2] it is
+    // easier to take the Euler rate expression for d(angle[2])/dt with respect
+    // to the components of w and set that to 0.
+
+    dVector3 *axptr[dSA__MAX]; // the axis actually handed to addLimot() for each row
+    for (int j = dSA__MIN; j != dSA__MAX; ++j) { axptr[j] = &ax[j]; } // default: the global axes themselves
+
+    dVector3 ax0_cross_ax1;
+    dVector3 ax1_cross_ax2;
+    
+    if (m_mode == dAMotorEuler) 
+    {
+        dCalcVectorCross3(ax0_cross_ax1, ax[dSA_X], ax[dSA_Y]);
+        axptr[dSA_Z] = &ax0_cross_ax1; // row for angle[2] uses ax[0] x ax[1], per the table above
+        dCalcVectorCross3(ax1_cross_ax2, ax[dSA_Y], ax[dSA_Z]);
+        axptr[dSA_X] = &ax1_cross_ax2; // row for angle[0] uses ax[1] x ax[2], per the table above
+    }
+
+    sizeint rowTotalSkip = 0, pairTotalSkip = 0; // running offsets into the J1/J2 and pair output arrays
+    
+    const unsigned num = m_num;
+    for (unsigned i = 0; i != num; ++i) 
+    {
+        if (m_limot[i].addLimot(this, worldFPS, J1 + rowTotalSkip, J2 + rowTotalSkip, pairRhsCfm + pairTotalSkip, pairLoHi + pairTotalSkip, *(axptr[i]), 1)) 
+        {
+            rowTotalSkip += rowskip; // advance only when addLimot() reported that it added a row
+            pairTotalSkip += pairskip;
+        }
+    }
+}
+
+/*virtual */ // Returns the joint type tag for angular motors.
+dJointType dxJointAMotor::type() const
+{
+    return dJointTypeAMotor;
+}
+
+/*virtual */ // Returns sizeof this joint object.
+sizeint dxJointAMotor::size() const
+{
+    return sizeof(*this);
+}
+
+
+void dxJointAMotor::setOperationMode(int mode)
+{
+    // Stores the mode; selecting Euler mode additionally reconfigures the joint below.
+    m_mode = mode;
+    if (mode != dAMotorEuler) { return; }
+
+    // Euler mode always drives all dSA__MAX axes and needs fresh reference vectors.
+    m_num = dSA__MAX;
+    setEulerReferenceVectors();
+}
+
+
+void dxJointAMotor::setNumAxes(unsigned num)
+{
+    // Sets the number of active axes.
+    //
+    // Euler mode always uses all dSA__MAX axes, so the requested
+    // count is honoured only when the motor is in another mode.
+    const bool eulerMode = (m_mode == dAMotorEuler);
+    const unsigned axisCount = eulerMode ? (unsigned)dSA__MAX : num;
+
+    m_num = axisCount;
+}
+
+
+dJointBodyRelativity dxJointAMotor::getAxisBodyRelativity(unsigned anum) const
+{
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+
+    // Axes that are not body-relative, and all axes of a non-reversed joint, are reported as stored.
+    const dJointBodyRelativity storedRel = m_rel[anum];
+    if (!dJBREncodeBodyRelativityStatus(storedRel) || !GetIsJointReverse())
+    {
+        return storedRel;
+    }
+    return dJBRSwapBodyRelativity(storedRel); // reversed joint: turns 1 into 2, 2 into 1
+}
+
+
+void dxJointAMotor::setAxisValue(unsigned anum, dJointBodyRelativity rel, 
+    dReal x, dReal y, dReal z)
+{ // Stores axis `anum` (normalized) together with its body relativity; (x, y, z) is given in global coordinates.
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    dAASSERT(m_mode != dAMotorEuler || !dJBREncodeBodyRelativityStatus(rel) || rel == g_abrEulerAxisAllowedBodyRelativities.Encode((dSpaceAxis)anum));
+
+    // x,y,z is always in global coordinates regardless of rel, so we may have
+    // to convert it to be relative to a body
+    dVector3 r;
+    dAssignVector3(r, x, y, z);
+
+    // adjust rel to match the internal body order
+    if (dJBREncodeBodyRelativityStatus(rel) && GetIsJointReverse())
+    {
+        rel = dJBRSwapBodyRelativity(rel); // turns 1 into 2, 2 into 1
+    }
+
+    m_rel[anum] = rel;
+
+    bool assigned = false; // stays false when the axis must be stored as given (global, or body 2 missing)
+
+    if (dJBREncodeBodyRelativityStatus(rel))
+    {
+        if (rel == dJBR_BODY1)
+        {
+            dMultiply1_331(m_axis[anum], this->node[0].body->posr.R, r); // NOTE(review): node[0].body is dereferenced unchecked -- confirm the joint is always attached here
+            assigned = true;
+        }
+        // rel == 2
+        else if (this->node[1].body != NULL)
+        {
+            dIASSERT(rel == dJBR_BODY2);
+
+            dMultiply1_331(m_axis[anum], this->node[1].body->posr.R, r);
+            assigned = true;
+        }
+    }
+    
+    if (!assigned)
+    {
+        dCopyVector3(m_axis[anum], r); 
+    }
+    
+    dNormalize3(m_axis[anum]);
+    
+    if (m_mode == dAMotorEuler) 
+    {
+        setEulerReferenceVectors(); // Euler mode: refresh the reference vectors after an axis change
+    }
+}
+
+void dxJointAMotor::getAxisValue(dVector3 result, unsigned anum) const
+{
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+
+    // Writes the effective direction of axis `anum` into `result`.
+    // The work is delegated to a mode-specific helper; see those
+    // helpers for how the direction is derived in each mode.
+    if (m_mode == dAMotorUser)
+    {
+        // user mode: the stored axis, taken out of its body frame
+        // when it is body-relative
+        doGetUserAxis(result, anum);
+    }
+    else if (m_mode == dAMotorEuler)
+    {
+        // Euler mode: the axis is derived from the current global
+        // axes rather than read back as stored
+        doGetEulerAxis(result, anum);
+    }
+    else
+    {
+        // any other mode is unexpected; `result` is not written
+        dIASSERT(false);
+    }
+}
+
+
+void dxJointAMotor::doGetUserAxis(dVector3 result, unsigned anum) const
+{
+    // Writes the stored axis into `result`.  A body-relative axis is multiplied
+    // by that body's posr.R first; every other axis is copied unchanged.
+    const dJointBodyRelativity axisRel = m_rel[anum];
+
+    if (dJBREncodeBodyRelativityStatus(axisRel))
+    {
+        if (axisRel == dJBR_BODY1)
+        {
+            dMultiply0_331(result, this->node[0].body->posr.R, m_axis[anum]);
+            return;
+        }
+
+        if (this->node[1].body != NULL)
+        {
+            dMultiply0_331(result, this->node[1].body->posr.R, m_axis[anum]);
+            return;
+        }
+    }
+
+    dCopyVector3(result, m_axis[anum]); // not body-relative, or the second body is absent
+}
+
+void dxJointAMotor::doGetEulerAxis(dVector3 result, unsigned anum) const
+{
+    // If we're in Euler mode, joint->axis[1] doesn't
+    // have anything sensible in it.  So don't just return
+    // that, find the actual effective axis.
+    // Likewise, the actual axis of rotation for
+    // the other axes is different from what's stored.
+    dVector3 axes[dSA__MAX];
+    computeGlobalAxes(axes);
+
+    if (anum == dSA_Y) 
+    {
+        dCopyVector3(result, axes[dSA_Y]); // the middle axis is returned as computed
+    } 
+    else if (anum < dSA_Y) // Comparing against the same constant lets compiler reuse EFLAGS register for another conditional jump
+    {
+        dSASSERT(dSA_X < dSA_Y); // Otherwise the condition above is incorrect
+        dIASSERT(anum == dSA_X);
+
+        // This won't be unit length in general,
+        // but it's what's used in getInfo2
+        // This may be why things freak out as
+        // the body-relative axes get close to each other.
+        dCalcVectorCross3(result, axes[dSA_Y], axes[dSA_Z]);
+    } 
+    else 
+    {
+        dSASSERT(dSA_Z > dSA_Y); // Otherwise the condition above is incorrect
+        dIASSERT(anum == dSA_Z);
+
+        // Same problem as above.
+        dCalcVectorCross3(result, axes[dSA_X], axes[dSA_Y]);
+    }
+}
+
+
+void dxJointAMotor::setAngleValue(unsigned anum, dReal angle)
+{
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    dAASSERT(m_mode == dAMotorUser); // angles may only be assigned in dAMotorUser mode
+
+    // Besides the assert, the call is ignored at run time in any other mode.
+    if (m_mode != dAMotorUser) { return; }
+
+    m_angle[anum] = angle;
+}
+
+
+dReal dxJointAMotor::calculateAngleRate(unsigned anum) const
+{
+    dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX));
+    dAASSERT(this->node[0].body != NULL); // the joint must be set up before its angle rate is queried
+
+    dVector3 effectiveAxis;
+    getAxisValue(effectiveAxis, anum);
+
+    // avel of the first body projected onto the axis, minus the same
+    // projection for the second body when there is one
+    dReal relativeRate = dDOT(effectiveAxis, this->node[0].body->avel);
+
+    if (this->node[1].body != NULL)
+    {
+        relativeRate -= dDOT(effectiveAxis, this->node[1].body->avel);
+    }
+
+    // A reversed joint reports the negated rate, which creates the effect of swapped bodies.
+    if (GetIsJointReverse()) { relativeRate = -relativeRate; }
+    return relativeRate;
+}
+
+
+void dxJointAMotor::addTorques(dReal torque1, dReal torque2, dReal torque3)
+{ // Adds the per-axis torques, combined along the global axes, to body 1 and the opposite torque to body 2 (if present).
+    unsigned num = getNumAxes();
+    dAASSERT(dIN_RANGE(num, dSA__MIN, dSA__MAX + 1));
+
+    dVector3 sum;
+    dVector3 torqueVector;
+    dVector3 axes[dSA__MAX];
+    // NOTE(review): the three locals above stay uninitialized when num == dSA__MIN; the code below appears not to read them then.
+
+    if (num != dSA__MIN)
+    {
+        computeGlobalAxes(axes);
+
+        if (!GetIsJointReverse())
+        {
+            dAssignVector3(torqueVector, torque1, torque2, torque3);
+        }
+        else
+        {
+            // Negating torques creates an effect of swapped bodies later
+            dAssignVector3(torqueVector, -torque1, -torque2, -torque3);
+        }
+    }
+
+    switch (num)
+    {
+        case dSA_Z + 1: // all three axes contribute
+        {
+            dAddThreeScaledVectors3(sum, axes[dSA_Z], axes[dSA_Y], axes[dSA_X], torqueVector[dSA_Z], torqueVector[dSA_Y], torqueVector[dSA_X]);
+            break;
+        }
+
+        case dSA_Y + 1: // the X and Y axes contribute
+        {
+            dAddScaledVectors3(sum, axes[dSA_Y], axes[dSA_X], torqueVector[dSA_Y], torqueVector[dSA_X]);
+            break;
+        }
+
+        case dSA_X + 1: // only the X axis contributes
+        {
+            dCopyScaledVector3(sum, axes[dSA_X], torqueVector[dSA_X]);
+            break;
+        }
+        
+        default:
+        {
+            dSASSERT(dSA_Z > dSA_Y); // Otherwise the addends order needs to be switched
+            dSASSERT(dSA_Y > dSA_X);
+            
+            // Do nothing
+            break;
+        }
+    }
+
+    if (num != dSA__MIN)
+    {
+        dAASSERT(this->node[0].body != NULL); // Don't add torques unless you set the joint up first!
+
+        // NOTE!
+        // For reverse joints, the torqueVector negated at function entry produces the effect of swapped bodies
+        dBodyAddTorque(this->node[0].body, sum[dV3E_X], sum[dV3E_Y], sum[dV3E_Z]);
+        
+        if (this->node[1].body != NULL)
+        {
+            dBodyAddTorque(this->node[1].body, -sum[dV3E_X], -sum[dV3E_Y], -sum[dV3E_Z]); // equal and opposite torque on the second body
+        }
+    }
+}
+
+
+// Express the motor axes in global coordinates, picking the routine that matches m_mode
+void dxJointAMotor::computeGlobalAxes(dVector3 ax[dSA__MAX]) const
+{
+    const int mode = m_mode;
+
+    if (mode == dAMotorUser)
+    {
+        // User mode: each axis follows its own relativity setting
+        doComputeGlobalUserAxes(ax);
+        return;
+    }
+
+    if (mode == dAMotorEuler)
+    {
+        // Euler mode: two axes follow the bodies, the third is derived from them
+        doComputeGlobalEulerAxes(ax);
+        return;
+    }
+
+    // Only the two modes above are handled.
+    // Any other mode value trips the internal assertion below,
+    // and ax[] is left exactly as the caller passed it in.
+    dIASSERT(false);
+}
+
+void dxJointAMotor::doComputeGlobalUserAxes(dVector3 ax[dSA__MAX]) const
+{
+    // User mode: every configured axis is anchored to body 1, to body 2 or to the global frame.
+    // Only the first m_num entries of ax[] are written.
+    const unsigned axisCount = m_num;
+    for (unsigned axisIndex = 0; axisIndex != axisCount; ++axisIndex)
+    {
+        // Rotation of the body the axis is anchored to; NULL means "take the axis as stored"
+        const dReal *anchorRotation = NULL;
+        const dJointBodyRelativity relativity = m_rel[axisIndex];
+        if (relativity == dJBR_BODY1)
+        {
+            anchorRotation = this->node[0].body->posr.R;
+        }
+        else if (relativity == dJBR_BODY2 && this->node[1].body != NULL)
+        {
+            anchorRotation = this->node[1].body->posr.R;
+        }
+
+        if (anchorRotation != NULL)
+        {
+            dMultiply0_331(ax[axisIndex], anchorRotation, m_axis[axisIndex]);
+        }
+        else
+        {
+            // Global axis, or a body-2 axis while no second body is attached: plain copy
+            dCopyVector3(ax[axisIndex], m_axis[axisIndex]);
+        }
+    }
+}
+
+void dxJointAMotor::doComputeGlobalEulerAxes(dVector3 ax[dSA__MAX]) const
+{
+    // special handling for Euler mode
+    // (one axis is rotated by the first body, the other by the second body; ax[dSA_Y] is derived from ax[dSA_Z] and ax[dSA_X])
+    dSpaceAxis firstBodyAxis = BuildFirstBodyEulerAxis();
+    dMultiply0_331(ax[firstBodyAxis], this->node[0].body->posr.R, m_axis[firstBodyAxis]);
+
+    dSpaceAxis secondBodyAxis = EncodeOtherEulerAxis(firstBodyAxis); // NOTE(review): presumably dSA_X <-> dSA_Z, since the cross product below reads both - confirm
+
+    if (this->node[1].body != NULL)
+    {
+        dMultiply0_331(ax[secondBodyAxis], this->node[1].body->posr.R, m_axis[secondBodyAxis]);
+    }
+    else
+    {
+        dCopyVector3(ax[secondBodyAxis], m_axis[secondBodyAxis]); // no second body: the stored axis is used unrotated
+    }
+
+    dCalcVectorCross3(ax[dSA_Y], ax[dSA_Z], ax[dSA_X]); // must come after both writes above
+    dNormalize3(ax[dSA_Y]);
+}
+
+// Recomputes m_angle[dSA_X], m_angle[dSA_Y] and m_angle[dSA_Z] from the global axes in ax[] and the stored reference vectors.
+void dxJointAMotor::computeEulerAngles(dVector3 ax[dSA__MAX])
+{
+    // assumptions:
+    //   global axes already calculated --> ax
+    //   axis[0] is relative to body 1 --> global ax[0]
+    //   axis[2] is relative to body 2 --> global ax[2]
+    //   ax[1] = ax[2] x ax[0]
+    //   original ax[0] and ax[2] are perpendicular
+    //   reference1 is perpendicular to ax[0] (in body 1 frame)
+    //   reference2 is perpendicular to ax[2] (in body 2 frame)
+    //   all ax[] and reference vectors are unit length
+
+    // calculate references in global frame
+    dVector3 refs[dJCB__MAX];
+    dMultiply0_331(refs[dJCB_FIRST_BODY], this->node[0].body->posr.R, m_references[dJCB_FIRST_BODY]);
+
+    if (this->node[1].body != NULL)
+    {
+        dMultiply0_331(refs[dJCB_SECOND_BODY], this->node[1].body->posr.R, m_references[dJCB_SECOND_BODY]);
+    }
+    else
+    {
+        dCopyVector3(refs[dJCB_SECOND_BODY], m_references[dJCB_SECOND_BODY]); // no second body: the stored reference is used unrotated
+    }
+
+
+    // get q perpendicular to both ax[0] and ref1, get first euler angle
+    dVector3 q; // scratch vector, overwritten before each of the three angle computations
+    dJointConnectedBody firstAxisBody = BuildFirstEulerAxisBody(); // second body for reverse joints, first body otherwise
+
+    dCalcVectorCross3(q, ax[dSA_X], refs[firstAxisBody]);
+    m_angle[dSA_X] = -dAtan2(dCalcVectorDot3(ax[dSA_Z], q), dCalcVectorDot3(ax[dSA_Z], refs[firstAxisBody]));
+
+    // get q perpendicular to both ax[0] and ax[1], get second euler angle
+    dCalcVectorCross3(q, ax[dSA_X], ax[dSA_Y]);
+    m_angle[dSA_Y] = -dAtan2(dCalcVectorDot3(ax[dSA_Z], ax[dSA_X]), dCalcVectorDot3(ax[dSA_Z], q));
+
+    dJointConnectedBody secondAxisBody = EncodeJointOtherConnectedBody(firstAxisBody);
+
+    // get q perpendicular to both ax[1] and ax[2], get third euler angle
+    dCalcVectorCross3(q, ax[dSA_Y], ax[dSA_Z]);
+    m_angle[dSA_Z] = -dAtan2(dCalcVectorDot3(refs[secondAxisBody], ax[dSA_Y]), dCalcVectorDot3(refs[secondAxisBody], q));
+}
+
+
+// set the reference vectors (m_references[]) as follows:
+//   * reference1 = current axis[2] relative to body 1
+//   * reference2 = current axis[0] relative to body 2
+// this assumes that:
+//    * axis[0] is relative to body 1
+//    * axis[2] is relative to body 2
+// (for reverse joints BuildFirstBodyEulerAxis() selects the axes the other way round)
+void dxJointAMotor::setEulerReferenceVectors()
+{
+    if (/*this->node[0].body != NULL && */this->node[1].body != NULL)
+    {
+        dIASSERT(this->node[0].body != NULL);
+
+        dVector3 r;  // axis[2] and axis[0] in global coordinates
+
+        dSpaceAxis firstBodyAxis = BuildFirstBodyEulerAxis();
+        dMultiply0_331(r, this->node[0].body->posr.R, m_axis[firstBodyAxis]);
+        dMultiply1_331(m_references[dJCB_SECOND_BODY], this->node[1].body->posr.R, r); // NOTE(review): dMultiply1_331 presumably multiplies by the transposed rotation (global -> body 2) - confirm
+
+        dSpaceAxis secondBodyAxis = EncodeOtherEulerAxis(firstBodyAxis);
+        dMultiply0_331(r, this->node[1].body->posr.R, m_axis[secondBodyAxis]);
+        dMultiply1_331(m_references[dJCB_FIRST_BODY], this->node[0].body->posr.R, r);
+    } 
+    else 
+    {
+        // We want to handle angular motors attached to passive geoms
+        // Replace missing node.R with identity
+        if (this->node[0].body != NULL) 
+        {
+            dSpaceAxis firstBodyAxis = BuildFirstBodyEulerAxis();
+            dMultiply0_331(m_references[dJCB_SECOND_BODY], this->node[0].body->posr.R, m_axis[firstBodyAxis]);
+
+            dSpaceAxis secondBodyAxis = EncodeOtherEulerAxis(firstBodyAxis);
+            dMultiply1_331(m_references[dJCB_FIRST_BODY], this->node[0].body->posr.R, m_axis[secondBodyAxis]);
+        } 
+    } // with no body attached at all, m_references[] is left as it is
+}
+
+/*inline */ // Euler axis that EncodeJointConnectedBodyEulerAxis() associates with the body chosen by BuildFirstEulerAxisBody()
+dSpaceAxis dxJointAMotor::BuildFirstBodyEulerAxis() const
+{
+    return EncodeJointConnectedBodyEulerAxis(BuildFirstEulerAxisBody());
+}
+
+/*inline */ // Body tied to the first Euler axis: the first body, or the second one when the joint is reversed
+dJointConnectedBody dxJointAMotor::BuildFirstEulerAxisBody() const
+{
+    return GetIsJointReverse() ? dJCB_SECOND_BODY : dJCB_FIRST_BODY;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/amotor.h b/libs/ode-0.16.1/ode/src/joints/amotor.h
new file mode 100644
index 0000000..2fd421c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/amotor.h
@@ -0,0 +1,105 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_AMOTOR_H_
+#define _ODE_JOINT_AMOTOR_H_
+
+#include "joint.h"
+
+
+// angular motor
+// (up to dSA__MAX axes, each with its own angle and limit/motor settings; runs in one of the dAMotorXXX modes)
+typedef dxJoint dxJointAMotor_Parent;
+class dxJointAMotor:
+    public dxJointAMotor_Parent
+{
+public:
+    dxJointAMotor(dxWorld *w);
+    virtual ~dxJointAMotor();
+
+public: // virtual joint interface
+    virtual void getSureMaxInfo(SureMaxInfo* info);
+    virtual void getInfo1(Info1* info);
+    virtual void getInfo2(dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex);
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+public: // configuration: mode, axis count, axis directions
+    void setOperationMode(int mode);
+    int getOperationMode() const { return m_mode; }
+
+    void setNumAxes(unsigned num);
+    int getNumAxes() const { return m_num; }
+
+    dJointBodyRelativity getAxisBodyRelativity(unsigned anum) const;
+
+    void setAxisValue(unsigned anum, dJointBodyRelativity rel, dReal x, dReal y, dReal z);
+    void getAxisValue(dVector3 result, unsigned anum) const;
+
+private: // per-mode helpers behind the axis getter
+    void doGetUserAxis(dVector3 result, unsigned anum) const;
+    void doGetEulerAxis(dVector3 result, unsigned anum) const;
+
+public: // angles, angle rates and limit/motor parameters; anum must lie in [dSA__MIN, dSA__MAX)
+    void setAngleValue(unsigned anum, dReal angle); // only takes effect in dAMotorUser mode
+    dReal getAngleValue(unsigned anum) const { dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX)); return m_angle[anum]; }
+
+    dReal calculateAngleRate(unsigned anum) const;
+
+    void setLimotParameter(unsigned anum, int limotParam, dReal value) { dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX)); m_limot[anum].set(limotParam, value); }
+    dReal getLimotParameter(unsigned anum, int limotParam) const { dAASSERT(dIN_RANGE(anum, dSA__MIN, dSA__MAX)); return m_limot[anum].get(limotParam); }
+
+public:
+    void addTorques(dReal torque1, dReal torque2, dReal torque3); // one torque magnitude per motor axis
+
+private:
+    void computeGlobalAxes(dVector3 ax[dSA__MAX]) const; // dispatches on m_mode to one of the two helpers below
+    void doComputeGlobalUserAxes(dVector3 ax[dSA__MAX]) const;
+    void doComputeGlobalEulerAxes(dVector3 ax[dSA__MAX]) const;
+
+    void computeEulerAngles(dVector3 ax[dSA__MAX]); // writes m_angle[]
+    void setEulerReferenceVectors();                // writes m_references[]
+
+private:
+    inline dSpaceAxis BuildFirstBodyEulerAxis() const;
+    inline dJointConnectedBody BuildFirstEulerAxisBody() const;
+
+private:
+    friend struct dxAMotorJointPrinter;
+
+private:
+    int m_mode;                                   // a dAMotorXXX constant
+    unsigned m_num;                               // number of axes (0..3)
+    dJointBodyRelativity m_rel[dSA__MAX];         // what the axes are relative to (global,b1,b2)
+    dVector3 m_axis[dSA__MAX];                    // three axes
+    // these vectors are used for calculating Euler angles
+    dVector3 m_references[dJCB__MAX];             // original axis[2], relative to body 1; original axis[0], relative to body 2
+    dReal m_angle[dSA__MAX];                      // user-supplied angles for axes
+    dxJointLimitMotor m_limot[dSA__MAX];          // limit+motor info for axes; one entry per axis, indexed like m_angle
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/ball.cpp b/libs/ode-0.16.1/ode/src/joints/ball.cpp
new file mode 100644
index 0000000..c295b85
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/ball.cpp
@@ -0,0 +1,186 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "ball.h"
+#include "joint_internal.h"
+
+//****************************************************************************
+// ball and socket
+
+dxJointBall::dxJointBall( dxWorld *w ) :
+    dxJoint( w )
+{
+    cfm = world->global_cfm; // the joint starts out with the world-wide CFM and ERP values
+    erp = world->global_erp;
+    dSetZero( anchor1, 4 ); // both anchors start zeroed
+    dSetZero( anchor2, 4 );
+}
+
+// Reports the upper bound on constraint rows for this joint, which is 3 (the same count getInfo1 reports).
+void 
+dxJointBall::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 3;
+}
+
+
+void
+dxJointBall::getInfo1( dxJoint::Info1 *info )
+{
+    info->nub = 3; // the ball joint always reports three rows, and nub equals m
+    info->m = 3;
+}
+
+
+void
+dxJointBall::getInfo2( dReal worldFPS, dReal /*worldERP*/, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    // Each of the three rows gets the joint's own CFM; the joint's own ERP goes to setBall
+    for ( int row = 0; row != 3; ++row )
+        pairRhsCfm[row * pairskip + GI2_CFM] = cfm;
+    setBall( this, worldFPS, this->erp, rowskip, J1, J2, pairskip, pairRhsCfm, anchor1, anchor2 );
+}
+
+
+
+
+// Sets the anchor of ball joint j from (x, y, z); setAnchors() receives both anchor1 and anchor2 to fill in.
+void dJointSetBallAnchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Ball );
+    setAnchors( joint, x, y, z, joint->anchor1, joint->anchor2 ); // NOTE(review): presumably converts the point into each body's frame - confirm in setAnchors
+}
+
+// Overwrites anchor2 of ball joint j with (x, y, z) exactly as given; anchor1 is not touched.
+void dJointSetBallAnchor2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Ball );
+    joint->anchor2[0] = x;
+    joint->anchor2[1] = y;
+    joint->anchor2[2] = z;
+    joint->anchor2[3] = 0; // fourth element of the dVector3 is kept at zero
+}
+
+void dJointGetBallAnchor( dJointID j, dVector3 result )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, Ball );
+    if ( ( joint->flags & dJOINT_REVERSE ) == 0 )
+        getAnchor( joint, result, joint->anchor1 );
+    else
+        getAnchor2( joint, result, joint->anchor2 ); // reversed joint: answer with the second anchor
+}
+
+
+void dJointGetBallAnchor2( dJointID j, dVector3 result )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, Ball );
+    if ( ( joint->flags & dJOINT_REVERSE ) == 0 )
+        getAnchor2( joint, result, joint->anchor2 );
+    else
+        getAnchor( joint, result, joint->anchor1 ); // reversed joint: answer with the first anchor
+}
+
+
+void dxJointBall::set( int num, dReal value )
+{
+    // Only the CFM and ERP parameters are recognized; any other id is ignored
+    if ( num == dParamCFM )
+    {
+        cfm = value;
+    }
+    else if ( num == dParamERP )
+    {
+        erp = value;
+    }
+}
+
+
+dReal dxJointBall::get( int num )
+{
+    // Ids other than dParamCFM / dParamERP read as zero
+    dReal value = 0;
+
+    if ( num == dParamCFM )
+        value = cfm;
+    else if ( num == dParamERP )
+        value = erp;
+
+    return value;
+}
+
+// C API wrapper around dxJointBall::set(): only dParamCFM and dParamERP change anything there.
+void dJointSetBallParam( dJointID j, int parameter, dReal value )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Ball );
+    joint->set( parameter, value );
+}
+
+// C API wrapper around dxJointBall::get(): yields the CFM or ERP value, and 0 for any other parameter id.
+dReal dJointGetBallParam( dJointID j, int parameter )
+{
+    dxJointBall* joint = ( dxJointBall* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Ball );
+    return joint->get( parameter );
+}
+
+// Identifies this joint as a ball joint.
+dJointType
+dxJointBall::type() const
+{
+    return dJointTypeBall;
+}
+
+sizeint
+dxJointBall::size() const
+{
+    return sizeof( *this ); // byte size of the dxJointBall object itself
+}
+
+void
+dxJointBall::setRelativeValues()
+{
+    dVector3 anchor; // current anchor as reported by the public getter
+    dJointGetBallAnchor(this, anchor);
+    setAnchors( this, anchor[0], anchor[1], anchor[2], anchor1, anchor2 ); // feed it back so that anchor1 and anchor2 are rewritten from it
+}
+
+
+
diff --git a/libs/ode-0.16.1/ode/src/joints/ball.h b/libs/ode-0.16.1/ode/src/joints/ball.h
new file mode 100644
index 0000000..d8d22a5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/ball.h
@@ -0,0 +1,54 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_BALL_H_
+#define _ODE_JOINT_BALL_H_
+
+#include "joint.h"
+
+// ball and socket
+
+struct dxJointBall : public dxJoint
+{
+    dVector3 anchor1;   // anchor w.r.t first body
+    dVector3 anchor2;   // anchor w.r.t second body
+    dReal erp;          // error reduction (the constructor seeds it from world->global_erp)
+    dReal cfm;          // constraint force mix in (the constructor seeds it from world->global_cfm)
+    void set( int num, dReal value ); // num: dParamCFM or dParamERP; other ids are ignored
+    dReal get( int num );             // returns 0 for ids other than dParamCFM / dParamERP
+
+    dxJointBall( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+    virtual void setRelativeValues(); // re-derives anchor1/anchor2 from the current anchor
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/contact.cpp b/libs/ode-0.16.1/ode/src/joints/contact.cpp
new file mode 100644
index 0000000..5ab3482
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/contact.cpp
@@ -0,0 +1,361 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "contact.h"
+#include "joint_internal.h"
+
+
+
+ //****************************************************************************
+ // contact
+// Only the dxJoint base is initialized; the_m and contact are not assigned by this constructor.
+dxJointContact::dxJointContact(dxWorld *w) :
+    dxJoint(w)
+{
+}
+
+
+void
+dxJointContact::getSureMaxInfo(SureMaxInfo* info)
+{
+    const bool rolling = (contact.surface.mode & dContactRolling) ? true : false;
+    info->max_m = rolling ? 6 : 3; // ...as the actual m is very likely to hit the maximum
+}
+
+// Clamps negative friction coefficients to zero and reports the row count (m) and unbounded row count (nub); m is cached in the_m.
+void
+dxJointContact::getInfo1(dxJoint::Info1 *info)
+{
+    // make sure mu's >= 0, then calculate number of constraint rows and number
+    // of unbounded rows. A row is counted only for a strictly positive coefficient,
+    int m = 1, nub = 0; // because getInfo2 writes a row only in that case; row 0 is the normal row
+
+    // Anisotropic sliding and rolling and spinning friction 
+    if (contact.surface.mode & dContactAxisDep) {
+        if (contact.surface.mu < 0) {
+            contact.surface.mu = 0;
+        }
+        else if (contact.surface.mu > 0) {
+            if (contact.surface.mu == dInfinity) { nub++; }
+            m++;
+        }
+
+        if (contact.surface.mu2 < 0) {
+            contact.surface.mu2 = 0;
+        }
+        else if (contact.surface.mu2 > 0) {
+            if (contact.surface.mu2 == dInfinity) { nub++; }
+            m++;
+        }
+
+        if ((contact.surface.mode & dContactRolling) != 0) {
+            if (contact.surface.rho < 0) {
+                contact.surface.rho = 0;
+            }
+            else if (contact.surface.rho > 0) { // a zero coefficient gets no row
+                if (contact.surface.rho == dInfinity) { nub++; }
+                m++;
+            }
+
+            if (contact.surface.rho2 < 0) {
+                contact.surface.rho2 = 0;
+            }
+            else if (contact.surface.rho2 > 0) {
+                if (contact.surface.rho2 == dInfinity) { nub++; }
+                m++;
+            }
+
+            if (contact.surface.rhoN < 0) {
+                contact.surface.rhoN = 0;
+            }
+            else if (contact.surface.rhoN > 0) {
+                if (contact.surface.rhoN == dInfinity) { nub++; }
+                m++;
+            }
+        }
+    }
+    else {
+        if (contact.surface.mu < 0) {
+            contact.surface.mu = 0;
+        }
+        else if (contact.surface.mu > 0) {
+            if (contact.surface.mu == dInfinity) { nub += 2; }
+            m += 2; // the same mu serves both tangential directions
+        }
+
+        if ((contact.surface.mode & dContactRolling) != 0) {
+            if (contact.surface.rho < 0) {
+                contact.surface.rho = 0;
+            }
+            else if (contact.surface.rho > 0) { // a zero coefficient gets no rows
+                if (contact.surface.rho == dInfinity) { nub += 3; }
+                m += 3; // the same rho serves both rolling axes and the spinning axis
+            }
+        }
+    }
+
+    the_m = m; // getInfo2 checks this to decide whether any friction rows exist
+    info->m = m;
+    info->nub = nub;
+}
+
+// Fills the constraint rows: the normal row first, then a row per positive sliding and rolling/spinning friction coefficient.
+void
+dxJointContact::getInfo2(dReal worldFPS, dReal worldERP,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi,
+    int *findex)
+{
+    enum 
+    {
+        ROW_NORMAL, // the normal row always comes first
+
+        ROW__OPTIONAL_MIN, // first of the optional friction rows
+    };
+
+    const int surface_mode = contact.surface.mode;
+
+    // set right hand side and cfm value for normal
+    dReal erp = (surface_mode & dContactSoftERP) != 0 ? contact.surface.soft_erp : worldERP;
+    dReal k = worldFPS * erp;
+
+    dReal depth = contact.geom.depth - world->contactp.min_depth; // penetration beyond the world's min_depth allowance
+    if (depth < 0) depth = 0;
+
+    dReal motionN = (surface_mode & dContactMotionN) != 0 ? contact.surface.motionN : REAL(0.0);
+    const dReal pushout = k * depth + motionN;
+
+    bool apply_bounce = (surface_mode & dContactBounce) != 0 && contact.surface.bounce_vel >= 0;
+    dReal outgoing = 0;
+
+    // note: this cap should not limit bounce velocity
+    const dReal maxvel = world->contactp.max_vel;
+    dReal c = pushout > maxvel ? maxvel : pushout;
+
+    // c1,c2 = contact points with respect to body PORs
+    dVector3 c1, c2 = { 0, }; // c2 stays zero when there is no second body; c1 is assigned below
+
+    // get normal, with sign adjusted for body1/body2 polarity
+    dVector3 normal;
+    if ((flags & dJOINT_REVERSE) != 0) {
+        dCopyNegatedVector3(normal, contact.geom.normal);
+    }
+    else {
+        dCopyVector3(normal, contact.geom.normal);
+    }
+
+    dxBody *b1 = node[1].body; // second body, may be NULL
+    if (b1) {
+        dSubtractVectors3(c2, contact.geom.pos, b1->posr.pos);
+        // set Jacobian for b1 normal
+        dCopyNegatedVector3(J2 + ROW_NORMAL * rowskip + GI2__JL_MIN, normal);
+        dCalcVectorCross3(J2 + ROW_NORMAL * rowskip + GI2__JA_MIN, normal, c2); //== dCalcVectorCross3( J2 + GI2__JA_MIN, c2, normal ); dNegateVector3( J2 + GI2__JA_MIN );
+        if (apply_bounce) {
+            outgoing /*+*/= dCalcVectorDot3(J2 + ROW_NORMAL * rowskip + GI2__JA_MIN, node[1].body->avel)
+                - dCalcVectorDot3(normal, node[1].body->lvel);
+        }
+    }
+
+    dxBody *b0 = node[0].body; // first body, dereferenced without a NULL check
+    dSubtractVectors3(c1, contact.geom.pos, b0->posr.pos);
+    // set Jacobian for b0 normal
+    dCopyVector3(J1 + ROW_NORMAL * rowskip + GI2__JL_MIN, normal);
+    dCalcVectorCross3(J1 + ROW_NORMAL * rowskip + GI2__JA_MIN, c1, normal);
+    if (apply_bounce) {
+        // calculate outgoing velocity (-ve for incoming contact)
+        outgoing += dCalcVectorDot3(J1 + ROW_NORMAL * rowskip + GI2__JA_MIN, node[0].body->avel)
+            + dCalcVectorDot3(normal, node[0].body->lvel);
+    }
+
+    // deal with bounce
+    if (apply_bounce) {
+        dReal negated_outgoing = motionN - outgoing;
+        // only apply bounce if the outgoing velocity is greater than the
+        // threshold, and if the resulting c[rowNormal] exceeds what we already have.
+        dIASSERT(contact.surface.bounce_vel >= 0);
+        if (/*contact.surface.bounce_vel >= 0 &&*/
+            negated_outgoing > contact.surface.bounce_vel) {
+            const dReal newc = contact.surface.bounce * negated_outgoing + motionN;
+            if (newc > c) { c = newc; }
+        }
+    }
+
+    pairRhsCfm[ROW_NORMAL * pairskip + GI2_RHS] = c;
+
+    if ((surface_mode & dContactSoftCFM) != 0) {
+        pairRhsCfm[ROW_NORMAL * pairskip + GI2_CFM] = contact.surface.soft_cfm;
+    }
+
+    // set LCP limits for normal
+    pairLoHi[ROW_NORMAL * pairskip + GI2_LO] = 0;
+    pairLoHi[ROW_NORMAL * pairskip + GI2_HI] = dInfinity;
+
+
+    if (the_m > 1) { // if no friction, there is nothing else to do
+        // now do jacobian for tangential forces
+        dVector3 t1, t2; // two vectors tangential to normal
+
+        if ((surface_mode & dContactFDir1) != 0) {   // use fdir1 ?
+            dCopyVector3(t1, contact.fdir1);
+            dCalcVectorCross3(t2, normal, t1);
+        }
+        else {
+            dPlaneSpace(normal, t1, t2);
+        }
+
+        int row = ROW__OPTIONAL_MIN; // index of the next row to write; advanced only when a row is emitted
+        int currRowSkip = row * rowskip, currPairSkip = row * pairskip;
+
+        // first friction direction
+        const dReal mu = contact.surface.mu;
+
+        if (mu > 0) {
+            dCopyVector3(J1 + currRowSkip + GI2__JL_MIN, t1);
+            dCalcVectorCross3(J1 + currRowSkip + GI2__JA_MIN, c1, t1);
+
+            if (node[1].body) {
+                dCopyNegatedVector3(J2 + currRowSkip + GI2__JL_MIN, t1);
+                dCalcVectorCross3(J2 + currRowSkip + GI2__JA_MIN, t1, c2); //== dCalcVectorCross3( J2 + rowskip + GI2__JA_MIN, c2, t1 ); dNegateVector3( J2 + rowskip + GI2__JA_MIN );
+            }
+
+            // set right hand side
+            if ((surface_mode & dContactMotion1) != 0) {
+                pairRhsCfm[currPairSkip + GI2_RHS] = contact.surface.motion1;
+            }
+            // set slip (constraint force mixing)
+            if ((surface_mode & dContactSlip1) != 0) {
+                pairRhsCfm[currPairSkip + GI2_CFM] = contact.surface.slip1;
+            }
+
+            // set LCP bounds and friction index. this depends on the approximation
+            // mode
+            pairLoHi[currPairSkip + GI2_LO] = -mu;
+            pairLoHi[currPairSkip + GI2_HI] = mu;
+
+            if ((surface_mode & dContactApprox1_1) != 0) {
+                findex[row] = 0; // refers to row 0, i.e. ROW_NORMAL
+            }
+
+            ++row;
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+
+        // second friction direction
+        const dReal mu2 = (surface_mode & dContactMu2) != 0 ? contact.surface.mu2 : mu;
+
+        if (mu2 > 0) {
+            dCopyVector3(J1 + currRowSkip + GI2__JL_MIN, t2);
+            dCalcVectorCross3(J1 + currRowSkip + GI2__JA_MIN, c1, t2);
+
+            if (node[1].body) {
+                dCopyNegatedVector3(J2 + currRowSkip + GI2__JL_MIN, t2);
+                dCalcVectorCross3(J2 + currRowSkip + GI2__JA_MIN, t2, c2); //== dCalcVectorCross3( J2 + currRowSkip + GI2__JA_MIN, c2, t2 ); dNegateVector3( J2 + currRowSkip + GI2__JA_MIN );
+            }
+
+            // set right hand side
+            if ((surface_mode & dContactMotion2) != 0) {
+                pairRhsCfm[currPairSkip + GI2_RHS] = contact.surface.motion2;
+            }
+            // set slip (constraint force mixing)
+            if ((surface_mode & dContactSlip2) != 0) {
+                pairRhsCfm[currPairSkip + GI2_CFM] = contact.surface.slip2;
+            }
+
+            // set LCP bounds and friction index. this depends on the approximation
+            // mode
+            pairLoHi[currPairSkip + GI2_LO] = -mu2;
+            pairLoHi[currPairSkip + GI2_HI] = mu2;
+
+            if ((surface_mode & dContactApprox1_2) != 0) {
+                findex[row] = 0; // refers to row 0, i.e. ROW_NORMAL
+            }
+
+            ++row;
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+
+        // Handle rolling/spinning friction
+        if ((surface_mode & dContactRolling) != 0) {
+
+            const dReal *const ax[3] = {
+                t1, // Rolling around t1 creates movement parallel to t2
+                t2,
+                normal // Spinning axis
+            };
+
+            const int approx_bits[3] = { dContactApprox1_1, dContactApprox1_2, dContactApprox1_N };
+
+            // Get the coefficients
+            dReal rho[3];
+            rho[0] = contact.surface.rho;
+            if ((surface_mode & dContactAxisDep) != 0) {
+                rho[1] = contact.surface.rho2;
+                rho[2] = contact.surface.rhoN;
+            }
+            else {
+                rho[1] = rho[0];
+                rho[2] = rho[0];
+            }
+
+            for (int i = 0; i != 3; ++i) {
+                if (rho[i] > 0) { // a zero coefficient produces no row
+                    // Set the angular axis
+                    dCopyVector3(J1 + currRowSkip + GI2__JA_MIN, ax[i]);
+
+                    if (b1) {
+                        dCopyNegatedVector3(J2 + currRowSkip + GI2__JA_MIN, ax[i]);
+                    }
+
+                    // Set the lcp limits
+                    pairLoHi[currPairSkip + GI2_LO] = -rho[i];
+                    pairLoHi[currPairSkip + GI2_HI] = rho[i];
+
+                    // Should we use proportional force?
+                    if ((surface_mode & approx_bits[i]) != 0) {
+                        // Make limits proportional to normal force
+                        findex[row] = 0;
+                    }
+
+                    ++row;
+                    currRowSkip += rowskip; currPairSkip += pairskip;
+                }
+            }
+        }
+    }
+}
+
+dJointType
+dxJointContact::type() const
+{
+    return dJointTypeContact; // identifies this joint as a contact joint
+}
+
+// Reports the byte size of the dxJointContact object itself.
+sizeint
+dxJointContact::size() const
+{
+    return sizeof(*this);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/contact.h b/libs/ode-0.16.1/ode/src/joints/contact.h
new file mode 100644
index 0000000..604a4fb
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/contact.h
@@ -0,0 +1,48 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_CONTACT_H_
+#define _ODE_JOINT_CONTACT_H_
+
+#include "joint.h"
+
+// contact
+
+struct dxJointContact : public dxJoint // contact joint; type() reports dJointTypeContact
+{
+    int the_m;   // number of rows computed by getInfo1
+    dContact contact; // contact description; getInfo2 reads contact.surface (e.g. rho, rho2, rhoN)
+
+    dxJointContact( dxWorld* w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex); // writes Jacobian rows (J1/J2), lo/hi limits and findex entries
+    virtual dJointType type() const; // returns dJointTypeContact
+    virtual sizeint size() const; // returns sizeof(*this)
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/dball.cpp b/libs/ode-0.16.1/ode/src/joints/dball.cpp
new file mode 100644
index 0000000..3754646
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/dball.cpp
@@ -0,0 +1,314 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "dball.h"
+#include "joint_internal.h"
+
+/*
+ * Double Ball joint: tries to maintain a fixed distance between two anchor
+ * points.
+ */
+
+dxJointDBall::dxJointDBall(dxWorld *w) : // creates a double-ball joint with zeroed anchors
+    dxJoint(w)
+{
+    dSetZero(anchor1, 3); // only the first three components are cleared
+    dSetZero(anchor2, 3);
+    targetDistance = 0; // recomputed by updateTargetDistance() once anchors are set
+    erp = world->global_erp; // per-joint ERP starts from the world's global value
+    cfm = world->global_cfm; // per-joint CFM starts from the world's global value
+}
+
+void
+dxJointDBall::getSureMaxInfo( SureMaxInfo* info ) // reports the upper bound on constraint rows
+{
+    info->max_m = 1; // matches the single row reported by getInfo1
+}
+void
+dxJointDBall::getInfo1( dxJoint::Info1 *info ) // reports the row counts for this joint
+{
+    info->m = 1; // one constraint row (the anchor distance)
+    info->nub = 1; // NOTE(review): presumably the count of unbounded rows - confirm against Info1
+}
+
+void
+dxJointDBall::getInfo2( dReal worldFPS, dReal /*worldERP*/, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex ) // fills row 0 only; rowskip, pairskip, pairLoHi and findex are not referenced in this body
+{
+    dVector3 globalA1, globalA2; // anchor points used for the direction and the distance error
+    dBodyGetRelPointPos(node[0].body, anchor1[0], anchor1[1], anchor1[2], globalA1); // node[0].body is used without a null check
+    
+    if (node[1].body) {
+        dBodyGetRelPointPos(node[1].body, anchor2[0], anchor2[1], anchor2[2], globalA2);
+    } else {
+        dCopyVector3(globalA2, anchor2); // no second body: anchor2 is used exactly as stored
+    }
+
+    dVector3 q; // constraint direction, normalized below
+    dSubtractVectors3(q, globalA1, globalA2);
+
+#ifdef dSINGLE
+    const dReal MIN_LENGTH = REAL(1e-7); // degeneracy threshold for single-precision builds
+#else
+    const dReal MIN_LENGTH = REAL(1e-12); // tighter threshold when dSINGLE is not defined
+#endif
+
+    if (dCalcVectorLength3(q) < MIN_LENGTH) {
+        // anchors (nearly) coincide, so pick a fallback direction
+        // heuristic: difference in velocities at anchors
+        dVector3 v1, v2;
+        dBodyGetPointVel(node[0].body, globalA1[0], globalA1[1], globalA1[2], v1);
+    
+        if (node[1].body) {
+            dBodyGetPointVel(node[1].body, globalA2[0], globalA2[1], globalA2[2], v2);
+        } else {
+            dZeroVector3(v2); // no second body: its anchor velocity is taken as zero
+        }
+
+        dSubtractVectors3(q, v1, v2);
+
+        if (dCalcVectorLength3(q) < MIN_LENGTH) {
+            // velocities give no usable direction either; fall back to the x axis
+            dAssignVector3(q, 1, 0, 0);
+        }
+    }
+    dNormalize3(q); // q is at least MIN_LENGTH long or (1,0,0) at this point
+
+    dCopyVector3(J1 + GI2__JL_MIN, q); // linear Jacobian part for body 0
+
+    dVector3 relA1; // anchor1 passed through dBodyVectorToWorld (rotation only, presumably - confirm)
+    dBodyVectorToWorld(node[0].body,
+                       anchor1[0], anchor1[1], anchor1[2],
+                       relA1);
+
+    dMatrix3 a1m;
+    dZeroMatrix3(a1m);
+    dSetCrossMatrixMinus(a1m, relA1, 4); // 4 is passed as the skip for a1m; NOTE(review): confirm sign convention
+
+    dMultiply1_331(J1 + GI2__JA_MIN, a1m, q); // angular Jacobian part for body 0
+
+    if (node[1].body) {
+        dCopyNegatedVector3(J2 + GI2__JL_MIN, q); // body 1 gets the opposite linear direction
+
+        dVector3 relA2;
+        dBodyVectorToWorld(node[1].body,
+                           anchor2[0], anchor2[1], anchor2[2],
+                           relA2);
+        dMatrix3 a2m;
+        dZeroMatrix3(a2m);
+        dSetCrossMatrixPlus(a2m, relA2, 4); // Plus variant here versus Minus for body 0
+        dMultiply1_331(J2 + GI2__JA_MIN, a2m, q); // angular Jacobian part for body 1
+    }
+    
+    const dReal k = worldFPS * this->erp; // uses the joint's own erp; the worldERP argument is ignored
+    pairRhsCfm[GI2_RHS] = k * (targetDistance - dCalcPointsDistance3(globalA1, globalA2)); // scaled distance error
+    pairRhsCfm[GI2_CFM] = this->cfm; // per-joint CFM for this row
+}
+
+
+void
+dxJointDBall::updateTargetDistance() // sets targetDistance to the current distance between the two anchors
+{
+    dVector3 p1, p2; // current anchor points, one per side of the joint
+
+    if (node[0].body)
+        dBodyGetRelPointPos(node[0].body, anchor1[0], anchor1[1], anchor1[2], p1);
+    else
+        dCopyVector3(p1, anchor1); // no body on this side: the stored anchor is used as is
+    if (node[1].body)
+        dBodyGetRelPointPos(node[1].body, anchor2[0], anchor2[1], anchor2[2], p2);
+    else
+        dCopyVector3(p2, anchor2); // no body on this side: the stored anchor is used as is
+
+    targetDistance = dCalcPointsDistance3(p1, p2); // overwrites any value set via dJointSetDBallDistance
+}
+
+
+void dJointSetDBallAnchor1( dJointID j, dReal x, dReal y, dReal z ) // sets the first anchor from point (x, y, z)
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    if ( joint->flags & dJOINT_REVERSE ) { // reversed joint: "anchor 1" is stored in anchor2 / node[1]
+        if (joint->node[1].body)
+            dBodyGetPosRelPoint(joint->node[1].body, x, y, z, joint->anchor2); // stored relative to the body
+        else {
+            joint->anchor2[0] = x; // no body: the coordinates are stored unchanged
+            joint->anchor2[1] = y;
+            joint->anchor2[2] = z;
+        }
+    } else {
+        if (joint->node[0].body)
+            dBodyGetPosRelPoint(joint->node[0].body, x, y, z, joint->anchor1); // stored relative to the body
+        else {
+            joint->anchor1[0] = x; // no body: the coordinates are stored unchanged
+            joint->anchor1[1] = y;
+            joint->anchor1[2] = z;
+        }
+    }
+
+    joint->updateTargetDistance(); // target distance follows the new anchor placement
+}
+
+
+void dJointSetDBallAnchor2( dJointID j, dReal x, dReal y, dReal z ) // sets the second anchor from point (x, y, z)
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+
+    if ( joint->flags & dJOINT_REVERSE ) { // reversed joint: "anchor 2" is stored in anchor1 / node[0]
+        if (joint->node[0].body)
+            dBodyGetPosRelPoint(joint->node[0].body, x, y, z, joint->anchor1); // stored relative to the body
+        else {
+            joint->anchor1[0] = x; // no body: the coordinates are stored unchanged
+            joint->anchor1[1] = y;
+            joint->anchor1[2] = z;
+        }
+    } else {
+        if (joint->node[1].body)
+            dBodyGetPosRelPoint(joint->node[1].body, x, y, z, joint->anchor2); // stored relative to the body
+        else {
+            joint->anchor2[0] = x; // no body: the coordinates are stored unchanged
+            joint->anchor2[1] = y;
+            joint->anchor2[2] = z;
+        }
+    }
+
+    joint->updateTargetDistance(); // target distance follows the new anchor placement
+}
+
+dReal dJointGetDBallDistance(dJointID j) // returns the joint's stored target distance
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    return joint->targetDistance; // the stored target, not a freshly measured distance
+}
+
+void dJointSetDBallDistance(dJointID j, dReal dist) // overrides the target distance between the anchors
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( dist>=0, "target distance must be non-negative" );
+
+    joint->targetDistance = dist; // replaced again by any later updateTargetDistance() call
+}
+
+
+void dJointGetDBallAnchor1( dJointID j, dVector3 result ) // writes the first anchor point into result
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+
+    if ( joint->flags & dJOINT_REVERSE ) { // reversed joint: "anchor 1" lives in anchor2 / node[1]
+        if (joint->node[1].body)
+            dBodyGetRelPointPos(joint->node[1].body, joint->anchor2[0], joint->anchor2[1], joint->anchor2[2], result);
+        else
+            dCopyVector3(result, joint->anchor2); // no body: the stored anchor is returned as is
+    } else {
+        if (joint->node[0].body)
+            dBodyGetRelPointPos(joint->node[0].body, joint->anchor1[0], joint->anchor1[1], joint->anchor1[2], result);
+        else
+            dCopyVector3(result, joint->anchor1); // no body: the stored anchor is returned as is
+    }
+}
+
+
+void dJointGetDBallAnchor2( dJointID j, dVector3 result ) // writes the second anchor point into result
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+
+    if ( joint->flags & dJOINT_REVERSE ) { // reversed joint: "anchor 2" lives in anchor1 / node[0]
+        if (joint->node[0].body)
+            dBodyGetRelPointPos(joint->node[0].body, joint->anchor1[0], joint->anchor1[1], joint->anchor1[2], result);
+        else
+            dCopyVector3(result, joint->anchor1); // no body: the stored anchor is returned as is
+    } else {
+        if (joint->node[1].body)
+            dBodyGetRelPointPos(joint->node[1].body, joint->anchor2[0], joint->anchor2[1], joint->anchor2[2], result);
+        else
+            dCopyVector3(result, joint->anchor2); // no body: the stored anchor is returned as is
+    }
+}
+
+
+void dJointSetDBallParam( dJointID j, int parameter, dReal value ) // sets the joint's CFM or ERP
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    switch ( parameter ) { // any parameter other than the two below is silently ignored
+        case dParamCFM:
+            joint->cfm = value;
+            break;
+        case dParamERP:
+            joint->erp = value;
+            break;
+    }
+}
+
+
+dReal dJointGetDBallParam( dJointID j, int parameter ) // reads the joint's CFM or ERP
+{
+    dxJointDBall* joint = static_cast<dxJointDBall*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    switch ( parameter ) {
+        case dParamCFM:
+            return joint->cfm;
+        case dParamERP:
+            return joint->erp;
+        default:
+            return 0; // unsupported parameters read as 0
+    }
+}
+
+
+dJointType
+dxJointDBall::type() const // joint type tag reported for double-ball joints
+{
+    return dJointTypeDBall; // constant; does not depend on the joint's state
+}
+
+sizeint
+dxJointDBall::size() const // size in bytes of this joint object
+{
+    return sizeof( *this ); // *this is a dxJointDBall in this member function
+}
+
+void
+dxJointDBall::setRelativeValues() // refreshes the joint's derived state
+{
+    updateTargetDistance(); // the only derived value here is targetDistance
+}
+
+
+
diff --git a/libs/ode-0.16.1/ode/src/joints/dball.h b/libs/ode-0.16.1/ode/src/joints/dball.h
new file mode 100644
index 0000000..e52fc6c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/dball.h
@@ -0,0 +1,58 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_DBALL_H_
+#define _ODE_JOINT_DBALL_H_
+
+#include "joint.h"
+
+// double ball: tries to keep a fixed distance between two anchor points
+
+struct dxJointDBall : public dxJoint // double-ball joint; also the base class of dxJointDHinge
+{
+    dVector3 anchor1;   // anchor w.r.t first body
+    dVector3 anchor2;   // anchor w.r.t second body; stored unchanged when there is no second body
+    dReal erp;          // error reduction parameter, initialized from the world's global_erp
+    dReal cfm;          // constraint force mix-in, initialized from the world's global_cfm
+    dReal targetDistance; // anchor distance that getInfo2 drives the joint toward
+
+    void set( int num, dReal value ); // NOTE(review): no definition in dball.cpp of this patch - confirm
+    dReal get( int num ); // NOTE(review): no definition in dball.cpp of this patch - confirm
+
+    void updateTargetDistance(); // sets targetDistance from the current anchor positions
+
+    dxJointDBall( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info ); // max_m = 1
+    virtual void getInfo1( Info1* info ); // m = 1, nub = 1
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex ); // fills the single distance row; worldERP is ignored in favour of erp
+    virtual dJointType type() const; // returns dJointTypeDBall
+    virtual sizeint size() const; // returns sizeof(*this)
+
+    virtual void setRelativeValues(); // calls updateTargetDistance()
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/dhinge.cpp b/libs/ode-0.16.1/ode/src/joints/dhinge.cpp
new file mode 100644
index 0000000..e300bf5
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/dhinge.cpp
@@ -0,0 +1,220 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "dhinge.h"
+#include "joint_internal.h"
+
+/*
+ * Double Hinge joint
+ */
+
+dxJointDHinge::dxJointDHinge(dxWorld* w) : // creates a double-hinge joint with zeroed axes
+    dxJointDBall(w) // anchors, erp, cfm and targetDistance are set up by the base constructor
+{
+    dSetZero(axis1, 3); // only the first three components are cleared
+    dSetZero(axis2, 3);
+}
+
+
+void
+dxJointDHinge::getSureMaxInfo( SureMaxInfo* info ) // reports the upper bound on constraint rows
+{
+    info->max_m = 4; // matches the four rows reported by getInfo1
+}
+
+
+void
+dxJointDHinge::getInfo1( dxJoint::Info1* info ) // reports the row counts for this joint
+{
+    info->m = 4; // row 0 from the double-ball base plus rows 1-3 written in getInfo2
+    info->nub = 4; // NOTE(review): presumably the count of unbounded rows - confirm against Info1
+}
+
+
+void
+dxJointDHinge::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex ) // row 0: base distance row; rows 1-2: angular alignment; row 3: constraint along the axis
+{
+    dxJointDBall::getInfo2( worldFPS, worldERP, rowskip, J1, J2, pairskip, pairRhsCfm, pairLoHi, findex ); // sets row0
+    
+    dVector3 globalAxis1; // axis1 passed through dBodyVectorToWorld; node[0].body is used without a null check
+    dBodyVectorToWorld(node[0].body, axis1[0], axis1[1], axis1[2], globalAxis1);
+
+    dxBody *body1 = node[1].body; // the second body; may be null
+
+    // angular constraints, perpendicular to axis
+    dVector3 p, q; // NOTE(review): presumably two unit vectors orthogonal to globalAxis1 - confirm dPlaneSpace
+    dPlaneSpace(globalAxis1, p, q);
+
+    dCopyVector3(J1 + rowskip + GI2__JA_MIN, p); // row 1: angular direction p
+    if ( body1 ) {
+        dCopyNegatedVector3(J2 + rowskip + GI2__JA_MIN, p);
+    }
+
+    dCopyVector3(J1 + 2 * rowskip + GI2__JA_MIN, q); // row 2: angular direction q
+    if ( body1 ) {
+        dCopyNegatedVector3(J2 + 2 * rowskip + GI2__JA_MIN, q);
+    }
+
+    dVector3 globalAxis2; // the second body's copy of the hinge axis
+    if ( body1 ) {
+        dBodyVectorToWorld(body1, axis2[0], axis2[1], axis2[2], globalAxis2);
+    } else {
+        dCopyVector3(globalAxis2, axis2); // no second body: axis2 is used exactly as stored
+    }
+    
+    // similar to the hinge joint
+    dVector3 u; // cross product of the two axes, used as the misalignment measure
+    dCalcVectorCross3(u, globalAxis1, globalAxis2);
+
+    const dReal k = worldFPS * this->erp; // uses the joint's own erp rather than worldERP
+    pairRhsCfm[pairskip + GI2_RHS] = k * dCalcVectorDot3( u, p ); // row 1 correction
+    pairRhsCfm[2 * pairskip + GI2_RHS] = k * dCalcVectorDot3( u, q ); // row 2 correction
+
+
+
+
+    /*
+     * Constraint along the axis: translation along it should couple angular movement.
+     * This is just the ball-and-socket derivation, projected onto the hinge axis,
+     * producing a single constraint at the end.
+     *
+     * The choice of "ball" position can be arbitrary; we could place it at the center
+     * of one of the bodies, canceling out its rotational jacobian; or we could make
+     * everything symmetrical by just placing at the midpoint between the centers.
+     *
+     * I like symmetry, so I'll use the second approach here. I'll call the midpoint h.
+     *
+     * Of course, if the second body is NULL, the first body is pretty much locked
+     * along this axis, and the linear constraint is enough.
+     */
+
+    int rowskip_mul_3 = 3 * rowskip; // offset of row 3 within J1/J2
+    dCopyVector3(J1 + rowskip_mul_3 + GI2__JL_MIN, globalAxis1); // row 3 linear part: the hinge axis
+
+    if ( body1 ) {
+        dVector3 h; // built from both body positions with scales -0.5 and 0.5
+        dAddScaledVectors3(h, node[0].body->posr.pos, body1->posr.pos, -0.5, 0.5);
+
+        dCalcVectorCross3(J1 + rowskip_mul_3 + GI2__JA_MIN, h, globalAxis1); // row 3 angular part for body 0
+
+        dCopyNegatedVector3(J2 + rowskip_mul_3 + GI2__JL_MIN, globalAxis1);
+        dCopyVector3(J2 + rowskip_mul_3 + GI2__JA_MIN, J1 + rowskip_mul_3 + GI2__JA_MIN); // same angular part, not negated
+    }
+
+    // error correction: both anchors should lie on the same plane perpendicular to the axis
+    dVector3 globalA1, globalA2; // recomputed here; the base getInfo2 keeps its own locals
+    dBodyGetRelPointPos(node[0].body, anchor1[0], anchor1[1], anchor1[2], globalA1);
+
+    if ( body1 ) {
+        dBodyGetRelPointPos(body1, anchor2[0], anchor2[1], anchor2[2], globalA2);
+    } else {
+        dCopyVector3(globalA2, anchor2); // no second body: anchor2 is used exactly as stored
+    }
+
+    dVector3 d;
+    dSubtractVectors3(d, globalA1, globalA2); // displacement error
+    pairRhsCfm[3 * pairskip + GI2_RHS] = -k * dCalcVectorDot3(globalAxis1, d); // row 3: remove the component of d along the axis
+}
+
+void dJointSetDHingeAxis( dJointID j, dReal x, dReal y, dReal z ) // sets the hinge axis from vector (x, y, z)
+{
+    dxJointDHinge* joint = static_cast<dxJointDHinge*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    dBodyVectorFromWorld(joint->node[0].body, x, y, z, joint->axis1); // node[0].body is used without a null check
+    if (joint->node[1].body)
+        dBodyVectorFromWorld(joint->node[1].body, x, y, z, joint->axis2);
+    else {
+        joint->axis2[0] = x; // no second body: the components are stored unchanged
+        joint->axis2[1] = y;
+        joint->axis2[2] = z;
+    }
+    dNormalize3(joint->axis1); // normalization happens after the conversion, so the input need not be unit length
+    dNormalize3(joint->axis2);
+}
+
+void dJointGetDHingeAxis( dJointID j, dVector3 result ) // writes the hinge axis, derived from axis1, into result
+{
+    dxJointDHinge* joint = static_cast<dxJointDHinge*>(j); // no joint-type check is performed here
+    dUASSERT( joint, "bad joint argument" );
+
+    dBodyVectorToWorld(joint->node[0].body, joint->axis1[0], joint->axis1[1], joint->axis1[2], result); // node[0].body is used without a null check
+}
+
+
+void dJointSetDHingeAnchor1( dJointID j, dReal x, dReal y, dReal z ) // double-hinge alias of the double-ball setter
+{
+    dJointSetDBallAnchor1(j, x, y, z); // anchor storage lives in the dxJointDBall base
+}
+
+
+void dJointSetDHingeAnchor2( dJointID j, dReal x, dReal y, dReal z ) // double-hinge alias of the double-ball setter
+{
+    dJointSetDBallAnchor2(j, x, y, z); // anchor storage lives in the dxJointDBall base
+}
+
+dReal dJointGetDHingeDistance(dJointID j) // double-hinge alias of the double-ball getter
+{
+    return dJointGetDBallDistance(j); // returns the stored target distance
+}
+
+
+void dJointGetDHingeAnchor1( dJointID j, dVector3 result ) // double-hinge alias of the double-ball getter
+{
+    dJointGetDBallAnchor1(j, result); // result receives the first anchor point
+}
+
+
+void dJointGetDHingeAnchor2( dJointID j, dVector3 result ) // double-hinge alias of the double-ball getter
+{
+    dJointGetDBallAnchor2(j, result); // result receives the second anchor point
+}
+
+
+void dJointSetDHingeParam( dJointID j, int parameter, dReal value ) // double-hinge alias of the double-ball setter
+{
+    dJointSetDBallParam(j, parameter, value); // only dParamCFM and dParamERP have an effect there
+}
+
+
+dReal dJointGetDHingeParam( dJointID j, int parameter ) // double-hinge alias of the double-ball getter
+{
+    return dJointGetDBallParam(j, parameter); // yields 0 for anything but dParamCFM / dParamERP
+}
+
+dJointType
+dxJointDHinge::type() const // joint type tag reported for double-hinge joints
+{
+    return dJointTypeDHinge; // overrides the dJointTypeDBall tag of the base class
+}
+
+sizeint
+dxJointDHinge::size() const // size in bytes of this joint object
+{
+    return sizeof( *this ); // *this is a dxJointDHinge here, so axis1/axis2 are included
+}
diff --git a/libs/ode-0.16.1/ode/src/joints/dhinge.h b/libs/ode-0.16.1/ode/src/joints/dhinge.h
new file mode 100644
index 0000000..efc5688
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/dhinge.h
@@ -0,0 +1,46 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_DHINGE_
+#define _ODE_JOINT_DHINGE_
+
+#include "dball.h"
+
+struct dxJointDHinge : public dxJointDBall // double-hinge joint built on the double-ball joint
+{
+    dVector3 axis1, axis2; // hinge axis as stored for the first and second body respectively
+    
+    dxJointDHinge(dxWorld *w);
+
+    virtual void getSureMaxInfo( SureMaxInfo* info ); // max_m = 4
+    virtual void getInfo1( Info1* info ); // m = 4, nub = 4
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex ); // calls the base getInfo2 for row 0, then fills rows 1-3
+    virtual dJointType type() const; // returns dJointTypeDHinge
+    virtual sizeint size() const; // returns sizeof(*this)
+
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/fixed.cpp b/libs/ode-0.16.1/ode/src/joints/fixed.cpp
new file mode 100644
index 0000000..527bf48
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/fixed.cpp
@@ -0,0 +1,216 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "fixed.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// fixed joint
+
+dxJointFixed::dxJointFixed ( dxWorld *w ) : // creates a fixed joint with zeroed offset and relative rotation
+    dxJoint ( w )
+{
+    dSetZero ( offset, 4 ); // all four stored components are cleared
+    dSetZero ( qrel, 4 ); // all-zero quaternion until computeInitialRelativeRotation() runs
+    erp = world->global_erp; // per-joint ERP starts from the world's global value
+    cfm = world->global_cfm; // per-joint CFM starts from the world's global value
+}
+
+
+void 
+dxJointFixed::getSureMaxInfo( SureMaxInfo* info ) // reports the upper bound on constraint rows
+{
+    info->max_m = 6; // matches the six rows reported by getInfo1
+}
+
+
+void
+dxJointFixed::getInfo1 ( dxJoint::Info1 *info ) // reports the row counts for this joint
+{
+    info->m = 6; // three position rows plus three orientation rows (see getInfo2)
+    info->nub = 6; // NOTE(review): presumably the count of unbounded rows - confirm against Info1
+}
+
+
+void
+dxJointFixed::getInfo2 ( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex ) // fills three position rows and three orientation rows; pairLoHi and findex are not referenced here
+{
+    // Three rows for orientation, written starting at row index dSA__MAX
+    setFixedOrientation ( this, worldFPS, worldERP, 
+        rowskip, J1 + dSA__MAX * rowskip, J2 + dSA__MAX * rowskip,
+        pairskip, pairRhsCfm + dSA__MAX * pairskip, qrel );
+
+    // Three rows for position (rows 0, 1 and 2).
+    // set Jacobian: one unit entry per row on the x, y and z linear slots
+    J1[GI2_JLX] = 1;
+    J1[rowskip + GI2_JLY] = 1;
+    J1[2 * rowskip + GI2_JLZ] = 1;
+
+    dReal k = worldFPS * this->erp; // uses the joint's own erp for the position rows
+    dxBody *b0 = node[0].body, *b1 = node[1].body; // b0 is dereferenced below without a null check
+
+    dVector3 ofs; // stored offset multiplied by body 0's rotation matrix
+    dMultiply0_331 ( ofs, b0->posr.R, offset );
+
+    if ( b1 ) {
+        dSetCrossMatrixPlus( J1 + GI2__JA_MIN, ofs, rowskip ); // angular part of rows 0-2, written only with a second body
+
+        J2[GI2_JLX] = -1; // body 1 gets the negated linear entries
+        J2[rowskip + GI2_JLY] = -1;
+        J2[2 * rowskip + GI2_JLZ] = -1;
+    }
+
+    // set right hand side for the first three rows (linear)
+    if ( b1 ) {
+        for ( int j = 0, currPairSkip = 0; j < 3; currPairSkip += pairskip, ++j ) {
+            pairRhsCfm[currPairSkip + GI2_RHS] = k * ( b1->posr.pos[j] - b0->posr.pos[j] + ofs[j] ); // zero when the bodies keep the stored offset
+        }
+    } else {
+        for ( int j = 0, currPairSkip = 0; j < 3; currPairSkip += pairskip, ++j ) {
+            pairRhsCfm[currPairSkip + GI2_RHS] = k * ( offset[j] - b0->posr.pos[j] ); // no second body: offset is compared with the position directly
+        }
+    }
+
+    dReal cfm = this->cfm; // local copy; shadows the member of the same name
+    pairRhsCfm[GI2_CFM] = cfm; // CFM is set here for the three position rows only
+    pairRhsCfm[pairskip + GI2_CFM] = cfm;
+    pairRhsCfm[2 * pairskip + GI2_CFM] = cfm;
+}
+
+
+void dJointSetFixed ( dJointID j ) // captures the bodies' current relative position and rotation as the joint's rest pose
+{
+    dxJointFixed* joint = ( dxJointFixed* ) j;
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Fixed );
+    int i; // index for the offset loop below
+
+    // This code is taken from dJointSetSliderAxis(), we should really put the
+    // common code in its own function.
+    // compute the offset between the bodies (nothing is stored when body 0 is absent)
+    if ( joint->node[0].body )
+    {
+        if ( joint->node[1].body )
+        {
+            dReal ofs[4]; // body 0 position minus body 1 position
+            for ( i = 0; i < 4; i++ ) // NOTE(review): index 3 is also computed; presumably padding - confirm
+                ofs[i] = joint->node[0].body->posr.pos[i] - joint->node[1].body->posr.pos[i];
+            dMultiply1_331 ( joint->offset, joint->node[0].body->posr.R, ofs ); // offset expressed via body 0's rotation matrix
+        }
+        else
+        {
+            joint->offset[0] = joint->node[0].body->posr.pos[0]; // no second body: offset is body 0's position
+            joint->offset[1] = joint->node[0].body->posr.pos[1];
+            joint->offset[2] = joint->node[0].body->posr.pos[2];
+        }
+    }
+
+    joint->computeInitialRelativeRotation(); // fills qrel; has its own guard for a missing body 0
+}
+
+void dxJointFixed::set ( int num, dReal value ) // sets the joint's CFM or ERP
+{
+    switch ( num ) // any parameter other than the two below is silently ignored
+    {
+    case dParamCFM:
+        cfm = value;
+        break;
+    case dParamERP:
+        erp = value;
+        break;
+    }
+}
+
+
+dReal dxJointFixed::get ( int num ) // reads the joint's CFM or ERP
+{
+    switch ( num )
+    {
+    case dParamCFM:
+        return cfm;
+    case dParamERP:
+        return erp;
+    default:
+        return 0; // unsupported parameters read as 0
+    }
+}
+
+
+void dJointSetFixedParam ( dJointID j, int parameter, dReal value ) // public setter for a fixed joint's CFM / ERP
+{
+    dxJointFixed* joint = ( dxJointFixed* ) j;
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Fixed );
+    joint->set ( parameter, value ); // parameters other than dParamCFM / dParamERP are ignored by set()
+}
+
+
+dReal dJointGetFixedParam ( dJointID j, int parameter ) // public getter for a fixed joint's CFM / ERP
+{
+    dxJointFixed* joint = ( dxJointFixed* ) j;
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Fixed );
+    return joint->get ( parameter ); // get() yields 0 for parameters other than dParamCFM / dParamERP
+}
+
+
+dJointType
+dxJointFixed::type() const // joint type tag reported for fixed joints
+{
+    return dJointTypeFixed; // constant; does not depend on the joint's state
+}
+
+
+sizeint
+dxJointFixed::size() const // size in bytes of this joint object
+{
+    return sizeof ( *this ); // *this is a dxJointFixed in this member function
+}
+
+void
+dxJointFixed::computeInitialRelativeRotation() // fills qrel from the bodies' current quaternions
+{
+    if (node[0].body ) // qrel is left untouched when there is no first body
+    {
+        if (node[1].body )
+        {
+            dQMultiply1 (qrel, node[0].body->q, node[1].body->q ); // NOTE(review): presumably inverse(q0) * q1 - confirm dQMultiply1 convention
+        }
+        else
+        {
+            // set qrel to the conjugate of the first body q (last three components negated)
+            qrel[0] =  node[0].body->q[0];
+            qrel[1] = -node[0].body->q[1];
+            qrel[2] = -node[0].body->q[2];
+            qrel[3] = -node[0].body->q[3];
+        }
+    }
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/fixed.h b/libs/ode-0.16.1/ode/src/joints/fixed.h
new file mode 100644
index 0000000..c0f6932
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/fixed.h
@@ -0,0 +1,54 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_FIXED_H_
+#define _ODE_JOINT_FIXED_H_
+
+#include "joint.h"
+
+
+// fixed
+
+// Fixed joint. Its own state is a stored relative rotation (qrel), an offset
+// between the bodies, and per-joint erp / cfm values.
+struct dxJointFixed : public dxJoint
+{
+    dQuaternion qrel;   // initial relative rotation body1 -> body2
+    dVector3 offset;    // relative offset between the bodies
+    dReal erp;          // error reduction parameter
+    dReal cfm;          // constraint force mix-in
+    // accessors for erp / cfm, selected with dParamERP / dParamCFM
+    void  set ( int num, dReal value );
+    dReal get ( int num );
+
+    dxJointFixed ( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1 ( Info1* info );
+    virtual void getInfo2 ( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+    // recomputes qrel from the current orientations of the attached bodies
+    void computeInitialRelativeRotation();
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/hinge.cpp b/libs/ode-0.16.1/ode/src/joints/hinge.cpp
new file mode 100644
index 0000000..70dcd78
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/hinge.cpp
@@ -0,0 +1,394 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "hinge.h"
+#include "joint_internal.h"
+
+
+//****************************************************************************
+// hinge
+
+// Construct a hinge in world w: anchors and qrel are zeroed, both axes are
+// set to (1,0,0), and limot.init() is called with the owning world.
+dxJointHinge::dxJointHinge( dxWorld *w ) :
+    dxJoint( w )
+{
+    // anchors and relative rotation start as all zeros
+    dSetZero( anchor1, 4 );
+    dSetZero( anchor2, 4 );
+    dSetZero( qrel, 4 );
+
+    // both axes default to the unit x vector
+    dSetZero( axis1, 4 );
+    dSetZero( axis2, 4 );
+    axis1[0] = 1;
+    axis2[0] = 1;
+
+    limot.init( world );
+}
+
+
+// Sets info->max_m to 6, the largest row count getInfo1() reports.
+void dxJointHinge::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6;
+}
+
+
+// Report the constraint row counts: nub is always 5, and m is 5 unless the
+// hinge is powered (limot.fmax > 0) or the rotational limit test fires, in
+// which case m is 6.
+void
+dxJointHinge::getInfo1( dxJoint::Info1 *info )
+{
+    info->nub = 5;
+
+    // a powered hinge needs an extra constraint row
+    info->m = 5;
+    if ( limot.fmax > 0 )
+        info->m = 6;
+
+    // the joint angle is only tested when the stops pass this check
+    const bool stopInRange = limot.lostop >= -M_PI || limot.histop <= M_PI;
+    const bool stopsOrdered = limot.lostop <= limot.histop;
+    if ( stopInRange && stopsOrdered )
+    {
+        const dReal angle = getHingeAngle( node[0].body, node[1].body, axis1, qrel );
+        if ( limot.testRotationalLimit( angle ) )
+            info->m = 6;
+    }
+}
+
+
+// Fill in the constraint rows of the hinge.
+// Rows 0..2 are written by setBall(), rows 3 and 4 get the angular Jacobian
+// entries p and q (negated for the second body when one is attached) and the
+// matching right hand sides, and the row after those is handed to
+// limot.addLimot() for the motor / limit. findex is not used here.
+void dxJointHinge::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    // set the three ball-and-socket rows
+    setBall( this, worldFPS, worldERP, rowskip, J1, J2, pairskip, pairRhsCfm, anchor1, anchor2 );
+
+    // set the two hinge rows. the hinge axis should be the only unconstrained
+    // rotational axis, the angular velocity of the two bodies perpendicular to
+    // the hinge axis should be equal. thus the constraint equations are
+    //    p*w1 - p*w2 = 0
+    //    q*w1 - q*w2 = 0
+    // where p and q are unit vectors normal to the hinge axis, and w1 and w2
+    // are the angular velocity vectors of the two bodies.
+
+    dVector3 ax1;  // length 1 joint axis in global coordinates, from 1st body
+    dVector3 p, q; // plane space vectors for ax1
+    dMultiply0_331( ax1, node[0].body->posr.R, axis1 );
+    dPlaneSpace( ax1, p, q );
+
+    // note: despite its name, body1 holds the SECOND attached body (node[1])
+    dxBody *body1 = node[1].body;
+    
+    // row 3: angular part along p
+    int currRowSkip = 3 * rowskip;
+    dCopyVector3(J1 + currRowSkip + GI2__JA_MIN, p);
+    if ( body1 ) {
+        dCopyNegatedVector3(J2 + currRowSkip + GI2__JA_MIN, p);
+    }
+
+    // row 4: angular part along q
+    currRowSkip += rowskip;
+    dCopyVector3(J1 + currRowSkip + GI2__JA_MIN, q);
+    if ( body1 ) {
+        dCopyNegatedVector3(J2 + currRowSkip + GI2__JA_MIN, q);
+    }
+
+    // compute the right hand side of the constraint equation. set relative
+    // body velocities along p and q to bring the hinge back into alignment.
+    // if ax1,ax2 are the unit length hinge axes as computed from body1 and
+    // body2, we need to rotate both bodies along the axis u = (ax1 x ax2).
+    // if `theta' is the angle between ax1 and ax2, we need an angular velocity
+    // along u to cover angle erp*theta in one step :
+    //   |angular_velocity| = angle/time = erp*theta / stepsize
+    //                      = (erp*fps) * theta
+    //    angular_velocity  = |angular_velocity| * (ax1 x ax2) / |ax1 x ax2|
+    //                      = (erp*fps) * theta * (ax1 x ax2) / sin(theta)
+    // ...as ax1 and ax2 are unit length. if theta is smallish,
+    // theta ~= sin(theta), so
+    //    angular_velocity  = (erp*fps) * (ax1 x ax2)
+    // ax1 x ax2 is in the plane space of ax1, so we project the angular
+    // velocity to p and q to find the right hand side.
+
+    dVector3 b;
+    if ( body1 ) {
+        dVector3 ax2;
+        dMultiply0_331( ax2, body1->posr.R, axis2 );
+        dCalcVectorCross3( b, ax1, ax2 );
+    } else {
+        // no second body: axis2 is used as stored, without any rotation
+        dCalcVectorCross3( b, ax1, axis2 );
+    }
+
+    dReal k = worldFPS * worldERP;
+    int currPairSkip = 3 * pairskip;
+    pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3( b, p );
+    currPairSkip += pairskip;
+    pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3( b, q );
+
+    // if the hinge is powered, or has joint limits, add in the stuff
+    currRowSkip += rowskip;
+    currPairSkip += pairskip;
+    limot.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 );
+}
+
+
+
+// Set the hinge anchor from the point (x,y,z): setAnchors() fills both
+// stored anchors, then qrel is recomputed.
+void dJointSetHingeAnchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    setAnchors( hinge, x, y, z, hinge->anchor1, hinge->anchor2 );
+    hinge->computeInitialRelativeRotation();
+}
+
+
+// Set the hinge anchors from the point (x,y,z).
+// anchor1 is that point expressed relative to the first body. anchor2 is the
+// same point relative to the second body when one is attached, otherwise
+// (x+dx, y+dy, z+dz). When no first body is attached only the 4th component
+// of both anchors is cleared. qrel is recomputed at the end.
+void dJointSetHingeAnchorDelta( dJointID j, dReal x, dReal y, dReal z, dReal dx, dReal dy, dReal dz )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    dxBody *first = hinge->node[0].body;
+    if ( first )
+    {
+        dReal rel[4];
+        rel[0] = x - first->posr.pos[0];
+        rel[1] = y - first->posr.pos[1];
+        rel[2] = z - first->posr.pos[2];
+        rel[3] = 0;
+        dMultiply1_331( hinge->anchor1, first->posr.R, rel );
+
+        dxBody *second = hinge->node[1].body;
+        if ( !second )
+        {
+            // Move the relative displacement between the passive body and the
+            //  anchor in the same direction as the passive body has just moved
+            hinge->anchor2[0] = x + dx;
+            hinge->anchor2[1] = y + dy;
+            hinge->anchor2[2] = z + dz;
+        }
+        else
+        {
+            rel[0] = x - second->posr.pos[0];
+            rel[1] = y - second->posr.pos[1];
+            rel[2] = z - second->posr.pos[2];
+            rel[3] = 0;
+            dMultiply1_331( hinge->anchor2, second->posr.R, rel );
+        }
+    }
+
+    hinge->anchor1[3] = 0;
+    hinge->anchor2[3] = 0;
+
+    hinge->computeInitialRelativeRotation();
+}
+
+
+
+// Set the hinge axis from the direction (x,y,z): setAxes() fills both stored
+// axes, then qrel is recomputed.
+void dJointSetHingeAxis( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    setAxes( hinge, x, y, z, hinge->axis1, hinge->axis2 );
+    hinge->computeInitialRelativeRotation();
+}
+
+
+// Set the hinge axis like dJointSetHingeAxis(), then fold an extra rotation
+// of dangle about (x,y,z) into qrel. The angle is negated first when the
+// joint carries the dJOINT_REVERSE flag.
+void dJointSetHingeAxisOffset( dJointID j, dReal x, dReal y, dReal z, dReal dangle )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    setAxes( hinge, x, y, z, hinge->axis1, hinge->axis2 );
+    hinge->computeInitialRelativeRotation();
+
+    const dReal signedAngle = ( hinge->flags & dJOINT_REVERSE ) ? -dangle : dangle;
+
+    dQuaternion rotation;
+    dQFromAxisAndAngle(rotation, x, y, z, signedAngle);
+
+    // combine into a temporary, then copy back over qrel
+    dQuaternion combined;
+    dQMultiply3(combined, rotation, hinge->qrel);
+    for ( int i = 0; i < 4; ++i )
+        hinge->qrel[i] = combined[i];
+}
+
+
+
+// Write the hinge anchor into result. Uses getAnchor() on anchor1, or
+// getAnchor2() on anchor2 when the joint carries the dJOINT_REVERSE flag.
+void dJointGetHingeAnchor( dJointID j, dVector3 result )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge, Hinge );
+
+    const bool reversed = ( hinge->flags & dJOINT_REVERSE ) != 0;
+    if ( !reversed )
+        getAnchor( hinge, result, hinge->anchor1 );
+    else
+        getAnchor2( hinge, result, hinge->anchor2 );
+}
+
+
+// Counterpart of dJointGetHingeAnchor(): uses getAnchor2() on anchor2, or
+// getAnchor() on anchor1 when the joint carries the dJOINT_REVERSE flag.
+void dJointGetHingeAnchor2( dJointID j, dVector3 result )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge, Hinge );
+
+    const bool reversed = ( hinge->flags & dJOINT_REVERSE ) != 0;
+    if ( !reversed )
+        getAnchor2( hinge, result, hinge->anchor2 );
+    else
+        getAnchor( hinge, result, hinge->anchor1 );
+}
+
+
+// Write the hinge axis into result via getAxis() on the stored axis1.
+void dJointGetHingeAxis( dJointID j, dVector3 result )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge, Hinge );
+
+    getAxis( hinge, result, hinge->axis1 );
+}
+
+
+// Forward a parameter assignment to the hinge's limit/motor object.
+void dJointSetHingeParam( dJointID j, int parameter, dReal value )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    hinge->limot.set( parameter, value );
+}
+
+
+// Read a parameter back from the hinge's limit/motor object.
+dReal dJointGetHingeParam( dJointID j, int parameter )
+{
+    dxJointHinge* hinge = static_cast<dxJointHinge*>( j );
+    dUASSERT( hinge, "bad joint argument" );
+    checktype( hinge, Hinge );
+
+    const dReal value = hinge->limot.get( parameter );
+    return value;
+}
+
+
+// Current hinge angle as computed by getHingeAngle(), negated when the joint
+// carries the dJOINT_REVERSE flag. Returns 0 when no first body is attached.
+dReal dJointGetHingeAngle( dJointID j )
+{
+    dxJointHinge* joint = static_cast<dxJointHinge*>( j );
+    dAASSERT( joint );
+    checktype( joint, Hinge );
+
+    dxBody *first = joint->node[0].body;
+    if ( !first )
+        return 0;
+
+    const dReal angle = getHingeAngle( first, joint->node[1].body, joint->axis1, joint->qrel );
+    return ( joint->flags & dJOINT_REVERSE ) ? -angle : angle;
+}
+
+
+// Rate of change of the hinge angle: the first body's angular velocity
+// projected on the rotated axis1, minus the second body's projection when a
+// second body is attached, negated when the joint carries dJOINT_REVERSE.
+// Returns 0 when no first body is attached.
+dReal dJointGetHingeAngleRate( dJointID j )
+{
+    dxJointHinge* joint = static_cast<dxJointHinge*>( j );
+    dAASSERT( joint );
+    checktype( joint, Hinge );
+
+    dxBody *first = joint->node[0].body;
+    if ( !first )
+        return 0;
+
+    // axis1 rotated by the first body's rotation matrix
+    dVector3 rotatedAxis;
+    dMultiply0_331( rotatedAxis, first->posr.R, joint->axis1 );
+
+    dReal rate = dCalcVectorDot3( rotatedAxis, first->avel );
+
+    dxBody *second = joint->node[1].body;
+    if ( second )
+        rate -= dCalcVectorDot3( rotatedAxis, second->avel );
+
+    if ( joint->flags & dJOINT_REVERSE )
+        return -rate;
+    return rate;
+}
+
+
+// Apply a torque about the hinge axis: +torque*axis to the first body and
+// the opposite torque to the second body, for whichever bodies are attached.
+// The torque is negated first when the joint carries dJOINT_REVERSE.
+void dJointAddHingeTorque( dJointID j, dReal torque )
+{
+    dxJointHinge* joint = static_cast<dxJointHinge*>( j );
+    dAASSERT( joint );
+    checktype( joint, Hinge );
+
+    const dReal signedTorque = ( joint->flags & dJOINT_REVERSE ) ? -torque : torque;
+
+    // axis from getAxis(), scaled by the torque magnitude
+    dVector3 scaled;
+    getAxis( joint, scaled, joint->axis1 );
+    for ( int i = 0; i < 3; ++i )
+        scaled[i] *= signedTorque;
+
+    if ( joint->node[0].body )
+        dBodyAddTorque( joint->node[0].body, scaled[0], scaled[1], scaled[2] );
+    if ( joint->node[1].body )
+        dBodyAddTorque( joint->node[1].body, -scaled[0], -scaled[1], -scaled[2] );
+}
+
+
+// Type tag of this joint class.
+dJointType dxJointHinge::type() const
+{
+    return dJointTypeHinge;
+}
+
+
+
+// Size in bytes of this joint class.
+sizeint dxJointHinge::size() const
+{
+    return sizeof( dxJointHinge );
+}
+
+
+// Re-derive the stored anchors, axes and qrel: the current anchor and axis
+// are read back through the public getters and fed to setAnchors() and
+// setAxes() again.
+void
+dxJointHinge::setRelativeValues()
+{
+    dVector3 currentAnchor;
+    dJointGetHingeAnchor(this, currentAnchor);
+    setAnchors( this, currentAnchor[0], currentAnchor[1], currentAnchor[2], anchor1, anchor2 );
+
+    dVector3 currentAxis;
+    dJointGetHingeAxis(this, currentAxis);
+    setAxes( this, currentAxis[0], currentAxis[1], currentAxis[2], axis1, axis2 );
+
+    computeInitialRelativeRotation();
+}
+
+
+/// Compute initial relative rotation body1 -> body2, or env -> body1.
+/// qrel is left unchanged when no first body is attached.
+void
+dxJointHinge::computeInitialRelativeRotation()
+{
+    dxBody *first = node[0].body;
+    if ( !first )
+        return;
+
+    dxBody *second = node[1].body;
+    if ( second )
+    {
+        dQMultiply1( qrel, first->q, second->q );
+        return;
+    }
+
+    // single body: copy component 0 of the first body's q and negate
+    // components 1..3
+    qrel[0] = first->q[0];
+    for ( int i = 1; i < 4; ++i )
+        qrel[i] = -first->q[i];
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/hinge.h b/libs/ode-0.16.1/ode/src/joints/hinge.h
new file mode 100644
index 0000000..0fb4dba
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/hinge.h
@@ -0,0 +1,57 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_HINGE_H_
+#define _ODE_JOINT_HINGE_H_
+
+#include "joint.h"
+
+
+// hinge
+
+// Hinge joint. Stores the anchor and axis once per body, the relative
+// rotation qrel used for the angle measurement, and one limit/motor object.
+struct dxJointHinge : public dxJoint
+{
+    dVector3 anchor1;   // anchor w.r.t first body
+    dVector3 anchor2;   // anchor w.r.t second body
+    dVector3 axis1;     // axis w.r.t first body
+    dVector3 axis2;     // axis w.r.t second body
+    dQuaternion qrel;   // initial relative rotation body1 -> body2
+    dxJointLimitMotor limot; // limit and motor information
+
+    dxJointHinge( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+    // re-derives anchors, axes and qrel from the current getter values
+    virtual void setRelativeValues();
+
+    // recomputes qrel from the current orientations of the attached bodies
+    void computeInitialRelativeRotation();
+};
+
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/hinge2.cpp b/libs/ode-0.16.1/ode/src/joints/hinge2.cpp
new file mode 100644
index 0000000..89d5e30
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/hinge2.cpp
@@ -0,0 +1,546 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "hinge2.h"
+#include "joint_internal.h"
+
+
+
+
+//****************************************************************************
+// hinge 2. note that this joint must be attached to two bodies for it to work
+
+// Measure the joint angle about axis 1: axis 2 is brought into the first
+// body's frame and its components along v1 and v2 give the angle.
+// A missing body is treated as having no rotation.
+dReal
+dxJointHinge2::measureAngle1() const
+{
+    dxBody *first = node[0].body;
+    dxBody *second = node[1].body;
+
+    // axis 2 rotated by the second body's rotation matrix
+    dVector3 rotated;
+    if ( second )
+        dMultiply0_331( rotated, second->posr.R, axis2 );
+    else
+        dCopyVector3(rotated, axis2);
+
+    // ... then brought into the first body's reference frame
+    dVector3 inFirst;
+    if ( first )
+        dMultiply1_331( inFirst, first->posr.R, rotated );
+    else
+        dCopyVector3(inFirst, rotated);
+
+    const dReal alongV1 = dCalcVectorDot3( v1, inFirst );
+    const dReal alongV2 = dCalcVectorDot3( v2, inFirst );
+    return -dAtan2( alongV2, alongV1 );
+}
+
+// Measure the joint angle about axis 2: axis 1 is brought into the second
+// body's frame and its components along w1 and w2 give the angle.
+// A missing body is treated as having no rotation.
+dReal
+dxJointHinge2::measureAngle2() const
+{
+    dxBody *first = node[0].body;
+    dxBody *second = node[1].body;
+
+    // axis 1 rotated by the first body's rotation matrix
+    dVector3 rotated;
+    if ( first )
+        dMultiply0_331( rotated, first->posr.R, axis1 );
+    else
+        dCopyVector3(rotated, axis1);
+
+    // ... then brought into the second body's reference frame
+    dVector3 inSecond;
+    if ( second )
+        dMultiply1_331( inSecond, second->posr.R, rotated );
+    else
+        dCopyVector3(inSecond, rotated);
+
+    const dReal alongW1 = dCalcVectorDot3( w1, inSecond );
+    const dReal alongW2 = dCalcVectorDot3( w2, inSecond );
+    return -dAtan2( alongW2, alongW1 );
+}
+
+
+// Construct a hinge-2 joint in world w.
+// Anchors are zeroed, axis1 defaults to (1,0,0) and axis2 to (0,1,0), the
+// angle reference vectors get fixed defaults, both limit/motor objects are
+// initialised with the world, the suspension erp/cfm are copied from the
+// world's global values, and the joint is flagged dJOINT_TWOBODIES.
+dxJointHinge2::dxJointHinge2( dxWorld *w ) :
+    dxJoint( w )
+{
+    dSetZero( anchor1, 4 );
+    dSetZero( anchor2, 4 );
+    dSetZero( axis1, 4 );
+    axis1[0] = 1;
+    dSetZero( axis2, 4 );
+    axis2[1] = 1;
+    c0 = 0;
+    s0 = 0;
+
+    dSetZero( v1, 4 );
+    v1[0] = 1;
+    dSetZero( v2, 4 );
+    v2[1] = 1;
+
+    // w1/w2 are read by measureAngle2() and otherwise only written by
+    // makeW1andW2(); give them defined defaults (same pattern as v1/v2) so
+    // the angle can be queried before the axes are set.
+    dSetZero( w1, 4 );
+    w1[0] = 1;
+    dSetZero( w2, 4 );
+    w2[1] = 1;
+
+    limot1.init( world );
+    limot2.init( world );
+
+    susp_erp = world->global_erp;
+    susp_cfm = world->global_cfm;
+
+    flags |= dJOINT_TWOBODIES;
+}
+
+
+// Sets info->max_m to 6, the largest row count getInfo1() can report.
+void dxJointHinge2::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6;
+}
+
+
+// Report the constraint row counts: 4 rows always (nub = 4), plus one row
+// when axis 1 is powered or at a limit, plus one row when axis 2 is powered.
+// Also resets the limit state of both limit/motor objects.
+void
+dxJointHinge2::getInfo1( dxJoint::Info1 *info )
+{
+    info->m = 4;
+    info->nub = 4;
+
+    // axis 1: test the measured angle when the stops pass this check
+    limot1.limit = 0;
+    const bool stopInRange = limot1.lostop >= -M_PI || limot1.histop <= M_PI;
+    const bool stopsOrdered = limot1.lostop <= limot1.histop;
+    if ( stopInRange && stopsOrdered )
+    {
+        limot1.testRotationalLimit( measureAngle1() );
+    }
+    if ( limot1.limit || limot1.fmax > 0 )
+        ++info->m;
+
+    // axis 2: only powering adds a row (we currently never limit this axis)
+    limot2.limit = 0;
+    if ( limot2.fmax > 0 )
+        ++info->m;
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// Computes axis 1 and axis 2 in global coordinates (they are stored relative
+/// to body 1 and body 2), their cross product, and the sine and cosine of the
+/// angle between them (from the cross and dot product rules).
+/// Both bodies are dereferenced without a check.
+///
+/// @param ax1 Will contain the joint axis1 in world frame
+/// @param ax2 Will contain the joint axis2 in world frame
+/// @param axCross Will contain the cross product of ax1 x ax2
+/// @param sin_angle Will contain the length of axCross
+/// @param cos_angle Will contain the dot product of ax1 and ax2
+////////////////////////////////////////////////////////////////////////////////
+void
+dxJointHinge2::getAxisInfo(dVector3 ax1, dVector3 ax2, dVector3 axCross,
+                           dReal &sin_angle, dReal &cos_angle) const
+{
+    dMultiply0_331 (ax1, node[0].body->posr.R, axis1);
+    dMultiply0_331 (ax2, node[1].body->posr.R, axis2);
+
+    dCalcVectorCross3(axCross,ax1,ax2);
+
+    const dReal crossLengthSquared =
+        axCross[0]*axCross[0] + axCross[1]*axCross[1] + axCross[2]*axCross[2];
+    sin_angle = dSqrt (crossLengthSquared);
+    cos_angle = dCalcVectorDot3 (ax1,ax2);
+}
+
+
+// Fill in the constraint rows of the hinge-2 joint.
+// Rows 0..2 are written by setBall2() (aligned to ax1, using susp_erp), row 3
+// constrains rotation about the normalised ax1 x ax2, and up to two further
+// rows are handed to limot1 / limot2. findex is not used here.
+void
+dxJointHinge2::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    // get information we need to set the hinge row
+    dReal s, c;
+    dVector3 q;
+
+    dVector3 ax1, ax2;
+    getAxisInfo( ax1, ax2, q, s, c );
+    dNormalize3( q );   // @@@ quicker: divide q by s ?
+
+    // set the three ball-and-socket rows (aligned to the suspension axis ax1)
+    setBall2( this, worldFPS, worldERP, rowskip, J1, J2, pairskip, pairRhsCfm, anchor1, anchor2, ax1, susp_erp );
+    // set parameter for the suspension
+    pairRhsCfm[GI2_CFM] = susp_cfm;
+
+    // set the hinge row
+    int currRowSkip = 3 * rowskip;
+    dCopyVector3(J1 + currRowSkip + GI2__JA_MIN, q);
+    if ( node[1].body ) {
+        dCopyNegatedVector3(J2 + currRowSkip + GI2__JA_MIN, q);
+    }
+
+    // compute the right hand side for the constrained rotational DOF.
+    // axis 1 and axis 2 are separated by an angle `theta'. the desired
+    // separation angle is theta0. sin(theta0) and cos(theta0) are recorded
+    // in the joint structure. the correcting angular velocity is:
+    //   |angular_velocity| = angle/time = erp*(theta0-theta) / stepsize
+    //                      = (erp*fps) * (theta0-theta)
+    // (theta0-theta) can be computed using the following small-angle-difference
+    // approximation:
+    //   theta0-theta ~= tan(theta0-theta)
+    //                 = sin(theta0-theta)/cos(theta0-theta)
+    //                 = (c*s0 - s*c0) / (c*c0 + s*s0)
+    //                 = c*s0 - s*c0         assuming c*c0 + s*s0 ~= 1
+    // where c = cos(theta), s = sin(theta)
+    //       c0 = cos(theta0), s0 = sin(theta0)
+
+    dReal k = worldFPS * worldERP;
+
+    // NOTE(review): the expression below is (c0*s - s0*c), i.e. the negation
+    // of the (c*s0 - s*c0) form derived above; presumably the sign follows
+    // the Jacobian convention of this row -- confirm.
+    int currPairSkip = 3 * pairskip;
+    pairRhsCfm[currPairSkip + GI2_RHS] = k * ( c0 * s - this->s0 * c );
+
+    currRowSkip += rowskip; currPairSkip += pairskip;
+    // if the axis1 hinge is powered, or has joint limits, add in more stuff
+    // (the row is only consumed when addLimot() returns non-zero)
+    if (limot1.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 )) {
+        currRowSkip += rowskip; currPairSkip += pairskip;
+    }
+
+    // if the axis2 hinge is powered, add in more stuff
+    limot2.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax2, 1 );
+}
+
+
+// compute vectors v1 and v2 (embedded in body1), used to measure angle
+// between body 1 and body 2.
+// v1 is axis 2 made perpendicular to axis 1, v2 is axis1 x v1; both are
+// stored in the first body's frame. Nothing is changed unless both bodies
+// are attached. If the two axes are parallel the vectors are left as they
+// were and a usage assertion is raised.
+
+void
+dxJointHinge2::makeV1andV2()
+{
+    // both body rotations are read below, so both bodies are required
+    if ( node[0].body && node[1].body )
+    {
+        // get axis 1 and 2 in global coords
+        dVector3 ax1, ax2, v;
+        dMultiply0_331( ax1, node[0].body->posr.R, axis1 );
+        dMultiply0_331( ax2, node[1].body->posr.R, axis2 );
+
+        // modify axis 2 so it's perpendicular to axis 1
+        dReal k = dCalcVectorDot3( ax1, ax2 );
+        dAddVectorScaledVector3(ax2, ax2, ax1, -k);
+
+        if (dxSafeNormalize3( ax2 )) {
+            // make v1 = modified axis2, v2 = axis1 x (modified axis2)
+            dCalcVectorCross3( v, ax1, ax2 );
+            dMultiply1_331( v1, node[0].body->posr.R, ax2 );
+            dMultiply1_331( v2, node[0].body->posr.R, v );
+        }
+        else {
+            dUASSERT(false, "Hinge2 axes must be chosen to be linearly independent");
+        }
+    }
+}
+
+// compute vectors w1 and w2 (embedded in body2), used to measure the angle
+// about the second axis.
+// w1 is axis 1 made perpendicular to axis 2, w2 is axis2 x w1; both are
+// stored in the second body's frame. Nothing is changed unless both bodies
+// are attached. If the two axes are parallel the vectors are left as they
+// were and a usage assertion is raised.
+
+void
+dxJointHinge2::makeW1andW2()
+{
+    // both body rotations are read below, so both bodies are required
+    if ( node[0].body && node[1].body )
+    {
+        // get axis 1 and 2 in global coords
+        dVector3 ax1, ax2, w;
+        dMultiply0_331( ax1, node[0].body->posr.R, axis1 );
+        dMultiply0_331( ax2, node[1].body->posr.R, axis2 );
+
+        // modify axis 1 so it's perpendicular to axis 2
+        dReal k = dCalcVectorDot3( ax2, ax1 );
+        dAddVectorScaledVector3(ax1, ax1, ax2, -k);
+
+        if (dxSafeNormalize3( ax1 )) {
+            // make w1 = modified axis1, w2 = axis2 x (modified axis1)
+            dCalcVectorCross3( w, ax2, ax1 );
+            dMultiply1_331( w1, node[1].body->posr.R, ax1 );
+            dMultiply1_331( w2, node[1].body->posr.R, w );
+        }
+        else {
+            dUASSERT(false, "Hinge2 axes must be chosen to be linearly independent");
+        }
+    }
+}
+
+
+/*ODE_API */
+// Set the joint anchor from the point (x,y,z), then refresh the angle
+// reference vectors v1/v2 and w1/w2.
+void dJointSetHinge2Anchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    setAnchors( hinge2, x, y, z, hinge2->anchor1, hinge2->anchor2 );
+
+    hinge2->makeV1andV2();
+    hinge2->makeW1andW2();
+}
+
+
+/*ODE_API */
+// Set axis 1 and/or axis 2 of the joint; a NULL pointer leaves that axis
+// unchanged (at least one must be non-NULL). Afterwards the stored sin/cos
+// of the angle between the axes and the reference vectors are refreshed.
+void dJointSetHinge2Axes (dJointID j, const dReal *axis1/*=[dSA__MAX],=NULL*/, const dReal *axis2/*=[dSA__MAX],=NULL*/)
+{
+    dxJointHinge2* joint = static_cast<dxJointHinge2*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Hinge2 );
+
+    dAASSERT(axis1 != NULL || axis2 != NULL);
+    dAASSERT(joint->node[0].body != NULL || axis1 == NULL);
+    dAASSERT(joint->node[1].body != NULL || axis2 == NULL);
+
+    if ( axis1 )
+        setAxes(joint, axis1[dSA_X], axis1[dSA_Y], axis1[dSA_Z], joint->axis1, NULL);
+
+    if ( axis2 )
+        setAxes(joint, axis2[dSA_X], axis2[dSA_Y], axis2[dSA_Z], NULL, joint->axis2);
+
+    // record the sin and cos of the angle between axis 1 and axis 2; the
+    // vector outputs are not needed here
+    dVector3 worldAxis1, worldAxis2, cross;
+    joint->getAxisInfo( worldAxis1, worldAxis2, cross, joint->s0, joint->c0 );
+
+    joint->makeV1andV2();
+    joint->makeW1andW2();
+}
+
+
+/*ODE_API_DEPRECATED ODE_API */
+// Set only axis 1: packs (x,y,z) into a vector and forwards to
+// dJointSetHinge2Axes() with a NULL second axis.
+void dJointSetHinge2Axis1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dVector3 packed;
+    packed[dSA_X] = x;
+    packed[dSA_Y] = y;
+    packed[dSA_Z] = z;
+
+    dJointSetHinge2Axes(j, packed, NULL);
+}
+
+/*ODE_API_DEPRECATED ODE_API */
+// Set only axis 2: packs (x,y,z) into a vector and forwards to
+// dJointSetHinge2Axes() with a NULL first axis.
+void dJointSetHinge2Axis2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dVector3 packed;
+    packed[dSA_X] = x;
+    packed[dSA_Y] = y;
+    packed[dSA_Z] = z;
+
+    dJointSetHinge2Axes(j, NULL, packed);
+}
+
+
+// Set a joint parameter. Parameters whose 0xff00 bits equal 0x100 go to
+// limot2 (with those bits masked off); the two suspension parameters are
+// stored on the joint itself; everything else goes to limot1.
+void dJointSetHinge2Param( dJointID j, int parameter, dReal value )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    if (( parameter & 0xff00 ) == 0x100 )
+    {
+        hinge2->limot2.set( parameter & 0xff, value );
+        return;
+    }
+
+    switch ( parameter )
+    {
+    case dParamSuspensionERP:
+        hinge2->susp_erp = value;
+        break;
+    case dParamSuspensionCFM:
+        hinge2->susp_cfm = value;
+        break;
+    default:
+        hinge2->limot1.set( parameter, value );
+        break;
+    }
+}
+
+
+// Write the joint anchor into result. Uses getAnchor() on anchor1, or
+// getAnchor2() on anchor2 when the joint carries the dJOINT_REVERSE flag.
+void dJointGetHinge2Anchor( dJointID j, dVector3 result )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge2, Hinge2 );
+
+    const bool reversed = ( hinge2->flags & dJOINT_REVERSE ) != 0;
+    if ( !reversed )
+        getAnchor( hinge2, result, hinge2->anchor1 );
+    else
+        getAnchor2( hinge2, result, hinge2->anchor2 );
+}
+
+
+// Counterpart of dJointGetHinge2Anchor(): uses getAnchor2() on anchor2, or
+// getAnchor() on anchor1 when the joint carries the dJOINT_REVERSE flag.
+void dJointGetHinge2Anchor2( dJointID j, dVector3 result )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge2, Hinge2 );
+
+    const bool reversed = ( hinge2->flags & dJOINT_REVERSE ) != 0;
+    if ( !reversed )
+        getAnchor2( hinge2, result, hinge2->anchor2 );
+    else
+        getAnchor( hinge2, result, hinge2->anchor1 );
+}
+
+
+// Write axis 1, rotated by the first body's rotation matrix, into result.
+// Without a first body the result is zeroed and a usage assertion is raised.
+void dJointGetHinge2Axis1( dJointID j, dVector3 result )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge2, Hinge2 );
+
+    dxBody *first = hinge2->node[0].body;
+    if ( !first )
+    {
+        dZeroVector3(result);
+        dUASSERT( false, "the joint does not have first body attached" );
+        return;
+    }
+
+    dMultiply0_331( result, first->posr.R, hinge2->axis1 );
+}
+
+
+// Write axis 2, rotated by the second body's rotation matrix, into result.
+// Without a second body the result is zeroed and a usage assertion is raised.
+void dJointGetHinge2Axis2( dJointID j, dVector3 result )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( hinge2, Hinge2 );
+
+    dxBody *second = hinge2->node[1].body;
+    if ( !second )
+    {
+        dZeroVector3(result);
+        dUASSERT( false, "the joint does not have second body attached" );
+        return;
+    }
+
+    dMultiply0_331( result, second->posr.R, hinge2->axis2 );
+}
+
+
+// Read a joint parameter, using the same routing as dJointSetHinge2Param():
+// 0x100-group parameters come from limot2, the suspension parameters from
+// the joint itself, everything else from limot1.
+dReal dJointGetHinge2Param( dJointID j, int parameter )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    if (( parameter & 0xff00 ) == 0x100 )
+        return hinge2->limot2.get( parameter & 0xff );
+
+    switch ( parameter )
+    {
+    case dParamSuspensionERP:
+        return hinge2->susp_erp;
+    case dParamSuspensionCFM:
+        return hinge2->susp_cfm;
+    default:
+        return hinge2->limot1.get( parameter );
+    }
+}
+
+
+// Current angle about axis 1, as measured by measureAngle1().
+dReal dJointGetHinge2Angle1( dJointID j )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    const dReal angle = hinge2->measureAngle1();
+    return angle;
+}
+
+
+// Current angle about axis 2, as measured by measureAngle2().
+dReal dJointGetHinge2Angle2( dJointID j )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    const dReal angle = hinge2->measureAngle2();
+    return angle;
+}
+
+
+
+// Rate of change of the angle about axis 1: the first body's angular
+// velocity projected on the rotated axis1, minus the second body's
+// projection when a second body is attached. Returns 0 without a first body.
+dReal dJointGetHinge2Angle1Rate( dJointID j )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    dxBody *first = hinge2->node[0].body;
+    if ( !first )
+        return 0;
+
+    // axis1 rotated by the first body's rotation matrix
+    dVector3 rotatedAxis;
+    dMultiply0_331( rotatedAxis, first->posr.R, hinge2->axis1 );
+
+    dReal rate = dCalcVectorDot3( rotatedAxis, first->avel );
+
+    dxBody *second = hinge2->node[1].body;
+    if ( second )
+        rate -= dCalcVectorDot3( rotatedAxis, second->avel );
+
+    return rate;
+}
+
+
+// Rate of change of the angle about axis 2: the difference of the two
+// bodies' angular velocities projected on the rotated axis2. Returns 0
+// unless both bodies are attached.
+dReal dJointGetHinge2Angle2Rate( dJointID j )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    dxBody *first = hinge2->node[0].body;
+    dxBody *second = hinge2->node[1].body;
+    if ( !first || !second )
+        return 0;
+
+    // axis2 rotated by the second body's rotation matrix
+    dVector3 rotatedAxis;
+    dMultiply0_331( rotatedAxis, second->posr.R, hinge2->axis2 );
+
+    dReal rate = dCalcVectorDot3( rotatedAxis, first->avel );
+    rate -= dCalcVectorDot3( rotatedAxis, second->avel );
+    return rate;
+}
+
+
+// Apply torque1 about axis 1 and torque2 about axis 2: the combined torque
+// is added to the first body and its negation to the second body. Does
+// nothing unless both bodies are attached.
+void dJointAddHinge2Torques( dJointID j, dReal torque1, dReal torque2 )
+{
+    dxJointHinge2* hinge2 = static_cast<dxJointHinge2*>( j );
+    dUASSERT( hinge2, "bad joint argument" );
+    checktype( hinge2, Hinge2 );
+
+    dxBody *first = hinge2->node[0].body;
+    dxBody *second = hinge2->node[1].body;
+    if ( !first || !second )
+        return;
+
+    // each axis rotated by the rotation matrix of its own body
+    dVector3 rotatedAxis1, rotatedAxis2;
+    dMultiply0_331( rotatedAxis1, first->posr.R, hinge2->axis1 );
+    dMultiply0_331( rotatedAxis2, second->posr.R, hinge2->axis2 );
+
+    dVector3 total;
+    for ( int i = 0; i < 3; ++i )
+        total[i] = rotatedAxis1[i] * torque1 + rotatedAxis2[i] * torque2;
+
+    dBodyAddTorque( first, total[0], total[1], total[2] );
+    dBodyAddTorque( second, -total[0], -total[1], -total[2] );
+}
+
+
+// Type tag of this joint class.
+dJointType dxJointHinge2::type() const
+{
+    return dJointTypeHinge2;
+}
+
+
+// Size in bytes of this joint class.
+sizeint dxJointHinge2::size() const
+{
+    return sizeof( dxJointHinge2 );
+}
+
+
+// Re-derive the stored anchors, axes, the sin/cos of the angle between the
+// axes, and the angle reference vectors from the current getter values.
+// getAxisInfo() dereferences both bodies without a check.
+void
+dxJointHinge2::setRelativeValues()
+{
+    dVector3 anchor;
+    dJointGetHinge2Anchor(this, anchor);
+    setAnchors( this, anchor[0], anchor[1], anchor[2], anchor1, anchor2 );
+
+    dVector3 axis;
+
+    if ( node[0].body )
+    {
+        dJointGetHinge2Axis1(this, axis);
+        setAxes( this, axis[0],axis[1],axis[2], axis1, NULL );
+    }
+
+    // axis 2 belongs to the second body: dJointGetHinge2Axis2() needs
+    // node[1].body, so that is the body to test here (was node[0].body)
+    if ( node[1].body )
+    {
+        dJointGetHinge2Axis2(this, axis);
+        setAxes( this, axis[0],axis[1],axis[2], NULL, axis2 );
+    }
+
+    // only s0 and c0 are kept; the vector outputs are scratch
+    dVector3 ax1, ax2;
+    getAxisInfo( ax1, ax2, axis, s0, c0 );
+
+    makeV1andV2();
+    makeW1andW2();
+}
diff --git a/libs/ode-0.16.1/ode/src/joints/hinge2.h b/libs/ode-0.16.1/ode/src/joints/hinge2.h
new file mode 100644
index 0000000..06ce240
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/hinge2.h
@@ -0,0 +1,71 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_HINGE2_H_
+#define _ODE_JOINT_HINGE2_H_
+
+#include "joint.h"
+
+
+// hinge 2
+
+struct dxJointHinge2 : public dxJoint  // hinge-2 joint state: two axes, each with its own limit/motor
+{
+    dVector3 anchor1;   // anchor w.r.t first body
+    dVector3 anchor2;   // anchor w.r.t second body
+    dVector3 axis1;     // axis 1 w.r.t first body
+    dVector3 axis2;     // axis 2 w.r.t second body
+    dReal c0, s0;       // cos,sin of desired angle between axis 1,2
+    dVector3 v1, v2;    // angle ref vectors embedded in first body
+    dVector3 w1, w2;    // angle ref vectors embedded in second body
+    dxJointLimitMotor limot1; // limit+motor info for axis 1
+    dxJointLimitMotor limot2; // limit+motor info for axis 2
+    dReal susp_erp, susp_cfm; // suspension parameters (erp,cfm)
+
+
+    dReal measureAngle1() const;  // NOTE(review): presumably the current angle about axis 1 - definition not visible here
+    dReal measureAngle2() const;  // NOTE(review): presumably the current angle about axis 2 - definition not visible here
+    void makeV1andV2();           // NOTE(review): presumably recomputes v1 and v2 - definition not visible here
+    void makeW1andW2();           // NOTE(review): presumably recomputes w1 and w2 - definition not visible here
+
+    void getAxisInfo(dVector3 ax1, dVector3 ax2, dVector3 axis,
+        dReal &sin_angle, dReal &cos_Angle) const;  // sine/cosine are handed back through the two references
+
+
+
+    dxJointHinge2( dxWorld *w );
+
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;   // always dJointTypeHinge2
+    virtual sizeint size() const;      // sizeof the dxJointHinge2 object
+
+    virtual void setRelativeValues();  // re-derives anchors, axes, s0/c0 and the v/w reference vectors
+};
+
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/joint.cpp b/libs/ode-0.16.1/ode/src/joints/joint.cpp
new file mode 100644
index 0000000..1b7de7a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/joint.cpp
@@ -0,0 +1,931 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+design note: the general principle for giving a joint the option of connecting
+to the static environment (i.e. the absolute frame) is to check the second
+body (joint->node[1].body), and if it is zero then behave as if its body
+transform is the identity.
+
+*/
+
+#include <ode/ode.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "joint.h"
+#include "joint_internal.h"
+#include "util.h"
+
+extern void addObjectToList( dObject *obj, dObject **first );
+
+dxJoint::dxJoint( dxWorld *w ) :
+    dObject( w )
+{
+    // Builds a joint with no bodies attached and registers it with world `w'.
+    dIASSERT( w );
+    flags = 0;
+    feedback = 0;
+    dSetZero( lambda, 6 );
+
+    for ( int i = 0; i < 2; ++i )  // both nodes: back-pointer set, no body, no next link
+    {
+        node[i].joint = this;
+        node[i].body = 0;
+        node[i].next = 0;
+    }
+
+    addObjectToList( this, ( dObject ** ) &w->firstjoint );  // link into the world's joint list
+    ++w->nj;
+}
+
+dxJoint::~dxJoint()
+{ }  // empty body: no cleanup is performed here
+
+
+/*virtual */
+void dxJoint::setRelativeValues()
+{
+    // Default implementation does nothing; joint types such as dxJointHinge2 supply their own
+}
+
+bool dxJoint::isEnabled() const
+{
+    if ( (flags & dJOINT_DISABLED) != 0 ) return false;  // explicitly disabled
+    if ( node[0].body->invMass > 0 ) return true;        // first body has a positive inverse mass
+    return node[1].body && node[1].body->invMass > 0;    // else a second body must exist and have one
+}
+
+
+sizeint dxJointGroup::exportJoints(dxJoint **jlist)
+{
+    // Walks m_stack from its start, stores a pointer to every joint found in
+    // jlist (in walk order) and returns how many pointers were written.
+    sizeint count = 0;
+    for (dxJoint *cur = (dxJoint*) m_stack.rewind(); cur != NULL;
+         cur = (dxJoint*) (m_stack.next (cur->size())))
+        jlist[count++] = cur;
+    return count;
+}
+
+void dxJointGroup::freeAll()
+{
+    m_num = 0;          // reset m_num (presumably the number of joints in the group)
+    m_stack.freeAll();  // empty the backing m_stack
+}
+
+
+//****************************************************************************
+// externs
+
+// extern "C" void dBodyAddTorque (dBodyID, dReal fx, dReal fy, dReal fz);
+// extern "C" void dBodyAddForce (dBodyID, dReal fx, dReal fy, dReal fz);
+
+//****************************************************************************
+// utility
+
+// set three "ball-and-socket" rows in the constraint equation, and the
+// corresponding right hand side.
+
+void setBall( dxJoint *joint, dReal fps, dReal erp,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm,
+    dVector3 anchor1, dVector3 anchor2 )
+{
+    // Fills three constraint rows that tie anchor1 (on the first body) to
+    // anchor2 (on the second body, or used as-is when there is no second body).
+    //   fps, erp  - their product scales the positional error on the right hand side
+    //   rowskip   - element stride between consecutive rows of J1 and J2
+    //   pairskip  - element stride between consecutive rows of pairRhsCfm
+    // J2 is left untouched when the joint has no second body.
+
+    dxBody *body0 = joint->node[0].body;
+    dxBody *body1 = joint->node[1].body;
+
+    // anchors rotated by their body's R, i.e. offsets from the body positions
+    dVector3 offset0, offset1;
+
+    // first body: unit linear entries plus the cross matrix of its offset
+    J1[dxJoint::GI2_JLX] = 1;
+    J1[rowskip + dxJoint::GI2_JLY] = 1;
+    J1[2 * rowskip + dxJoint::GI2_JLZ] = 1;
+    dMultiply0_331( offset0, body0->posr.R, anchor1 );
+    dSetCrossMatrixMinus( J1 + dxJoint::GI2__JA_MIN, offset0, rowskip );
+
+    // second body: same rows with the opposite sign
+    if ( body1 )
+    {
+        J2[dxJoint::GI2_JLX] = -1;
+        J2[rowskip + dxJoint::GI2_JLY] = -1;
+        J2[2 * rowskip + dxJoint::GI2_JLZ] = -1;
+        dMultiply0_331( offset1, body1->posr.R, anchor2 );
+        dSetCrossMatrixPlus( J2 + dxJoint::GI2__JA_MIN, offset1, rowskip );
+    }
+
+    // right hand side: per-axis gap between the two anchor points times fps*erp
+    const dReal gain = fps * erp;
+    dReal *rowRhsCfm = pairRhsCfm;
+    for ( int axis = dSA__MIN; axis != dSA__MAX; ++axis )
+    {
+        // without a second body anchor2 itself is the target point
+        const dReal target = body1 ? offset1[axis] + body1->posr.pos[axis]
+                                   : anchor2[axis];
+        rowRhsCfm[dxJoint::GI2_RHS] = gain * ( target - offset0[axis] - body0->posr.pos[axis] );
+        rowRhsCfm += pairskip;
+    }
+}
+
+
+// this is like setBall(), except that `axis' is a unit length vector
+// (in global coordinates) that should be used for the first jacobian
+// position row (the other two row vectors will be derived from this).
+// `erp1' is the erp value to use along the axis.
+
+void setBall2( dxJoint *joint, dReal fps, dReal erp,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm,
+    dVector3 anchor1, dVector3 anchor2,
+    dVector3 axis, dReal erp1 )
+{
+    // Same three rows as setBall(), but measured along (axis, q1, q2), where
+    // q1 and q2 come from dPlaneSpace( axis ). The first row uses fps*erp1,
+    // the other two use fps*erp. J2 is left untouched without a second body.
+
+    dxBody *body0 = joint->node[0].body;
+    dxBody *body1 = joint->node[1].body;
+
+    // in setBall() the three row directions are [1 0 0], [0 1 0] and
+    // [0 0 1], which makes everything much easier.
+    dVector3 q1, q2;
+    dPlaneSpace( axis, q1, q2 );
+    const dReal *rowDir[3] = { axis, q1, q2 };
+    const dReal rowGain[3] = { fps * erp1, fps * erp, fps * erp };
+
+    // first body: direction in the linear part, (offset x direction) in the
+    // angular part of every row
+    dVector3 point0;
+    dMultiply0_331( point0, body0->posr.R, anchor1 );
+    for ( int row = 0; row < 3; ++row )
+    {
+        dReal *J1row = J1 + row * rowskip;
+        dCopyVector3( J1row + dxJoint::GI2__JL_MIN, rowDir[row] );
+        dCalcVectorCross3( J1row + dxJoint::GI2__JA_MIN, point0, rowDir[row] );
+    }
+    dAddVectors3( point0, point0, body0->posr.pos );  // offset -> anchor point
+
+    // gap between the two anchor points
+    dVector3 gap;
+    if ( body1 )
+    {
+        // second body: negated direction in the linear part and
+        // (direction x offset) in the angular part of every row
+        dVector3 point1;
+        dMultiply0_331( point1, body1->posr.R, anchor2 );
+        for ( int row = 0; row < 3; ++row )
+        {
+            dReal *J2row = J2 + row * rowskip;
+            dCopyNegatedVector3( J2row + dxJoint::GI2__JL_MIN, rowDir[row] );
+            dCalcVectorCross3( J2row + dxJoint::GI2__JA_MIN, rowDir[row], point1 );
+        }
+        dAddVectors3( point1, point1, body1->posr.pos );
+        dSubtractVectors3( gap, point1, point0 );
+    }
+    else
+    {
+        dSubtractVectors3( gap, anchor2, point0 );  // anchor2 is used as the target point
+    }
+
+    // right hand side - measure the gap along (axis,q1,q2)
+    for ( int row = 0; row < 3; ++row )
+        pairRhsCfm[row * pairskip + dxJoint::GI2_RHS] = rowGain[row] * dCalcVectorDot3( rowDir[row], gap );
+}
+
+
+// set three orientation rows in the constraint equation, and the
+// corresponding right hand side.
+
+void setFixedOrientation( dxJoint *joint, dReal fps, dReal erp,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm,
+    dQuaternion qrel )
+{
+    dxBody *body0 = joint->node[0].body;
+    dxBody *body1 = joint->node[1].body;
+
+    // three rows that make the body rotations equal: unit angular entries for
+    // the first body, negated ones for the second body when there is one
+    J1[dxJoint::GI2_JAX] = 1;
+    J1[rowskip + dxJoint::GI2_JAY] = 1;
+    J1[2 * rowskip + dxJoint::GI2_JAZ] = 1;
+    if ( body1 )
+    {
+        J2[dxJoint::GI2_JAX] = -1;
+        J2[rowskip + dxJoint::GI2_JAY] = -1;
+        J2[2 * rowskip + dxJoint::GI2_JAZ] = -1;
+    }
+
+    // right hand side. the three rows constrain the relative angular velocity
+    // of the bodies, and the value chosen here steers them back into
+    // alignment. with
+    //    q = [cos(theta/2) sin(theta/2)*u] = [s v]
+    // being the quaternion of the rotation error (angle theta about the unit
+    // axis u), the correcting velocity is
+    //    |angular_velocity| = erp*theta / stepsize = (erp*fps) * theta
+    //    angular_velocity   = (erp*fps) * theta * u
+    // and the small angle approximation of sin() turns this into
+    //    angular_velocity   = (erp*fps) * 2 * v
+
+    // qerr = rotation error between the two bodies, relative to qrel
+    dQuaternion qerr;
+    if ( !body1 )
+    {
+        dQMultiply3( qerr, body0->q, qrel );
+    }
+    else
+    {
+        dQuaternion q01;
+        dQMultiply1( q01, body0->q, body1->q );
+        dQMultiply2( qerr, q01, qrel );
+    }
+
+    // flip the vector part when the scalar part is negative; this adjusts the
+    // sign of qerr to make theta small
+    if ( qerr[0] < 0 )
+        for ( int i = 1; i <= 3; ++i ) qerr[i] = -qerr[i];
+
+    // rotate the vector part by the first body's R and scale by 2*fps*erp
+    dQuaternion e;
+    dMultiply0_331( e, body0->posr.R, qerr + 1 );  // @@@ bad SIMD padding!
+    const dReal k_mul_2 = fps * erp * REAL(2.0);
+    pairRhsCfm[dxJoint::GI2_RHS] = k_mul_2 * e[dSA_X];
+    pairRhsCfm[pairskip + dxJoint::GI2_RHS] = k_mul_2 * e[dSA_Y];
+    pairRhsCfm[2 * pairskip + dxJoint::GI2_RHS] = k_mul_2 * e[dSA_Z];
+}
+
+
+// compute anchor points relative to bodies
+
+void setAnchors( dxJoint *j, dReal x, dReal y, dReal z,
+                dVector3 anchor1, dVector3 anchor2 )
+{
+    // Converts the point (x,y,z) into anchor1 (relative to the first body)
+    // and anchor2 (relative to the second body, or copied verbatim when
+    // there is no second body). Without a first body only the fourth
+    // elements are written.
+    const dReal point[3] = { x, y, z };
+    dxBody *first = j->node[0].body;
+    dxBody *second = j->node[1].body;
+
+    if ( first )
+    {
+        dReal rel[4];
+        rel[3] = 0;
+        for ( int i = 0; i < 3; ++i ) rel[i] = point[i] - first->posr.pos[i];
+        dMultiply1_331( anchor1, first->posr.R, rel );
+
+        if ( second )
+        {
+            for ( int i = 0; i < 3; ++i ) rel[i] = point[i] - second->posr.pos[i];
+            dMultiply1_331( anchor2, second->posr.R, rel );
+        }
+        else
+        {
+            for ( int i = 0; i < 3; ++i ) anchor2[i] = point[i];
+        }
+    }
+
+    anchor1[3] = 0;
+    anchor2[3] = 0;
+}
+
+
+// compute unit-length axes relative to bodies. either axis1 or axis2 can be 0.
+// nothing is written when the joint has no first body.
+void setAxes( dxJoint *j, dReal x, dReal y, dReal z,
+             dVector3 axis1, dVector3 axis2 )
+{
+    dxBody *b0 = j->node[0].body;
+    if ( b0 )
+    {
+        dReal q[4];
+        q[0] = x;
+        q[1] = y;
+        q[2] = z;
+        q[3] = 0;
+        dNormalize3( q );  // every stored axis is derived from this normalized copy
+
+        if ( axis1 )
+        {
+            dMultiply1_331( axis1, b0->posr.R, q );
+            axis1[3] = 0;
+        }
+
+        if ( axis2 )
+        {
+            dxBody *b1 = j->node[1].body;
+            if ( b1 )
+            {
+                dMultiply1_331( axis2, b1->posr.R, q );
+            }
+            else  // no second body: store the normalized direction unrotated
+            {
+                axis2[0] = q[0];
+                axis2[1] = q[1];
+                axis2[2] = q[2];
+            }
+            axis2[3] = 0;
+        }
+    }
+}
+
+
+void getAnchor( dxJoint *j, dVector3 result, dVector3 anchor1 )
+{
+    // Rotates anchor1 by the first body's R and adds that body's position.
+    // result is left untouched when the joint has no first body.
+    dxBody *body = j->node[0].body;
+    if ( !body ) return;
+
+    dMultiply0_331( result, body->posr.R, anchor1 );
+    for ( int i = 0; i < 3; ++i )
+        result[i] += body->posr.pos[i];
+}
+
+
+void getAnchor2( dxJoint *j, dVector3 result, dVector3 anchor2 )
+{
+    // Rotates anchor2 by the second body's R and adds that body's position.
+    // Without a second body the first three elements of anchor2 are copied.
+    dxBody *body = j->node[1].body;
+    if ( !body )
+    {
+        for ( int i = 0; i < 3; ++i )
+            result[i] = anchor2[i];
+        return;
+    }
+
+    // a second body exists: rotate first, then translate
+    dMultiply0_331( result, body->posr.R, anchor2 );
+    for ( int i = 0; i < 3; ++i )
+        result[i] += body->posr.pos[i];
+}
+
+
+void getAxis( dxJoint *j, dVector3 result, dVector3 axis1 )
+{
+    // rotate axis1 by the first body's R; result is untouched without a first body
+    dxBody *body = j->node[0].body;
+    if ( !body ) return;
+
+    dMultiply0_331( result, body->posr.R, axis1 );
+}
+
+
+void getAxis2( dxJoint *j, dVector3 result, dVector3 axis2 )
+{
+    // Rotates axis2 by the second body's R. Without a second body the
+    // first three elements of axis2 are copied to result unchanged.
+    dxBody *body = j->node[1].body;
+    if ( body )
+    {
+        dMultiply0_331( result, body->posr.R, axis2 );
+        return;
+    }
+
+    for ( int i = 0; i < 3; ++i )
+        result[i] = axis2[i];
+}
+
+
+dReal getHingeAngleFromRelativeQuat( dQuaternion qrel, dVector3 axis )
+{
+    // qrel is the relative rotation between the two bodies, written as
+    //    [s,v] = [ cos(theta/2) , sin(theta/2) * u ]
+    // with scalar part s, vector part v, unit length axis u and rotation
+    // angle theta. theta/2 would follow from
+    //    theta/2 = atan2 ( sin(theta/2) , cos(theta/2) )
+    // but the quaternion only yields the magnitude of the sine:
+    //    |v| = |sin(theta/2)| * |u| = |sin(theta/2)|
+    // using the magnitude alone has a strange effect. a rotation has two
+    // quaternion representations, q and -q, and a body turning about the
+    // hinge axis typically runs through one full cycle with the first
+    // representation and through the next cycle with the second. that is
+    // the same as u pointing along the hinge axis and then against it, so
+    // theta would appear to run "backwards" every other cycle. the fix: when
+    // u points "away" from the hinge (motor) axis, i.e. by more than 90
+    // degrees, use -q instead of q. it is the same rotation, but the sign of
+    // cos(theta/2) is inverted.
+
+    // cosHalf = cos(theta/2), sinHalfAbs = |sin(theta/2)|
+    const dReal cosHalf = qrel[0];
+    const dReal sinHalfAbs = dSqrt( qrel[1] * qrel[1] + qrel[2] * qrel[2] + qrel[3] * qrel[3] );
+
+    // does u point in the direction of the axis?  @@@ padding assumptions
+    const bool alongAxis = dCalcVectorDot3( qrel + 1, axis ) >= 0;
+
+    dReal theta;
+    if ( alongAxis )
+        theta = 2 * dAtan2( sinHalfAbs, cosHalf );
+    else
+        theta = 2 * dAtan2( sinHalfAbs, -cosHalf );  // opposite direction: use -q
+
+    // the angle obtained so far lies between 0..2*pi, but we want to
+    // return angles between -pi..pi
+    if ( theta > M_PI )
+        theta -= ( dReal )( 2 * M_PI );
+
+    // the angle we've just extracted has the wrong sign
+    return -theta;
+}
+
+
+// given two bodies (body1,body2), the hinge axis that they are connected by
+// w.r.t. body1 (axis), and the initial relative orientation between them
+// (q_initial), return the relative rotation angle. the initial relative
+// orientation corresponds to an angle of zero. if body2 is 0 then measure the
+// angle between body1 and the static frame.
+//
+// this will not return the correct angle if the bodies rotate along any axis
+// other than the given hinge axis.
+
+dReal getHingeAngle( dxBody *body1, dxBody *body2, dVector3 axis,
+                    dQuaternion q_initial )
+{
+    // qrel = rotation of the bodies relative to each other and to q_initial
+    dQuaternion qrel;
+    if ( !body2 )
+    {
+        // static frame: pretend body2->q is the identity
+        dQMultiply3( qrel, body1->q, q_initial );
+    }
+    else
+    {
+        dQuaternion q12;
+        dQMultiply1( q12, body1->q, body2->q );
+        dQMultiply2( qrel, q12, q_initial );
+    }
+
+    return getHingeAngleFromRelativeQuat( qrel, axis );
+}
+
+//****************************************************************************
+// dxJointLimitMotor
+
+void dxJointLimitMotor::init( dxWorld *world )
+{
+    // motor starts with zero velocity and force, stops are infinite, no limit is active
+    fmax = 0;
+    vel = 0;
+    fudge_factor = 1;
+    histop = dInfinity;
+    lostop = -dInfinity;
+    limit = 0;
+    limit_err = 0;
+    bounce = 0;
+    stop_erp = world->global_erp;  // erp/cfm defaults are taken from the world
+    normal_cfm = stop_cfm = world->global_cfm;
+}
+
+
+void dxJointLimitMotor::set( int num, dReal value )
+{
+    // Stores `value' in the field selected by the dParam* constant `num'.
+    // Two parameters are range checked: dParamFMax only accepts values >= 0
+    // and dParamFudgeFactor only values in [0,1]. A rejected value changes
+    // nothing, and neither does a `num' that matches no parameter handled
+    // here.
+
+    if ( num == dParamLoStop )
+        lostop = value;
+    else if ( num == dParamHiStop )
+        histop = value;
+    else if ( num == dParamVel )
+        vel = value;
+    else if ( num == dParamFMax )
+    {
+        // a negative force limit is ignored
+        if ( value >= 0 ) fmax = value;
+    }
+    else if ( num == dParamFudgeFactor )
+    {
+        // only factors within [0,1] are accepted
+        if ( value >= 0 && value <= 1 ) fudge_factor = value;
+    }
+    else if ( num == dParamBounce )
+        bounce = value;
+    else if ( num == dParamCFM )
+        normal_cfm = value;
+    else if ( num == dParamStopERP )
+        stop_erp = value;
+    else if ( num == dParamStopCFM )
+        stop_cfm = value;
+}
+
+
+dReal dxJointLimitMotor::get( int num ) const
+{
+    // Returns the field selected by the dParam* constant `num'; the mapping
+    // mirrors set(). A `num' that matches no parameter yields 0.
+    if ( num == dParamLoStop )
+        return lostop;
+    if ( num == dParamHiStop )
+        return histop;
+    if ( num == dParamVel )
+        return vel;
+    if ( num == dParamFMax )
+        return fmax;
+    if ( num == dParamFudgeFactor )
+        return fudge_factor;
+    if ( num == dParamBounce )
+        return bounce;
+    if ( num == dParamCFM )
+        return normal_cfm;
+    if ( num == dParamStopERP )
+        return stop_erp;
+    if ( num == dParamStopCFM )
+        return stop_cfm;
+
+    // unknown parameter
+    return 0;
+}
+
+
+bool dxJointLimitMotor::testRotationalLimit( dReal angle )
+{
+    // Classifies `angle' against the stops: limit becomes 1 at/below lostop,
+    // 2 at/above histop and 0 otherwise. limit_err receives the signed
+    // distance from the stop that was hit and is left untouched when no
+    // stop is hit. Returns true when a stop is hit.
+    if ( angle <= lostop )
+    {
+        limit_err = angle - lostop;
+        limit = 1;
+    }
+    else if ( angle >= histop )
+    {
+        limit_err = angle - histop;
+        limit = 2;
+    }
+    else limit = 0;
+
+    return limit != 0;
+}
+
+
+bool dxJointLimitMotor::addLimot( dxJoint *joint,
+    dReal fps, dReal *J1, dReal *J2, dReal *pairRhsCfm, dReal *pairLoHi,
+    const dVector3 ax1, int rotational )  // returns true when a constraint row was filled in
+{
+    // if the joint is powered, or has joint limits, add in the extra row
+    int powered = fmax > 0;  // the motor only counts as powered with a positive force limit
+    if ( powered || limit )
+    {
+        dReal *J1Used = rotational ? J1 + GI2__JA_MIN : J1 + GI2__JL_MIN;  // angular part for rotational limots, linear part otherwise
+        dReal *J2Used = rotational ? J2 + GI2__JA_MIN : J2 + GI2__JL_MIN;
+
+        dCopyVector3(J1Used, ax1);
+
+        dxBody *b1 = joint->node[1].body;
+        if ( b1 )
+        {
+            dCopyNegatedVector3(J2Used, ax1);  // the second body gets the opposite direction
+        }
+
+        // linear limot torque decoupling step:
+        //
+        // if this is a linear limot (e.g. from a slider), we have to be careful
+        // that the linear constraint forces (+/- ax1) applied to the two bodies
+        // do not create a torque couple. in other words, the points that the
+        // constraint force is applied at must lie along the same ax1 axis.
+        // a torque couple would result in powered or limited slider-jointed free
+        // bodies gaining angular momentum.
+        // the solution used here is to apply the constraint forces at the point
+        // halfway between the body centers. there is no penalty (other than an
+        // extra tiny bit of computation) in doing this adjustment. note that we
+        // only need to do this if the constraint connects two bodies.
+
+        dVector3 ltd = {0,0,0}; // Linear Torque Decoupling vector (a torque)
+        if ( !rotational && b1 )
+        {
+            dxBody *b0 = joint->node[0].body;
+            dVector3 c;  // half of the vector from body 0's position to body 1's position
+            c[0] = REAL( 0.5 ) * ( b1->posr.pos[0] - b0->posr.pos[0] );
+            c[1] = REAL( 0.5 ) * ( b1->posr.pos[1] - b0->posr.pos[1] );
+            c[2] = REAL( 0.5 ) * ( b1->posr.pos[2] - b0->posr.pos[2] );
+            dCalcVectorCross3( ltd, c, ax1 );
+            dCopyVector3(J1 + dxJoint::GI2__JA_MIN, ltd);  // the same (not negated) ltd goes to both bodies
+            dCopyVector3(J2 + dxJoint::GI2__JA_MIN, ltd);
+        }
+
+        // if we're limited low and high simultaneously, the joint motor is
+        // ineffective
+        if ( limit && ( lostop == histop ) ) powered = 0;
+
+        if ( powered )
+        {
+            pairRhsCfm[GI2_CFM] = normal_cfm;
+            if ( ! limit )
+            {
+                pairRhsCfm[GI2_RHS] = vel;  // target velocity, with the force clamped to [-fmax, fmax]
+                pairLoHi[GI2_LO] = -fmax;
+                pairLoHi[GI2_HI] = fmax;
+            }
+            else
+            {
+                // the joint is at a limit, AND is being powered. if the joint is
+                // being powered into the limit then we apply the maximum motor force
+                // in that direction, because the motor is working against the
+                // immovable limit. if the joint is being powered away from the limit
+                // then we have problems because actually we need *two* lcp
+                // constraints to handle this case. so we fake it and apply some
+                // fraction of the maximum force. the fraction to use can be set as
+                // a fudge factor.
+
+                dReal fm = fmax;
+                if (( vel > 0 ) || ( vel == 0 && limit == 2 ) ) fm = -fm;
+
+                // if we're powering away from the limit, apply the fudge factor
+                if (( limit == 1 && vel > 0 ) || ( limit == 2 && vel < 0 ) ) fm *= fudge_factor;
+
+                
+                dReal fm_ax1_0 = fm*ax1[0], fm_ax1_1 = fm*ax1[1], fm_ax1_2 = fm*ax1[2];
+                
+                dxBody *b0 = joint->node[0].body;
+                dxWorldProcessContext *world_process_context = b0->world->unsafeGetWorldProcessingContext(); 
+
+                world_process_context->LockForAddLimotSerialization();  // held across the dBodyAdd* calls below; released at the matching Unlock
+
+                if ( rotational )
+                {
+                    dxBody *b1 = joint->node[1].body;  // shadows the outer b1 (same value)
+                    if ( b1 != NULL ) 
+                    {
+                        dBodyAddTorque( b1, fm_ax1_0, fm_ax1_1, fm_ax1_2 );
+                    }
+
+                    dBodyAddTorque( b0, -fm_ax1_0, -fm_ax1_1, -fm_ax1_2 );
+                }
+                else
+                {
+                    dxBody *b1 = joint->node[1].body;  // shadows the outer b1 (same value)
+                    if ( b1 != NULL )
+                    {
+                        // linear limot torque decoupling step: refer to above discussion
+                        dReal neg_fm_ltd_0 = -fm*ltd[0], neg_fm_ltd_1 = -fm*ltd[1], neg_fm_ltd_2 = -fm*ltd[2];
+                        dBodyAddTorque( b0, neg_fm_ltd_0, neg_fm_ltd_1, neg_fm_ltd_2 );
+                        dBodyAddTorque( b1, neg_fm_ltd_0, neg_fm_ltd_1, neg_fm_ltd_2 );
+
+                        dBodyAddForce( b1, fm_ax1_0, fm_ax1_1, fm_ax1_2 );
+                    }
+
+                    dBodyAddForce( b0, -fm_ax1_0, -fm_ax1_1, -fm_ax1_2 );
+                }
+
+                world_process_context->UnlockForAddLimotSerialization();
+            }
+        }
+
+        if ( limit )  // a limit overwrites the RHS/CFM written by the powered branch above
+        {
+            dReal k = fps * stop_erp;
+            pairRhsCfm[GI2_RHS] = -k * limit_err;
+            pairRhsCfm[GI2_CFM] = stop_cfm;
+
+            if ( lostop == histop )
+            {
+                // limited low and high simultaneously
+                pairLoHi[GI2_LO] = -dInfinity;
+                pairLoHi[GI2_HI] = dInfinity;
+            }
+            else
+            {
+                if ( limit == 1 )
+                {
+                    // low limit
+                    pairLoHi[GI2_LO] = 0;
+                    pairLoHi[GI2_HI] = dInfinity;
+                }
+                else
+                {
+                    // high limit
+                    pairLoHi[GI2_LO] = -dInfinity;
+                    pairLoHi[GI2_HI] = 0;
+                }
+
+                // deal with bounce
+                if ( bounce > 0 )
+                {
+                    // calculate joint velocity
+                    dReal vel;  // note: shadows the member vel (the motor's target velocity)
+                    if ( rotational )
+                    {
+                        vel = dCalcVectorDot3( joint->node[0].body->avel, ax1 );
+                        if ( joint->node[1].body )
+                            vel -= dCalcVectorDot3( joint->node[1].body->avel, ax1 );
+                    }
+                    else
+                    {
+                        vel = dCalcVectorDot3( joint->node[0].body->lvel, ax1 );
+                        if ( joint->node[1].body )
+                            vel -= dCalcVectorDot3( joint->node[1].body->lvel, ax1 );
+                    }
+
+                    // only apply bounce if the velocity is incoming, and if the
+                    // resulting c[] exceeds what we already have.
+                    if ( limit == 1 )
+                    {
+                        // low limit
+                        if ( vel < 0 )
+                        {
+                            dReal newc = -bounce * vel;
+                            if ( newc > pairRhsCfm[GI2_RHS] ) pairRhsCfm[GI2_RHS] = newc;
+                        }
+                    }
+                    else
+                    {
+                        // high limit - all those computations are reversed
+                        if ( vel > 0 )
+                        {
+                            dReal newc = -bounce * vel;
+                            if ( newc < pairRhsCfm[GI2_RHS] ) pairRhsCfm[GI2_RHS] = newc;
+                        }
+                    }
+                }
+            }
+        }
+        return true;  // the row was filled in
+    }
+    return false;  // neither powered nor at a limit: nothing was written
+}
+
+/**
+    This function generalizes the "linear limot torque decoupling"
+    in addLimot to use anchor points provided by the caller.
+
+    This makes it so that the appropriate torques are applied to
+    a body when it's being linearly motored or limited using anchor points
+    that aren't at the center of mass.
+
+    pt1 and pt2 are centered in body coordinates but use global directions.
+    I.e., they are conveniently found within joint code with:
+      getAxis(joint,pt1,anchor1);
+      getAxis2(joint,pt2,anchor2);
+*/
+bool dxJointLimitMotor::addTwoPointLimot( dxJoint *joint, dReal fps,
+    dReal *J1, dReal *J2, dReal *pairRhsCfm, dReal *pairLoHi,
+    const dVector3 ax1, const dVector3 pt1, const dVector3 pt2 )  // returns true when a constraint row was filled in
+{
+    // if the joint is powered, or has joint limits, add in the extra row
+    int powered = fmax > 0;  // the motor only counts as powered with a positive force limit
+    if ( powered || limit )
+    {
+        // Set the linear portion
+        dCopyVector3(J1 + GI2__JL_MIN, ax1);
+        // Set the angular portion (to move the linear constraint 
+        // away from the center of mass).  
+        dCalcVectorCross3(J1 + GI2__JA_MIN, pt1, ax1);
+        // Set the constraints for the second body
+        if ( joint->node[1].body ) {
+            dCopyNegatedVector3(J2 + GI2__JL_MIN, ax1);
+            dCalcVectorCross3(J2 + GI2__JA_MIN, pt2, J2 + GI2__JL_MIN);  // crosses with the already negated linear part
+        }
+
+        // if we're limited low and high simultaneously, the joint motor is
+        // ineffective
+        if ( limit && ( lostop == histop ) ) powered = 0;
+
+        if ( powered )
+        {
+            pairRhsCfm[GI2_CFM] = normal_cfm;
+            if ( ! limit )
+            {
+                pairRhsCfm[GI2_RHS] = vel;  // target velocity, with the force clamped to [-fmax, fmax]
+                pairLoHi[GI2_LO] = -fmax;
+                pairLoHi[GI2_HI] = fmax;
+            }
+            else
+            {
+                // the joint is at a limit, AND is being powered. if the joint is
+                // being powered into the limit then we apply the maximum motor force
+                // in that direction, because the motor is working against the
+                // immovable limit. if the joint is being powered away from the limit
+                // then we have problems because actually we need *two* lcp
+                // constraints to handle this case. so we fake it and apply some
+                // fraction of the maximum force. the fraction to use can be set as
+                // a fudge factor.
+
+                dReal fm = fmax;
+                if (( vel > 0 ) || ( vel == 0 && limit == 2 ) ) fm = -fm;
+
+                // if we're powering away from the limit, apply the fudge factor
+                if (( limit == 1 && vel > 0 ) || ( limit == 2 && vel < 0 ) ) fm *= fudge_factor;
+
+               
+                const dReal* tAx1 = J1 + GI2__JA_MIN;  // NOTE(review): unlike addLimot, the dBodyAdd* calls below are not wrapped in Lock/UnlockForAddLimotSerialization - confirm
+                dBodyAddForce( joint->node[0].body, -fm*ax1[dSA_X], -fm*ax1[dSA_Y], -fm*ax1[dSA_Z] );
+                dBodyAddTorque( joint->node[0].body, -fm*tAx1[dSA_X], -fm*tAx1[dSA_Y], -fm*tAx1[dSA_Z] );
+
+                if ( joint->node[1].body )
+                {
+                    const dReal* tAx2 = J2 + GI2__JA_MIN;  // angular part written for the second body above
+                    dBodyAddForce( joint->node[1].body, fm*ax1[dSA_X], fm*ax1[dSA_Y], fm*ax1[dSA_Z] );
+                    dBodyAddTorque( joint->node[1].body, -fm*tAx2[dSA_X], -fm*tAx2[dSA_Y], -fm*tAx2[dSA_Z] );
+                }
+                
+            }
+        }
+
+        if ( limit )  // a limit overwrites the RHS/CFM written by the powered branch above
+        {
+            dReal k = fps * stop_erp;
+            pairRhsCfm[GI2_RHS] = -k * limit_err;
+            pairRhsCfm[GI2_CFM] = stop_cfm;
+
+            if ( lostop == histop )
+            {
+                // limited low and high simultaneously
+                pairLoHi[GI2_LO] = -dInfinity;
+                pairLoHi[GI2_HI] = dInfinity;
+            }
+            else
+            {
+                if ( limit == 1 )
+                {
+                    // low limit
+                    pairLoHi[GI2_LO] = 0;
+                    pairLoHi[GI2_HI] = dInfinity;
+                }
+                else
+                {
+                    // high limit
+                    pairLoHi[GI2_LO] = -dInfinity;
+                    pairLoHi[GI2_HI] = 0;
+                }
+
+                // deal with bounce
+                if ( bounce > 0 )
+                {
+                    // calculate relative velocity of the two anchor points (this local vel shadows the member vel)
+                    dReal vel = 
+  	                    dCalcVectorDot3( joint->node[0].body->lvel, J1 + GI2__JL_MIN ) +
+  	                    dCalcVectorDot3( joint->node[0].body->avel, J1 + GI2__JA_MIN );
+  	                if (joint->node[1].body) {
+  	                    vel +=
+  	                        dCalcVectorDot3( joint->node[1].body->lvel, J2 + GI2__JL_MIN ) +
+  	                        dCalcVectorDot3( joint->node[1].body->avel, J2 + GI2__JA_MIN );
+  	                }
+
+                    // only apply bounce if the velocity is incoming, and if the
+                    // resulting c[] exceeds what we already have.
+                    if ( limit == 1 )
+                    {
+                        // low limit
+                        if ( vel < 0 )
+                        {
+                            dReal newc = -bounce * vel;
+                            if ( newc > pairRhsCfm[GI2_RHS] ) pairRhsCfm[GI2_RHS] = newc;
+                        }
+                    }
+                    else
+                    {
+                        // high limit - all those computations are reversed
+                        if ( vel > 0 )
+                        {
+                            dReal newc = -bounce * vel;
+                            if ( newc < pairRhsCfm[GI2_RHS] ) pairRhsCfm[GI2_RHS] = newc;
+                        }
+                    }
+                }
+            }
+        }
+        return true;  // the row was filled in
+    }
+    return false;  // neither powered nor at a limit: nothing was written
+}
+
+
+// Local Variables:
+// mode:c++
+// c-basic-offset:4
+// End:
diff --git a/libs/ode-0.16.1/ode/src/joints/joint.h b/libs/ode-0.16.1/ode/src/joints/joint.h
new file mode 100644
index 0000000..b6aa81e
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/joint.h
@@ -0,0 +1,326 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_H_
+#define _ODE_JOINT_H_
+
+
+#include <ode/contact.h>
+#include "../common.h"
+#include "../objects.h"
+#include "../obstack.h"
+
+
+// joint flags (bit values that are OR-ed together in dxJoint::flags)
+enum
+{
+    // if this flag is set, the joint was allocated in a joint group
+    dJOINT_INGROUP = 1,
+
+    // if this flag is set, the joint was attached with arguments (0,body).
+    // our convention is to treat all attaches as (body,0), i.e. so node[0].body
+    // is always nonzero, so this flag records the fact that the arguments were
+    // swapped.
+    dJOINT_REVERSE = 2,
+
+    // if this flag is set, the joint can not have just one body attached to it,
+    // it must have either zero or two bodies attached.
+    dJOINT_TWOBODIES = 4,
+
+    dJOINT_DISABLED = 8 // NOTE(review): presumably the flag tested by dxJoint::isEnabled() - confirm
+};
+
+
+enum dJointConnectedBody // selects one of the two bodies a joint connects
+{
+    dJCB__MIN,
+
+    dJCB_FIRST_BODY = dJCB__MIN,
+    dJCB_SECOND_BODY,
+
+    dJCB__MAX, // one past the last body kind; statically asserted to be 2 below
+
+};
+
+// Maps dJCB_FIRST_BODY to dJCB_SECOND_BODY and vice versa.
+static inline dJointConnectedBody EncodeJointOtherConnectedBody(dJointConnectedBody cbBodyKind)
+{
+    dIASSERT(dIN_RANGE(cbBodyKind, dJCB__MIN, dJCB__MAX));
+    dSASSERT(dJCB__MAX == 2);
+    const int bothKinds = dJCB_FIRST_BODY + dJCB_SECOND_BODY; // sum of the two kinds
+    return (dJointConnectedBody)(bothKinds - cbBodyKind); // subtracting one leaves the other
+}
+
+/* joint body relativity enumeration: global frame or relative to one of the two bodies */
+enum dJointBodyRelativity 
+{
+    dJBR__MIN,
+
+    dJBR_GLOBAL = dJBR__MIN,
+
+    dJBR__BODIES_MIN, // first of the body-relative values
+
+    dJBR_BODY1 = dJBR__BODIES_MIN + dJCB_FIRST_BODY,
+    dJBR_BODY2 = dJBR__BODIES_MIN + dJCB_SECOND_BODY,
+
+    dJBR__BODIES_MAX = dJBR__BODIES_MIN + dJCB__MAX, // one past dJBR_BODY2
+
+    dJBR__MAX, // NOTE(review): equals dJBR__BODIES_MAX + 1, i.e. two past dJBR_BODY2, unlike dJCB__MAX - confirm this is intended
+
+    dJBR__DEFAULT = dJBR_GLOBAL,
+    dJBR__BODIES_COUNT = dJBR__BODIES_MAX - dJBR__BODIES_MIN,
+
+};
+
+ODE_PURE_INLINE int dJBREncodeBodyRelativityStatus(int relativity) // is `relativity' one of the body-relative values?
+{
+    return dIN_RANGE(relativity, dJBR__BODIES_MIN, dJBR__BODIES_MAX); // result of the range test against the body sub-range
+}
+
+ODE_PURE_INLINE dJointBodyRelativity dJBRSwapBodyRelativity(int relativity) // dJBR_BODY1 <-> dJBR_BODY2
+{
+    dIASSERT(dIN_RANGE(relativity, dJBR__BODIES_MIN, dJBR__BODIES_MAX)); // only body-relative values may be swapped
+    return (dJointBodyRelativity)(dJBR_BODY1 + dJBR_BODY2 - relativity); // (sum of both) - one = the other
+}
+
+
+
+
+// there are two of these nodes in the joint, one for each connection to a
+// body. these are nodes of a linked list kept by each body of its connecting
+// joints. but note that the body pointer in each node points to the body that
+// makes use of the *other* node, not this node. this trick makes it a bit
+// easier to traverse the body/joint graph.
+
+struct dxJointNode
+{
+    dxJoint *joint;     // pointer to enclosing dxJoint object
+    dxBody *body;       // *other* body this joint is connected to
+    dxJointNode *next;  // next node in body's list of connected joints
+};
+
+
+struct dxJoint : public dObject
+{
+    // naming convention: the "first" body this is connected to is node[0].body,
+    // and the "second" body is node[1].body. if this joint is only connected
+    // to one body then the second body is 0.
+
+    // info returned by getInfo1 function. the constraint dimension is m (<=6).
+    // i.e. that is the total number of rows in the jacobian. `nub' is the
+    // number of unbounded variables (which have lo,hi = -/+ infinity).
+
+    struct Info1
+    {
+        // Structure size should not exceed sizeof(pointer) bytes in order
+        // to have a good memory access pattern in dxQuickStepper()
+        uint8 m, nub;
+    };
+
+    // info returned by getInfo2 function
+
+    enum // element offsets within one jacobian row, derived from the dDA_xxx constants
+    {
+        GI2__J_MIN,
+        GI2__JL_MIN = GI2__J_MIN + dDA__L_MIN,
+
+        GI2_JLX = GI2__J_MIN + dDA_LX,
+        GI2_JLY = GI2__J_MIN + dDA_LY,
+        GI2_JLZ = GI2__J_MIN + dDA_LZ,
+
+        GI2__JL_MAX = GI2__J_MIN + dDA__L_MAX,
+
+        GI2__JA_MIN = GI2__J_MIN + dDA__A_MIN,
+
+        GI2_JAX = GI2__J_MIN + dDA_AX,
+        GI2_JAY = GI2__J_MIN + dDA_AY,
+        GI2_JAZ = GI2__J_MIN + dDA_AZ,
+
+        GI2__JA_MAX = GI2__J_MIN + dDA__A_MAX,
+        GI2__J_MAX = GI2__J_MIN + dDA__MAX,
+    };
+
+    enum // element offsets within one (rhs, cfm) pair of pairRhsCfm
+    {
+        GI2_RHS,
+        GI2_CFM,
+        GI2__RHS_CFM_MAX,
+    };
+
+    enum // element offsets within one (lo, hi) pair of pairLoHi
+    {
+        GI2_LO,
+        GI2_HI,
+        GI2__LO_HI_MAX,
+    };
+
+    // info returned by getSureMaxInfo function. 
+    // The information is used for memory reservation in calculations.
+
+    struct SureMaxInfo
+    {
+        // The value of `max_m' must ALWAYS be not less than the value of `m'
+        // the getInfo1 call can generate in current joint state. Another 
+        // requirement is that the value should be provided very quickly, 
+        // without the excessive calculations.
+        // If it is hard/impossible to quickly predict the maximal value of `m'
+        // (which is the case for most joint types) the maximum for current 
+        // joint type in general should be returned. If it can be known the `m'
+        // will be smaller, it can save a bit of memory from being reserved 
+        // for calculations if that smaller value is returned.
+
+        uint8 max_m; // Estimate of maximal `m' in Info1
+    };
+
+
+    unsigned flags;             // dJOINT_xxx flags
+    dxJointNode node[2];        // connections to bodies. node[1].body can be 0
+    dJointFeedback *feedback;   // optional feedback structure
+    dReal lambda[6];            // lambda generated by last step
+
+
+    dxJoint( dxWorld *w );
+    virtual ~dxJoint();
+
+    bool GetIsJointReverse() const { return (this->flags & dJOINT_REVERSE) != 0; }
+
+    virtual void getInfo1( Info1* info ) = 0;
+
+    // integrator parameters
+    virtual void getInfo2( 
+        // fps=frames per second (1/stepsize), erp=default error reduction parameter (0..1)
+        dReal worldFPS, dReal worldERP, 
+        // elements to jump from one row to the next in J's
+        int rowskip,
+        // for the first and second body, pointers to two (linear and angular)
+        // n*3 jacobian sub matrices, stored by rows. these matrices will have
+        // been initialized to 0 on entry. if the second body is zero then the
+        // J2xx pointers may be 0.
+        dReal *J1, dReal *J2,
+        // elements to jump from one pair of scalars to the next
+        int pairskip,
+        // right hand sides of the equation J*v = c + cfm * lambda. cfm is the
+        // "constraint force mixing" vector. c is set to zero on entry, cfm is
+        // set to a constant (typically very small or zero) value on entry.
+        dReal *pairRhsCfm,
+        // lo and hi limits for variables (set to -/+ infinity on entry).
+        dReal *pairLoHi,
+        // findex vector for variables. see the LCP solver interface for a
+        // description of what this does. this is set to -1 on entry.
+        // note that the returned indexes are relative to the first index of
+        // the constraint.
+        int *findex) = 0;
+    // This call must quickly(!) estimate the maximum value of "m" that could be returned by getInfo1()
+    // See comments at definition of SureMaxInfo for details.
+    virtual void getSureMaxInfo( SureMaxInfo* info ) = 0;
+    virtual dJointType type() const = 0;
+    virtual sizeint size() const = 0;
+
+    /// Set values which are relative with respect to bodies.
+    /// Each dxJoint should redefine it if needed.
+    virtual void setRelativeValues();
+
+    // Test if this joint should be used in the simulation step
+    // (has the enabled flag set, and is attached to at least one dynamic body)
+    bool isEnabled() const;
+};
+
+
+// joint group. NOTE: any joints in the group that have their world destroyed
+// will have their world pointer set to 0.
+
+struct dxJointGroup : public dBase
+{
+    dxJointGroup(): m_num(0), m_stack() {}
+
+    // Constructs a joint of type T in storage taken from the group's stack and
+    // tags it with dJOINT_INGROUP. Returns NULL when no storage was obtained.
+    template<class T>
+    T *alloc(dWorldID w)
+    {
+        void *storage = m_stack.alloc(sizeof(T));
+        if (storage == NULL) return NULL;
+        ++m_num;
+        T *created = new(storage) T(w);
+        created->flags |= dJOINT_INGROUP;
+        return created;
+    }
+    sizeint getJointCount() const { return m_num; }
+    sizeint exportJoints(dxJoint **jlist);
+
+    void *beginEnum() { return m_stack.rewind(); }
+    void *continueEnum(sizeint num_bytes) { return m_stack.next(num_bytes); }
+
+    void freeAll();
+
+private:
+    sizeint m_num;        // number of joints on the stack
+    dObStack m_stack; // a stack of (possibly differently sized) dxJoint objects.
+};
+
+// common limit and motor information for a single joint axis of movement
+struct dxJointLimitMotor
+{
+    dReal vel, fmax;        // powered joint: velocity, max force
+    dReal lostop, histop;   // joint limits, relative to initial position
+    dReal fudge_factor;     // when powering away from joint limits
+    dReal normal_cfm;       // cfm to use when not at a stop
+    dReal stop_erp, stop_cfm; // erp and cfm for when at joint limit
+    dReal bounce;           // restitution factor
+    // variables used between getInfo1() and getInfo2()
+    int limit;          // 0=free, 1=at lo limit, 2=at hi limit
+    dReal limit_err;    // if at limit, amount over limit
+
+    void init( dxWorld * );
+    void set( int num, dReal value );   // num is a parameter id (callers pass the low byte of the public parameter code)
+    dReal get( int num ) const;         // counterpart of set()
+    bool testRotationalLimit( dReal angle );
+
+    enum // local aliases of the dxJoint::GI2_xxx offsets
+    {
+        GI2__JL_MIN = dxJoint::GI2__JL_MIN,
+        GI2__JA_MIN = dxJoint::GI2__JA_MIN,
+        GI2_JAX = dxJoint::GI2_JAX,
+        GI2_JAY = dxJoint::GI2_JAY,
+        GI2_JAZ = dxJoint::GI2_JAZ,
+        GI2_RHS = dxJoint::GI2_RHS,
+        GI2_CFM = dxJoint::GI2_CFM,
+        GI2_LO = dxJoint::GI2_LO,
+        GI2_HI = dxJoint::GI2_HI,
+    };
+
+    // NOTE(review): the bool result appears to tell whether a constraint row was
+    // filled in (dxJointLMotor::getInfo2 advances to the next row only on true) - confirm
+    bool addLimot( dxJoint *joint, dReal fps, 
+        dReal *J1, dReal *J2, dReal *pairRhsCfm, dReal *pairLoHi,
+        const dVector3 ax1, int rotational );
+    bool addTwoPointLimot( dxJoint *joint, dReal fps,
+        dReal *J1, dReal *J2, dReal *pairRhsCfm, dReal *pairLoHi,
+        const dVector3 ax1, const dVector3 pt1, const dVector3 pt2 );
+};
+
+
+#endif
+
+
+// Local Variables:
+// mode:c++
+// c-basic-offset:4
+// End:
diff --git a/libs/ode-0.16.1/ode/src/joints/joint_internal.h b/libs/ode-0.16.1/ode/src/joints/joint_internal.h
new file mode 100644
index 0000000..30accb6
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/joint_internal.h
@@ -0,0 +1,70 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#ifndef _ODE_JOINT_INTERNAL_H_
+#define _ODE_JOINT_INTERNAL_H_
+
+
+#include <ode/rotation.h>
+#include <ode/objects.h>
+#include "matrix.h"
+#include "odemath.h"
+
+// Asserts (via dUASSERT) that joint `j' reports the type dJointType<t>; `j' may be any pointer expression.
+#define checktype(j,t) dUASSERT((j)->type() == dJointType##t, \
+    "joint type is not " #t)
+
+
+void setBall( dxJoint *joint, dReal fps, dReal erp, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, 
+    dVector3 anchor1, dVector3 anchor2 );
+void setBall2( dxJoint *joint, dReal fps, dReal erp, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, 
+    dVector3 anchor1, dVector3 anchor2,
+    dVector3 axis, dReal erp1 );
+
+void setFixedOrientation( dxJoint *joint, dReal fps, dReal erp, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, 
+    dQuaternion qrel );
+
+
+void setAnchors( dxJoint *j, dReal x, dReal y, dReal z,
+    dVector3 anchor1, dVector3 anchor2 );
+
+void getAnchor( dxJoint *j, dVector3 result, dVector3 anchor1 );
+void getAnchor2( dxJoint *j, dVector3 result, dVector3 anchor2 );
+
+void setAxes( dxJoint *j, dReal x, dReal y, dReal z,
+             dVector3 axis1, dVector3 axis2 );
+void getAxis( dxJoint *j, dVector3 result, dVector3 axis1 );
+void getAxis2( dxJoint *j, dVector3 result, dVector3 axis2 );
+
+
+dReal getHingeAngle( dxBody *body1, dxBody *body2, dVector3 axis, dQuaternion q_initial );
+dReal getHingeAngleFromRelativeQuat( dQuaternion qrel, dVector3 axis );
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/joints.h b/libs/ode-0.16.1/ode/src/joints/joints.h
new file mode 100644
index 0000000..d06af4d
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/joints.h
@@ -0,0 +1,48 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINTS_H_
+#define _ODE_JOINTS_H_
+
+#include <ode/common.h>
+
+#include "joint.h"
+
+#include "ball.h"
+#include "dball.h"
+#include "dhinge.h"
+#include "transmission.h"
+#include "hinge.h"
+#include "slider.h"
+#include "contact.h"
+#include "universal.h"
+#include "hinge2.h"
+#include "fixed.h"
+#include "null.h"
+#include "amotor.h"
+#include "lmotor.h"
+#include "plane2d.h"
+#include "pu.h"
+#include "pr.h"
+#include "piston.h"
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/lmotor.cpp b/libs/ode-0.16.1/ode/src/joints/lmotor.cpp
new file mode 100644
index 0000000..8270188
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/lmotor.cpp
@@ -0,0 +1,214 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "lmotor.h"
+#include "joint_internal.h"
+
+
+//****************************************************************************
+// lmotor joint
+// Starts with no active axes; every axis slot is zeroed and set to the global frame.
+dxJointLMotor::dxJointLMotor( dxWorld *w ) : dxJoint( w )
+{
+    num = 0;
+    for ( int i = 0; i < 3; i++ )
+    {
+        rel[i] = 0; // 0 = global; was left indeterminate although computeGlobalAxes() reads it
+        dSetZero( axis[i], 4 );
+        limot[i].init( world );
+    }
+}
+
+// Writes the num active axes, converted to the global frame, into ax[0..num-1].
+void dxJointLMotor::computeGlobalAxes( dVector3 ax[3] )
+{
+    for ( int i = 0; i < num; i++ )
+    {
+        // the body whose rotation applies to the stored axis, if there is one
+        dxBody *frame = NULL;
+        if ( rel[i] == 1 ) frame = node[0].body;
+        else if ( rel[i] == 2 ) frame = node[1].body;
+
+        if ( frame )
+        {
+            dMultiply0_331( ax[i], frame->posr.R, axis[i] );
+        }
+        else
+        {
+            // global axis, or the reference body is missing: copy the stored
+            // vector so that getInfo2() never reads an uninitialized ax[i]
+            ax[i][0] = axis[i][0];
+            ax[i][1] = axis[i][1];
+            ax[i][2] = axis[i][2];
+        }
+    }
+}
+
+// Upper bound on the row count: getInfo1() emits at most one row per active axis.
+void dxJointLMotor::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = num;
+}
+
+void
+dxJointLMotor::getInfo1( dxJoint::Info1 *info )
+{
+    // one constraint row for every active axis whose motor has a positive fmax
+    int poweredAxes = 0;
+    for ( int axisIndex = 0; axisIndex < num; ++axisIndex )
+    {
+        if ( limot[axisIndex].fmax > 0 ) ++poweredAxes;
+    }
+
+    info->m = (uint8)poweredAxes;
+    info->nub = 0;
+}
+
+void
+dxJointLMotor::getInfo2( dReal worldFPS, dReal /*worldERP*/, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    dVector3 worldAxes[3];
+    computeGlobalAxes( worldAxes );
+
+    // rows are handed out consecutively; an axis only consumes one when addLimot() returns true
+    dReal *rowJ1 = J1, *rowJ2 = J2, *rowRhsCfm = pairRhsCfm, *rowLoHi = pairLoHi;
+    for ( int i = 0; i < num; ++i ) {
+        if ( !limot[i].addLimot( this, worldFPS, rowJ1, rowJ2, rowRhsCfm, rowLoHi, worldAxes[i], 0 ) ) continue;
+        rowJ1 += rowskip; rowJ2 += rowskip; rowRhsCfm += pairskip; rowLoHi += pairskip;
+    }
+}
+
+// Sets axis `anum' (0..2) of the linear motor from the vector (x,y,z).
+// rel selects the frame the axis is stored in: 0 = global, 1 = first body,
+// 2 = second body. For a body frame the vector is converted with
+// dMultiply1_331 and that body's rotation matrix; the result is normalized.
+void dJointSetLMotorAxis( dJointID j, int anum, int rel, dReal x, dReal y, dReal z )
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint && anum >= 0 && anum <= 2 && rel >= 0 && rel <= 2 );
+    checktype( joint, LMotor );
+
+    // keep both indices usable even if the argument check above does not stop execution
+    if ( anum < 0 ) anum = 0;
+    if ( anum > 2 ) anum = 2;
+    if ( rel < 0 ) rel = 0;
+    if ( rel > 2 ) rel = 2;
+
+    // fall back to a frame that exists: body 2 -> body 1 -> global. An
+    // unattached joint used to dereference a null body pointer below.
+    if ( rel == 2 && !joint->node[1].body ) rel = 1;
+    if ( rel == 1 && !joint->node[0].body ) rel = 0;
+
+    joint->rel[anum] = rel;
+
+    dVector3 r = { x, y, z, 0 };
+    if ( rel > 0 )
+    {
+        // the fallback above guarantees that the selected body exists
+        dxBody *frame = joint->node[rel - 1].body;
+        dMultiply1_331( joint->axis[anum], frame->posr.R, r );
+    }
+    else
+    {
+        // global frame: store the vector unchanged
+        joint->axis[anum][0] = r[0];
+        joint->axis[anum][1] = r[1];
+        joint->axis[anum][2] = r[2];
+    }
+
+    dNormalize3( joint->axis[anum] );
+}
+
+void dJointSetLMotorNumAxes( dJointID j, int num )
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint && num >= 0 && num <= 3 );
+    checktype( joint, LMotor );
+    // store the axis count clamped into 0..3, in case the argument check
+    // above does not stop execution
+    joint->num = ( num < 0 ) ? 0 : ( num > 3 ) ? 3 : num;
+}
+
+void dJointSetLMotorParam( dJointID j, int parameter, dReal value )
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint );
+    checktype( joint, LMotor );
+    // bits 8 and up select the axis (clamped to 0..2), the low byte is the parameter id
+    const int axisBits = parameter >> 8;
+    const int anum = ( axisBits < 0 ) ? 0 : ( axisBits > 2 ) ? 2 : axisBits;
+    const int paramId = parameter & 0xff;
+    joint->limot[anum].set( paramId, value );
+}
+
+int dJointGetLMotorNumAxes( dJointID j ) // reads back the axis count of an LMotor joint
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint );
+    checktype( joint, LMotor );
+    return joint->num; // value stored by dJointSetLMotorNumAxes(), 0 for a fresh joint
+}
+
+
+void dJointGetLMotorAxis( dJointID j, int anum, dVector3 result )
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint && anum >= 0 && anum < 3 );
+    checktype( joint, LMotor );
+    // copies the axis exactly as stored, i.e. without converting a body-relative
+    // axis back to the global frame. NOTE(review): confirm that callers expect this.
+    const int slot = ( anum < 0 ) ? 0 : ( anum > 2 ) ? 2 : anum;
+    const dReal *stored = joint->axis[slot];
+    for ( int k = 0; k < 3; ++k ) result[k] = stored[k];
+}
+
+dReal dJointGetLMotorParam( dJointID j, int parameter )
+{
+    dxJointLMotor* joint = ( dxJointLMotor* )j;
+    dAASSERT( joint );
+    checktype( joint, LMotor );
+    // bits 8 and up select the axis (clamped to 0..2), the low byte is the parameter id
+    const int axisBits = parameter >> 8;
+    const int anum = ( axisBits < 0 ) ? 0 : ( axisBits > 2 ) ? 2 : axisBits;
+    const int paramId = parameter & 0xff;
+    return joint->limot[anum].get( paramId );
+}
+
+// Identifies this joint as a linear motor.
+dJointType dxJointLMotor::type() const
+{
+    return dJointTypeLMotor;
+}
+
+
+// Size in bytes of the concrete dxJointLMotor object.
+sizeint dxJointLMotor::size() const
+{
+    return sizeof( *this );
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/lmotor.h b/libs/ode-0.16.1/ode/src/joints/lmotor.h
new file mode 100644
index 0000000..c819a47
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/lmotor.h
@@ -0,0 +1,51 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_LMOTOR_H_
+#define _ODE_JOINT_LMOTOR_H_
+
+#include "joint.h"
+
+struct dxJointLMotor : public dxJoint
+{
+    int num;                    // number of axes in use (the setter keeps it within 0..3)
+    int rel[3];                 // frame of each axis: 0 = global, 1 = first body, 2 = second body
+    dVector3 axis[3];           // axis vectors, stored in the frame selected by rel[]
+    dxJointLimitMotor limot[3]; // limit/motor parameters of each axis
+
+    void computeGlobalAxes( dVector3 ax[3] ); // writes the global-frame versions of the active axes into ax[]
+
+
+    dxJointLMotor( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/null.cpp b/libs/ode-0.16.1/ode/src/joints/null.cpp
new file mode 100644
index 0000000..315eea9
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/null.cpp
@@ -0,0 +1,74 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "null.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// null joint
+// The null joint declares no data members of its own, so only the base is constructed.
+dxJointNull::dxJointNull( dxWorld *w ) : dxJoint( w )
+{
+}
+
+// A null joint never produces constraint rows, so the bound is zero.
+void dxJointNull::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 0;
+}
+
+
+// Reports zero constraint rows and zero unbounded variables.
+void dxJointNull::getInfo1( dxJoint::Info1 *info )
+{
+    info->nub = 0;
+    info->m = 0;
+}
+
+
+// Not expected to run because getInfo1() reports zero rows; it only reports that fact through dDebug().
+void dxJointNull::getInfo2( dReal /*worldFPS*/, dReal /*worldERP*/, 
+    int /*rowskip*/, dReal * /*J1*/, dReal * /*J2*/,
+    int /*pairskip*/, dReal * /*pairRhsCfm*/, dReal * /*pairLoHi*/, 
+    int * /*findex*/ )
+{
+    dDebug( 0, "this should never get called" );
+}
+
+// Identifies this joint as a null joint.
+dJointType dxJointNull::type() const
+{
+    return dJointTypeNull;
+}
+
+// Size in bytes of the concrete dxJointNull object.
+sizeint dxJointNull::size() const
+{
+    return sizeof( *this );
+}
+
+
diff --git a/libs/ode-0.16.1/ode/src/joints/null.h b/libs/ode-0.16.1/ode/src/joints/null.h
new file mode 100644
index 0000000..fb3f629
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/null.h
@@ -0,0 +1,46 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_NULL_H_
+#define _ODE_JOINT_NULL_H_
+
+#include "joint.h"
+
+
+
+// null joint, for testing only: it adds no data members and no constraint rows
+
+struct dxJointNull : public dxJoint
+{
+    dxJointNull( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info ); // reports max_m = 0
+    virtual void getInfo1( Info1* info );             // reports m = 0 and nub = 0
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );                                // not expected to be called
+    virtual dJointType type() const;                  // dJointTypeNull
+    virtual sizeint size() const;                     // sizeof the concrete object
+};
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/piston.cpp b/libs/ode-0.16.1/ode/src/joints/piston.cpp
new file mode 100644
index 0000000..3bd7fd0
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/piston.cpp
@@ -0,0 +1,729 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "piston.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// Piston
+//
+
+// Builds a piston joint in world w with both axes defaulting to (1, 0, 0),
+// zeroed anchors and relative rotation, and both limit/motors initialised.
+dxJointPiston::dxJointPiston ( dxWorld *w ) : dxJoint ( w )
+{
+    // Clear all four components of each vector/quaternion member first.
+    dSetZero ( anchor1, 4 );
+    dSetZero ( anchor2, 4 );
+    dSetZero ( qrel, 4 );
+    dSetZero ( axis1, 4 );
+    dSetZero ( axis2, 4 );
+
+    // Default joint axis: first component set to 1, the rest stay 0.
+    axis1[0] = 1;
+    axis2[0] = 1;
+
+    limotP.init ( world ); // prismatic (sliding) limit/motor
+    limotR.init ( world ); // rotoide (rotational) limit/motor
+}
+
+
+// Returns the signed extension of the piston along its axis: the offset
+// between the two anchor points projected onto body 1's axis in world
+// coordinates. Returns 0 when no body is attached to the joint.
+dReal dJointGetPistonPosition ( dJointID j )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    dxBody *first = joint->node[0].body;
+    if ( !first )
+    {
+        dDEBUGMSG ( "The function always return 0 since no body are attached" );
+        return 0;
+    }
+
+    dxBody *second = joint->node[1].body;
+    const dReal *firstPos = first->posr.pos;
+
+    // Anchor 1 rotated into world orientation; afterwards reused to hold
+    // the world-space separation between the two anchor points.
+    dVector3 sep;
+    dMultiply0_331 ( sep, first->posr.R, joint->anchor1 );
+
+    if ( second )
+    {
+        // Anchor 2 rotated into world orientation.
+        dVector3 worldAnchor2;
+        dMultiply0_331 ( worldAnchor2, second->posr.R, joint->anchor2 );
+
+        const dReal *secondPos = second->posr.pos;
+        for ( int i = 0; i < 3; ++i )
+            sep[i] = ( firstPos[i] + sep[i] ) - ( secondPos[i] + worldAnchor2[i] );
+    }
+    else
+    {
+        // N.B. Without a second body joint->anchor2 is already expressed
+        //      in global coordinates.
+        for ( int i = 0; i < 3; ++i )
+            sep[i] = ( firstPos[i] + sep[i] ) - ( joint->anchor2[i] );
+
+        // A reversed joint reports the offset with the opposite sign.
+        if ( joint->flags & dJOINT_REVERSE )
+        {
+            for ( int i = 0; i < 3; ++i )
+                sep[i] = -sep[i];
+        }
+    }
+
+    // Joint axis in global coordinates.
+    dVector3 worldAxis;
+    dMultiply0_331 ( worldAxis, first->posr.R, joint->axis1 );
+    return dCalcVectorDot3 ( worldAxis, sep );
+}
+
+
+// Returns the time derivative of the piston position: the relative linear
+// velocity of the attached bodies projected onto the joint axis (in world
+// coordinates). Returns 0 when no body is attached, consistent with
+// dJointGetPistonPosition, instead of dereferencing a null body pointer.
+dReal dJointGetPistonPositionRate ( dJointID j )
+{
+    dxJointPiston* joint = ( dxJointPiston* ) j;
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    if ( !joint->node[0].body ) return 0;
+
+    // get axis in global coordinates
+    dVector3 ax;
+    dMultiply0_331 ( ax, joint->node[0].body->posr.R, joint->axis1 );
+
+    // The linear velocity created by the rotation can be discarded since
+    // the rotation is about the prismatic axis and so creates no linear
+    // velocity in the direction of that axis.
+    dReal rate = dCalcVectorDot3 ( ax, joint->node[0].body->lvel );
+    if ( joint->node[1].body ) return rate - dCalcVectorDot3 ( ax, joint->node[1].body->lvel );
+    // Single-body joint: a reversed attachment reports the opposite sign.
+    return ( joint->flags & dJOINT_REVERSE ) ? -rate : rate;
+}
+
+
+// Returns the rotation angle of the piston about its axis as computed by
+// getHingeAngle from the stored relative rotation, negated for a reversed
+// joint. Returns 0 when no body is attached.
+dReal dJointGetPistonAngle ( dJointID j )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dAASSERT ( joint );
+    checktype ( joint, Piston );
+
+    dxBody *first = joint->node[0].body;
+    if ( !first )
+        return 0;
+
+    const dReal angle = getHingeAngle ( first, joint->node[1].body,
+        joint->axis1, joint->qrel );
+    return ( joint->flags & dJOINT_REVERSE ) ? -angle : angle;
+}
+
+
+// Returns the relative angular velocity of the attached bodies about the
+// joint axis (world frame), negated for a reversed joint. Returns 0 when
+// no body is attached.
+dReal dJointGetPistonAngleRate ( dJointID j )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dAASSERT ( joint );
+    checktype ( joint, Piston );
+
+    dxBody *first = joint->node[0].body, *second = joint->node[1].body;
+    if ( !first ) return 0;
+    dVector3 worldAxis;
+    dMultiply0_331 ( worldAxis, first->posr.R, joint->axis1 );
+    dReal spin = dCalcVectorDot3 ( worldAxis, first->avel );
+    if ( second ) spin -= dCalcVectorDot3 ( worldAxis, second->avel );
+    return ( joint->flags & dJOINT_REVERSE ) ? -spin : spin;
+}
+
+
+// Upper bound on constraint rows: 4 fixed rows plus one each for limotP and limotR.
+void dxJointPiston::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6;
+}
+
+
+// Reports how many constraint rows this joint needs for the current step.
+// Four rows are always present; one more is added for the prismatic part
+// and one for the rotoide part when that part is powered or at a limit.
+// Resets limotP.limit and limotR.limit before re-testing the stops.
+void dxJointPiston::getInfo1 ( dxJoint::Info1 *info )
+{
+    int rows = 4; // default number of constraint rows
+
+    // --- prismatic part ---------------------------------------------------
+    limotP.limit = 0;
+    const bool slideBounded = limotP.lostop > -dInfinity || limotP.histop < dInfinity;
+    if ( slideBounded && limotP.lostop <= limotP.histop )
+    {
+        // N.B. testRotationalLimit is ill named: here it tests a linear position.
+        dReal slide = dJointGetPistonPosition ( this );
+        limotP.testRotationalLimit ( slide );
+    }
+    // A powered or limited prismatic part needs an extra constraint row.
+    if ( limotP.limit || limotP.fmax > 0 )
+        ++rows;
+
+    // --- rotoide part -----------------------------------------------------
+    limotR.limit = 0;
+    const bool turnBounded = limotR.lostop > -dInfinity || limotR.histop < dInfinity;
+    if ( turnBounded && limotR.lostop <= limotR.histop )
+    {
+        dReal angle = getHingeAngle ( node[0].body, node[1].body, axis1, qrel );
+        limotR.testRotationalLimit ( angle );
+    }
+    // A powered or limited rotoide part needs an extra constraint row.
+    if ( limotR.limit || limotR.fmax > 0 )
+        ++rows;
+
+    // Commit the results to the caller's Info1.
+    info->nub = 4; // number of unbound variables
+    info->m = rows;
+}
+
+
+void
+dxJointPiston::getInfo2 ( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    const dReal k = worldFPS * worldERP;
+    // Fills rows 0-1 (angular alignment) and 2-3 (lateral position), then any limit/motor rows.
+    // N.B. findex is not referenced in this function; k scales each error term on the RHS.
+    // Pull out pos and R for both bodies. also get the `connection'
+    // vector pos2-pos1.
+
+    dVector3 dist; // Current position of body_1  w.r.t "anchor"
+    // 2 bodies anchor is center of body 2
+    // 1 bodies anchor is origin
+    dVector3 lanchor2 = { 0,0,0 };
+
+    dReal *pos1 = node[0].body->posr.pos;
+    dReal *R1   = node[0].body->posr.R;
+    dReal *R2 = NULL;
+
+    dxBody *body1 = node[1].body;
+
+    if ( body1 ) 
+    {
+        dReal *pos2 = body1->posr.pos;
+        R2   = body1->posr.R;
+
+        dMultiply0_331 ( lanchor2, R2, anchor2 );
+        dist[0] = lanchor2[0] + pos2[0] - pos1[0];
+        dist[1] = lanchor2[1] + pos2[1] - pos1[1];
+        dist[2] = lanchor2[2] + pos2[2] - pos1[2];
+    } 
+    else 
+    {
+        // pos2 = 0; // N.B. We can do that to be safe but it is not necessary
+        // R2 = 0;   // N.B. We can do that to be safe but it is not necessary
+        if ( (flags & dJOINT_REVERSE) != 0 )
+        {
+            dSubtractVectors3(dist, pos1, anchor2); // Invert the value
+        }
+        else
+        {
+            dSubtractVectors3(dist, anchor2, pos1);
+        }
+    }
+
+    // ======================================================================
+    // Work on the angular part (i.e. row 0, 1)
+    // Set the two orientation rows. The rotoide axis should be the only
+    // unconstrained rotational axis, the angular velocity of the two bodies
+    // perpendicular to the rotoide axis should be equal.
+    // Thus the constraint equations are:
+    //    p*w1 - p*w2 = 0
+    //    q*w1 - q*w2 = 0
+    // where p and q are unit vectors normal to the rotoide axis, and w1 and w2
+    // are the angular velocity vectors of the two bodies.
+    // Since the rotoide axis is the same as the prismatic axis.
+    //
+    //
+    // Also, compute the right hand side (RHS) of the rotation constraint equation set.
+    // The first 2 element will result in the relative angular velocity of the two
+    // bodies along axis p and q. This is set to bring the rotoide back into alignment.
+    // if `theta' is the angle between ax1 and ax2, we need an angular velocity
+    // along u to cover angle erp*theta in one step :
+    //   |angular_velocity| = angle/time = erp*theta / stepsize
+    //                      = (erp*fps) * theta
+    //    angular_velocity  = |angular_velocity| * u
+    //                      = (erp*fps) * theta * u
+    // where rotation along unit length axis u by theta brings body 2's frame
+    //
+    // if theta is smallish, sin(theta) ~= theta and cos(theta) ~= 1
+    // where the quaternion of the relative rotation between the two bodies is
+    //    quat = [cos(theta/2) sin(theta/2)*u]
+    //    quat = [1 theta/2*u]
+    //         => q[0] ~= 1
+    //            2 * q[1+i] = theta * u[i]
+    //
+    // Since there is no constraint along the rotoide axis
+    // only along p and q that we want the same angular velocity and need to reduce
+    // the error
+    dVector3 b, ax1, p, q;
+    dMultiply0_331 ( ax1, node[0].body->posr.R, axis1 );
+
+    // Find the 2 axis perpendicular to the rotoide axis.
+    dPlaneSpace ( ax1, p, q );
+
+    // LHS
+    dCopyVector3 ( J1 + GI2__JA_MIN, p );
+
+    if ( body1 )
+    {
+        dCopyNegatedVector3 ( J2 + GI2__JA_MIN, p );
+    }
+
+    dCopyVector3 ( J1 + rowskip + GI2__JA_MIN, q );
+
+    if ( body1 ) 
+    {
+        dCopyNegatedVector3 ( J2 + rowskip + GI2__JA_MIN, q );
+
+        // Some math for the RHS
+        dVector3 ax2;
+        dMultiply0_331 ( ax2, R2, axis2 );
+        dCalcVectorCross3( b, ax1, ax2 );
+    }
+    else
+    {
+        // Some math for the RHS
+        dCalcVectorCross3( b, ax1, axis2 );
+    }
+
+    // RHS
+    pairRhsCfm[GI2_RHS] = k * dCalcVectorDot3 ( p, b );
+    pairRhsCfm[pairskip + GI2_RHS] = k * dCalcVectorDot3 ( q, b );
+
+
+    // ======================================================================
+    // Work on the linear part (i.e row 2,3)
+    // p2 + R2 anchor2' = p1 + R1 dist'
+    // v2 + w2 R2 anchor2' + R2 d(anchor2')/dt  = v1 + w1 R1 dist' + R1 d(dist')/dt
+    // v2 + w2 x anchor2 = v1 + w1 x dist + v_p
+    // v_p is speed of prismatic joint (i.e. elongation rate)
+    // Since the constraints are perpendicular to v_p we have:
+    // p . v_p = 0 and q . v_p = 0
+    // Along p and q we have (since sliding along the prismatic axis is disregarded):
+    // u . ( v2 + w2 x anchor2 = v1 + w1 x dist + v_p) ( where u is p or q )
+    // Simplify
+    // u . v2 + u. w2 x anchor2 = u . v1 + u . w1 x dist
+    // or
+    // u . v1 - u . v2 + u . w1 x dist - u . w2 x anchor2 = 0
+    // using the fact that (a x b = - b x a)
+    // u . v1 - u . v2 - u . dist x w1  + u . anchor2 x w2 = 0
+    // With the help of the triple product:
+    //   i.e.  a . b x c = b . c x a = c . a x b  or  a . b x c = a x b . c
+    //   Ref: http://mathworld.wolfram.com/ScalarTripleProduct.html
+    // u . v1 - u . v2 - u x dist . w1 + u x anchor2 . w2 = 0
+    // u . v1 - u . v2 + dist x u . w1 - u x anchor2 . w2 = 0
+    //
+    // Coeff for 1st row of: J1l => p, J2l => -p
+    // Coeff for 2nd row of: J1l => q, J2l => -q
+    // Coeff for 1st row of: J1a => dist x p, J2a => p x anchor2
+    // Coeff for 2nd row of: J1a => dist x q, J2a => q x anchor2
+
+    int currRowSkip = 2 * rowskip;
+    {
+        dCopyVector3 ( J1 + currRowSkip + GI2__JL_MIN, p );
+        dCalcVectorCross3( J1 + currRowSkip + GI2__JA_MIN, dist, p );
+
+        if ( body1 )
+        {
+            // info->J2l[s2+i] = -p[i];
+            dCopyNegatedVector3 ( J2 + currRowSkip + GI2__JL_MIN, p );
+            // p x anchor2 instead of anchor2 x p since we want the negative value
+            dCalcVectorCross3( J2 + currRowSkip + GI2__JA_MIN, p, lanchor2 );
+        }
+    }
+
+    currRowSkip += rowskip;
+    {
+        dCopyVector3 ( J1 + currRowSkip + GI2__JL_MIN, q );
+        dCalcVectorCross3( J1 + currRowSkip + GI2__JA_MIN, dist, q );
+
+        if ( body1 )
+        {
+            // info->J2l[s3+i] = -q[i];
+            dCopyNegatedVector3 ( J2 + currRowSkip + GI2__JL_MIN, q );
+            // The cross product is in reverse order since we want the negative value
+            dCalcVectorCross3( J2 + currRowSkip + GI2__JA_MIN, q, lanchor2 );
+        }
+    }
+
+    // We want to make correction for motion not in the line of the axis
+    // We calculate the displacement w.r.t. the "anchor" pt.
+    // i.e. Find the difference between the current position and the initial
+    //      position along the constrained axes (i.e. axis p and q).
+    // The bodies can move w.r.t each other only along the prismatic axis
+    //
+    // Compute the RHS of rows 2 and 3
+    dVector3 err;
+    dMultiply0_331 ( err, R1, anchor1 );
+    dSubtractVectors3( err, dist, err );
+
+    int currPairSkip = 2 * pairskip;
+    {
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( p, err );
+    }
+
+    currPairSkip += pairskip;
+    {
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( q, err );
+    }
+
+    currRowSkip += rowskip; currPairSkip += pairskip;
+    
+    if ( body1 || (flags & dJOINT_REVERSE) == 0 )
+    {
+        if (limotP.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 0 ))
+        {
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+    }
+    else
+    {
+        dVector3 rAx1;
+        dCopyNegatedVector3(rAx1, ax1);
+
+        if (limotP.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, rAx1, 0 ))
+        {
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+    }
+
+    limotR.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 );
+}
+
+// Sets both anchors from (x, y, z) via setAnchors, then re-derives the initial relative rotation.
+void dJointSetPistonAnchor ( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+    setAnchors ( joint, x, y, z, joint->anchor1, joint->anchor2 );
+    joint->computeInitialRelativeRotation();
+}
+
+// Sets the anchor as if body 1 were displaced by -(dx, dy, dz): the body is
+// shifted temporarily, setAnchors is applied, and the body is shifted back.
+// For a reversed joint the offset is negated first.
+void dJointSetPistonAnchorOffset (dJointID j, dReal x, dReal y, dReal z,
+                                  dReal dx, dReal dy, dReal dz)
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> (j);
+    dUASSERT (joint,"bad joint argument");
+    checktype ( joint, Piston );
+
+    const bool reversed = (joint->flags & dJOINT_REVERSE) != 0;
+    const dReal shift[3] = { reversed ? -dx : dx, reversed ? -dy : dy, reversed ? -dz : dz };
+
+    // Move body 1 (when attached) by -shift before the anchors are derived.
+    dxBody *first = joint->node[0].body;
+    if (first)
+    {
+        for (int i = 0; i < 3; ++i)
+            first->posr.pos[i] -= shift[i];
+    }
+
+    setAnchors (joint, x, y, z, joint->anchor1, joint->anchor2);
+
+    // Undo the temporary move.
+    if (first)
+    {
+        for (int i = 0; i < 3; ++i)
+            first->posr.pos[i] += shift[i];
+    }
+
+    joint->computeInitialRelativeRotation();
+}
+
+
+
+// Writes the joint anchor to result: normally anchor 1 via getAnchor, but
+// for a reversed joint anchor 2 via getAnchor2.
+void dJointGetPistonAnchor ( dJointID j, dVector3 result )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    dUASSERT ( result, "bad result argument" );
+    checktype ( joint, Piston );
+    if ( ( joint->flags & dJOINT_REVERSE ) == 0 ) getAnchor ( joint, result, joint->anchor1 );
+    else getAnchor2 ( joint, result, joint->anchor2 );
+}
+
+
+// Writes the second joint anchor to result: normally anchor 2 via
+// getAnchor2, but for a reversed joint anchor 1 via getAnchor.
+void dJointGetPistonAnchor2 ( dJointID j, dVector3 result )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    dUASSERT ( result, "bad result argument" );
+    checktype ( joint, Piston );
+    if ( ( joint->flags & dJOINT_REVERSE ) == 0 ) getAnchor2 ( joint, result, joint->anchor2 );
+    else getAnchor ( joint, result, joint->anchor1 );
+}
+
+
+
+// Sets both stored axes from (x, y, z) via setAxes, then re-derives the
+// initial relative rotation between the attached bodies.
+void dJointSetPistonAxis ( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+    setAxes ( joint, x, y, z, joint->axis1, joint->axis2 );
+    joint->computeInitialRelativeRotation();
+}
+
+
+// Sets the piston axis from (x, y, z) and recomputes anchor1 from the
+// bodies' current positions offset by (dx, dy, dz).
+// N.B. Body 1 must be attached: its pose is dereferenced unconditionally.
+void dJointSetPistonAxisDelta ( dJointID j, dReal x, dReal y, dReal z,
+                               dReal dx, dReal dy, dReal dz )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    setAxes ( joint, x, y, z, joint->axis1, joint->axis2 );
+    joint->computeInitialRelativeRotation();
+
+    dxBody *first = joint->node[0].body;
+    dxBody *second = joint->node[1].body;
+    const dReal delta[3] = { dx, dy, dz };
+
+    // Offset in world axes: position of body 1 (relative to body 2 when
+    // present) minus the delta. It becomes anchor1 once rotated below.
+    dVector3 offset = {0,0,0};
+    for ( int i = 0; i < 3; ++i )
+    {
+        if ( second )
+            offset[i] = ( first->posr.pos[i] - second->posr.pos[i] - delta[i] );
+        else
+            offset[i] = first->posr.pos[i] - delta[i];
+    }
+
+    // Convert into frame of body 1
+    dMultiply1_331 ( joint->anchor1, first->posr.R, offset );
+}
+
+
+
+// Writes the piston axis to result, as computed by getAxis from axis1.
+void dJointGetPistonAxis ( dJointID j, dVector3 result )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    dUASSERT ( result, "bad result argument" );
+    checktype ( joint, Piston );
+    getAxis ( joint, result, joint->axis1 );
+}
+
+// Sets a limit/motor parameter. When (parameter & 0xff00) == 0x100 the
+// value goes to the rotoide limit/motor with those upper bits stripped;
+// otherwise it goes, with the parameter unchanged, to the prismatic one.
+void dJointSetPistonParam ( dJointID j, int parameter, dReal value )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    const bool rotoide = ( parameter & 0xff00 ) == 0x100;
+    if ( !rotoide )
+        joint->limotP.set ( parameter, value );
+    else
+        joint->limotR.set ( parameter & 0xff, value );
+}
+
+
+// Reads a limit/motor parameter. When (parameter & 0xff00) == 0x100 the
+// value comes from the rotoide limit/motor with those upper bits stripped;
+// otherwise it comes, with the parameter unchanged, from the prismatic one.
+dReal dJointGetPistonParam ( dJointID j, int parameter )
+{
+    dxJointPiston* joint = static_cast<dxJointPiston*> ( j );
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    const bool rotoide = ( parameter & 0xff00 ) == 0x100;
+    if ( !rotoide )
+        return joint->limotP.get ( parameter );
+
+    return joint->limotR.get ( parameter & 0xff );
+}
+
+
+// Applies a force of the given magnitude along the piston axis to body 1
+// and the opposite force to body 2. When both bodies are attached, torques
+// are added as well to compensate for the anchor being offset from the
+// bodies' centres of mass. For a reversed joint the force is negated.
+void dJointAddPistonForce ( dJointID j, dReal force )
+{
+    dxJointPiston* joint = ( dxJointPiston* ) j;
+    dUASSERT ( joint, "bad joint argument" );
+    checktype ( joint, Piston );
+
+    // N.B. This must negate; "force -= force" would always yield zero.
+    if ( joint->flags & dJOINT_REVERSE )
+        force = -force;
+
+    dVector3 axis;
+    getAxis ( joint, axis, joint->axis1 );
+    dScaleVector3( axis, force ); // axis[i] *= force
+
+    if ( joint->node[0].body != 0 )
+        dBodyAddForce ( joint->node[0].body, axis[0], axis[1], axis[2] );
+    if ( joint->node[1].body != 0 )
+        dBodyAddForce ( joint->node[1].body, -axis[0], -axis[1], -axis[2] );
+
+    if ( joint->node[0].body != 0 && joint->node[1].body != 0 )
+    {
+        // Case where we don't need ltd since center of mass of both bodies
+        // pass by the anchor point '*' when travelling along the prismatic axis.
+        //                                     Body_2
+        //   Body_1                             -----
+        //    ---                |--           |     |
+        //   |   |---------------*-------------|     |     ---> prismatic axis
+        //    ---                |--           |     |
+        //                                      -----
+        //                                      Body_2
+        // Case where we need ltd
+        //   Body_1
+        //    ---
+        //   |   |---------
+        //    ---          |
+        //                 |     |--
+        //                  -----*-----                    ---> prismatic axis
+        //                       |--   |
+        //                             |
+        //                             |
+        //                             |        -----
+        //                             |       |     |
+        //                              -------|     |
+        //                                     |     |
+        //                                      -----
+        //                                      Body_2
+        //
+        // In real life force apply at the '*' point
+        // But in ODE the force are applied on the center of mass of Body_1 and Body_2
+        // So we have to add torques on both bodies to compensate for that when there
+        // is an offset between the anchor point and the center of mass of both bodies.
+        //
+        // We need to add to each body T = r x F
+        // Where r is the distance between the cm and '*'
+
+        dVector3 ltd; // Linear Torque Decoupling vector (a torque)
+        dVector3 c;   // Distance of the body w.r.t the anchor
+        // N.B. The distance along the prismatic axis might not be included
+        //      in this variable since it won't add anything to the ltd.
+
+        // Calculate the distance of the body w.r.t the anchor
+        // The anchor1 of body1 can be used since:
+        // Real anchor = Position of body 1 + anchor + d* axis1 = anchor in world frame
+        // d is the position of the prismatic joint (i.e. elongation)
+        // Since axis1 x axis1 == 0
+        // We can do the following.
+        dMultiply0_331 ( c, joint->node[0].body->posr.R, joint->anchor1 );
+        dCalcVectorCross3( ltd, c, axis );
+        dBodyAddTorque ( joint->node[0].body, ltd[0], ltd[1], ltd[2] );
+
+        dMultiply0_331 ( c, joint->node[1].body->posr.R, joint->anchor2 );
+        dCalcVectorCross3( ltd, c, axis );
+        dBodyAddTorque ( joint->node[1].body, ltd[0], ltd[1], ltd[2] );
+    }
+}
+
+
+// Identifies this joint as a piston joint.
+dJointType dxJointPiston::type() const
+{
+    return dJointTypePiston;
+}
+
+
+// Size in bytes of a dxJointPiston object.
+sizeint dxJointPiston::size() const
+{
+    return sizeof ( dxJointPiston );
+}
+
+
+
+// Re-derives the body-relative anchors, axes and initial relative rotation
+// from the joint's current anchor and axis as reported by the getters.
+void dxJointPiston::setRelativeValues()
+{
+    // One scratch vector is deliberately shared by both queries.
+    dVector3 scratch;
+    dJointGetPistonAnchor ( this, scratch );
+    setAnchors ( this, scratch[0], scratch[1], scratch[2], anchor1, anchor2 );
+    dJointGetPistonAxis ( this, scratch );
+    setAxes ( this, scratch[0], scratch[1], scratch[2], axis1, axis2 );
+    computeInitialRelativeRotation();
+}
+
+
+
+
+// Stores in qrel the relative rotation between the attached bodies at the
+// moment of the call. With a single body, qrel becomes the conjugate of
+// that body's quaternion; with no body attached qrel is left untouched.
+void dxJointPiston::computeInitialRelativeRotation()
+{
+    dxBody *first = node[0].body;
+    if ( !first )
+        return;
+
+    if ( dxBody *second = node[1].body )
+    {
+        dQMultiply1 ( qrel, first->q, second->q );
+        return;
+    }
+    // No second body: copy the scalar part and negate the vector part.
+    // WARNING do we need the - in -first->q[i]; or not
+    qrel[0] = first->q[0];
+    for ( int i = 1; i < 4; i++ ) qrel[i] = -first->q[i];
+}
diff --git a/libs/ode-0.16.1/ode/src/joints/piston.h b/libs/ode-0.16.1/ode/src/joints/piston.h
new file mode 100644
index 0000000..c202c20
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/piston.h
@@ -0,0 +1,112 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_PISTON_H_
+#define _ODE_JOINT_PISTON_H_
+
+#include "joint.h"
+
+
+////////////////////////////////////////////////////////////////////////////////
+/// Component of a Piston joint
+/// <PRE>
+///                              |- Anchor point
+///      Body_1                  |                       Body_2
+///      +---------------+       V                       +------------------+
+///     /               /|                             /                  /|
+///    /               / +       |--      ______      /                  / +
+///   /      x        /./........x.......(_____()..../         x        /.......> axis
+///  +---------------+ /         |--                +------------------+ /
+///  |               |/                             |                  |/
+///  +---------------+                              +------------------+
+///          |                                                 |
+///          |                                                 |
+///          |------------------> <----------------------------|
+///              anchor1                  anchor2
+///
+///
+/// </PRE>
+///
+/// When the prismatic joint has been elongated (i.e. dJointGetPistonPosition
+/// returns a value > 0)
+/// <PRE>
+///                                   |- Anchor point
+///      Body_1                       |                       Body_2
+///      +---------------+            V                       +------------------+
+///     /               /|                                  /                  /|
+///    /               / +            |--      ______      /                  / +
+///   /      x        /./........_____x.......(_____()..../         x        /.......> axis
+///  +---------------+ /              |--                +------------------+ /
+///  |               |/                                  |                  |/
+///  +---------------+                                   +------------------+
+///          |                                                      |
+///          |                                                      |
+///          |------------------>      <----------------------------|
+///              anchor1         |----|         anchor2
+///                                ^
+///                                |-- This is what dJointGetPistonPosition will
+///                                    return
+/// </PRE>
+////////////////////////////////////////////////////////////////////////////////
+struct dxJointPiston : public dxJoint
+{
+    /// Axis of the prismatic and rotoide w.r.t. the first body.
+    dVector3 axis1;
+    /// Axis of the prismatic and rotoide w.r.t. the second body.
+    dVector3 axis2;
+
+    /// Initial relative rotation body1 -> body2.
+    dQuaternion qrel;
+
+    /// Anchor w.r.t. the first body (the same as the offset for the Slider joint).
+    /// @note To find the position of the anchor once body 1 has moved, the
+    ///       position of the prismatic joint must be added, i.e.
+    ///       anchor = R1 * anchor1 + dJointGetPistonPosition() * (R1 * axis1)
+    dVector3 anchor1;
+    /// Anchor w.r.t. the second body.
+    dVector3 anchor2;
+
+    /// Limit and motor information for the prismatic part of the joint.
+    dxJointLimitMotor limotP;
+    /// Limit and motor information for the rotoide part of the joint.
+    dxJointLimitMotor limotR;
+
+    /// Creates a piston joint belonging to world @p w.
+    dxJointPiston( dxWorld *w );
+
+    // Virtual interface.
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+    virtual void setRelativeValues();
+
+    /// Recomputes qrel from the current orientation of the attached bodies.
+    void computeInitialRelativeRotation();
+};
+
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/plane2d.cpp b/libs/ode-0.16.1/ode/src/joints/plane2d.cpp
new file mode 100644
index 0000000..0caecb3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/plane2d.cpp
@@ -0,0 +1,195 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "plane2d.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// Plane2D
+/*
+This code is part of the Plane2D ODE joint
+by psero@gmx.de
+Wed Apr 23 18:53:43 CEST 2003
+*/
+
+
+// Unit axes (1,0,0), (0,1,0), (0,0,1); row i is the axis handed to addLimot().
+static const dReal Midentity[3][3] = {
+    { 1, 0, 0 },
+    { 0, 1, 0 },
+    { 0, 0, 1 }
+};
+
+
+// Builds the joint in world `w` and initialises its three limit/motor blocks.
+dxJointPlane2D::dxJointPlane2D( dxWorld *w ) : dxJoint( w )
+{
+    motor_x.init( world );
+    motor_y.init( world );
+    motor_angle.init( world );
+}
+
+
+// Upper bound on rows: getInfo1() uses 3 fixed rows plus at most 3 motor rows.
+void dxJointPlane2D::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6;
+}
+
+
+// Declares the constraint-row layout for this step.
+//
+// Rows 0-2 are the fixed plane constraints. Each motor whose fmax is positive
+// claims the next free row; a stored row index of 0 marks a disabled motor,
+// which is unambiguous because getInfo2() only acts on indices > 0.
+void dxJointPlane2D::getInfo1( dxJoint::Info1 *info )
+{
+    // A motor gets a row only when its fmax is positive.
+    const bool use_x = motor_x.fmax > 0;
+    const bool use_y = motor_y.fmax > 0;
+    const bool use_angle = motor_angle.fmax > 0;
+
+    info->nub = 3;
+    info->m = 3;
+
+    // Rows are handed out in x, y, angle order, so the order of the three
+    // assignments below is significant: each one may post-increment info->m.
+    row_motor_x = use_x ? info->m++ : 0;
+    row_motor_y = use_y ? info->m++ : 0;
+    row_motor_angle = use_angle ? info->m++ : 0;
+}
+
+
+
+// Fills in the three plane-constraint rows plus any motor rows picked by getInfo1().
+void dxJointPlane2D::getInfo2( dReal worldFPS, dReal worldERP,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi,
+    int *findex )
+{
+    // gain applied to the drift-correction term below
+    const dReal eps = worldFPS * worldERP;
+
+    /*
+        v = v1, w = omega1
+        (v2, omega2 not important (== static environment))
+
+        constraint equations:
+            vz = 0
+            wx = 0
+            wy = 0
+
+        <=> ( 0 0 1 ) (vx)   ( 0 0 0 ) (wx)   ( 0 )
+            ( 0 0 0 ) (vy) + ( 1 0 0 ) (wy) = ( 0 )
+            ( 0 0 0 ) (vz)   ( 0 1 0 ) (wz)   ( 0 )
+            J1/J1l           Omega1/J1a
+    */
+
+    // left hand side: a single unit coefficient in each of the three rows
+    dReal *const row_vz = J1;
+    dReal *const row_wx = J1 + rowskip;
+    dReal *const row_wy = J1 + 2 * rowskip;
+    row_vz[GI2_JLZ] = 1;
+    row_wx[GI2_JAX] = 1;
+    row_wy[GI2_JAY] = 1;
+
+    // drift correction a) linear vz, so that z (== pos[2]) == 0
+    pairRhsCfm[GI2_RHS] = eps * -node[0].body->posr.pos[2];
+
+# if 0
+    // b) angular correction? -> left to application !!!
+    dReal       *body_z_axis = &node[0].body->R[8];
+    pairRhsCfm[pairskip + GI2_RHS] = eps * + atan2( body_z_axis[1], body_z_axis[2] );  // wx error
+    pairRhsCfm[2 * pairskip + GI2_RHS] = eps * -atan2( body_z_axis[0], body_z_axis[2] );  // wy error
+# endif
+
+    // motor rows: a row index of 0 means the motor got no row from getInfo1()
+    if ( row_motor_x > 0 )
+    {
+        const int jOfs = row_motor_x * rowskip, pOfs = row_motor_x * pairskip;
+        motor_x.addLimot( this, worldFPS, J1 + jOfs, J2 + jOfs, pairRhsCfm + pOfs, pairLoHi + pOfs, Midentity[0], 0 );
+    }
+
+    if ( row_motor_y > 0 )
+    {
+        const int jOfs = row_motor_y * rowskip, pOfs = row_motor_y * pairskip;
+        motor_y.addLimot( this, worldFPS, J1 + jOfs, J2 + jOfs, pairRhsCfm + pOfs, pairLoHi + pOfs, Midentity[1], 0 );
+    }
+
+    if ( row_motor_angle > 0 )
+    {
+        const int jOfs = row_motor_angle * rowskip, pOfs = row_motor_angle * pairskip;
+        motor_angle.addLimot( this, worldFPS, J1 + jOfs, J2 + jOfs, pairRhsCfm + pOfs, pairLoHi + pOfs, Midentity[2], 1 );
+    }
+}
+
+
+// Reports the joint type tag for Plane2D joints.
+dJointType dxJointPlane2D::type() const
+{
+    return dJointTypePlane2D;
+}
+
+
+// Reports the size in bytes of this concrete joint object.
+sizeint dxJointPlane2D::size() const
+{
+    return sizeof( *this );
+}
+
+
+
+// Forwards `parameter`/`value` to motor_x of a Plane2D joint.
+void dJointSetPlane2DXParam( dxJoint *joint, int parameter, dReal value )
+{
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Plane2D );
+
+    static_cast<dxJointPlane2D*>( joint )->motor_x.set( parameter, value );
+}
+
+
+// Forwards `parameter`/`value` to motor_y of a Plane2D joint.
+void dJointSetPlane2DYParam( dxJoint *joint, int parameter, dReal value )
+{
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Plane2D );
+
+    static_cast<dxJointPlane2D*>( joint )->motor_y.set( parameter, value );
+}
+
+
+
+// Forwards `parameter`/`value` to motor_angle of a Plane2D joint.
+void dJointSetPlane2DAngleParam( dxJoint *joint, int parameter, dReal value )
+{
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Plane2D );
+
+    static_cast<dxJointPlane2D*>( joint )->motor_angle.set( parameter, value );
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/plane2d.h b/libs/ode-0.16.1/ode/src/joints/plane2d.h
new file mode 100644
index 0000000..a9ccab6
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/plane2d.h
@@ -0,0 +1,54 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_PLANE2D_H_
+#define _ODE_JOINT_PLANE2D_H_
+
+#include "joint.h"
+
+
+// 2D joint: constrains the first body to z == 0 and removes its rotation
+// about x and y (see the constraint equations in dxJointPlane2D::getInfo2()).
+struct dxJointPlane2D : public dxJoint
+{
+    int                 row_motor_x;      // row used by motor_x, 0 = none (written by getInfo1)
+    int                 row_motor_y;      // row used by motor_y, 0 = none
+    int                 row_motor_angle;  // row used by motor_angle, 0 = none
+    dxJointLimitMotor   motor_x;          // motor on axis (1, 0, 0)
+    dxJointLimitMotor   motor_y;          // motor on axis (0, 1, 0)
+    dxJointLimitMotor   motor_angle;      // motor on axis (0, 0, 1)
+
+
+    dxJointPlane2D( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/pr.cpp b/libs/ode-0.16.1/ode/src/joints/pr.cpp
new file mode 100644
index 0000000..7d34ebe
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/pr.cpp
@@ -0,0 +1,613 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "pr.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// Prismatic and Rotoide
+
+// Creates a PR joint in the default pose drawn below.
+dxJointPR::dxJointPR( dxWorld *w ) : dxJoint( w )
+{
+    // Default Position
+    // Z^
+    //  | Body 1       P      R          Body2
+    //  |+---------+   _      _         +-----------+
+    //  ||         |----|----(_)--------+           |
+    //  |+---------+   -                +-----------+
+    //  |
+    // X.-----------------------------------------> Y
+    // N.B. X is coming out of the page
+    dSetZero( anchor2, 4 );
+    dSetZero( offset, 4 );
+    dSetZero( qrel, 4 );
+    dSetZero( axisR1, 4 );
+    dSetZero( axisR2, 4 );
+    dSetZero( axisP1, 4 );
+
+    // Rotoide axis defaults to +X on both bodies, prismatic axis to +Y.
+    axisR1[0] = 1;
+    axisR2[0] = 1;
+    axisP1[1] = 1;
+
+    limotR.init( world );
+    limotP.init( world );
+}
+
+
+// Returns the current extension of the prismatic part of a PR joint.
+//
+// The result is the world-frame vector from the rotoide anchor to the tip of
+// body 1's offset, projected on the world-frame prismatic axis.
+//
+// @param j  joint handle; must be a PR joint. Body 1 is dereferenced without
+//           a null check, so it has to be attached.
+// @return   signed distance along the prismatic axis. When there is no
+//           body 2 the sign is flipped for joints flagged dJOINT_REVERSE.
+dReal dJointGetPRPosition( dJointID j )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+
+    dxBody *const b1 = joint->node[0].body;
+    dxBody *const b2 = joint->node[1].body;
+
+    // get the offset in global coordinates
+    dVector3 q;
+    dMultiply0_331( q, b1->posr.R, joint->offset );
+
+    if ( b2 )
+    {
+        // get the anchor2 in global coordinates
+        dVector3 anchor2;
+        dMultiply0_331( anchor2, b2->posr.R, joint->anchor2 );
+        for ( int i = 0; i < 3; ++i )
+        {
+            q[i] = ( b1->posr.pos[i] + q[i] ) -
+                ( b2->posr.pos[i] + anchor2[i] );
+        }
+    }
+    else
+    {
+        // N.B. When there is no body 2 the joint->anchor2 is already in
+        //      global coordinates
+        const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+        for ( int i = 0; i < 3; ++i )
+        {
+            const dReal d = ( b1->posr.pos[i] + q[i] ) - joint->anchor2[i];
+            q[i] = reversed ? -d : d;
+        }
+    }
+
+    // get prismatic axis in global coordinates
+    dVector3 axP;
+    dMultiply0_331( axP, b1->posr.R, joint->axisP1 );
+
+    return dCalcVectorDot3( axP, q );
+}
+
+// Returns the body velocities projected on the world-frame prismatic axis.
+dReal dJointGetPRPositionRate( dJointID j )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+    dxBody *const b1 = joint->node[0].body;
+
+    dVector3 ax1;  // prismatic axis (axisP1) in global coordinates
+    dMultiply0_331( ax1, b1->posr.R, joint->axisP1 );
+
+    if ( joint->node[1].body )
+    {
+        dVector3 lv2;
+        dBodyGetRelPointVel( joint->node[1].body, joint->anchor2[0], joint->anchor2[1], joint->anchor2[2], lv2 );
+        return dCalcVectorDot3( ax1, b1->lvel ) - dCalcVectorDot3( ax1, lv2 );
+    }
+
+    const dReal rate = dCalcVectorDot3( ax1, b1->lvel );
+    return ( joint->flags & dJOINT_REVERSE ) ? -rate : rate;
+}
+
+
+
+// Returns the rotoide angle as measured by getHingeAngle(), negated when the
+// joint is flagged dJOINT_REVERSE. A joint without a first body reports 0.
+dReal dJointGetPRAngle( dJointID j )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dAASSERT( joint );
+    checktype( joint, PR );
+
+    dxBody *const b1 = joint->node[0].body;
+    if ( !b1 )
+        return 0;
+
+    const dReal ang = getHingeAngle( b1,
+        joint->node[1].body,
+        joint->axisR1, joint->qrel );
+
+    return ( joint->flags & dJOINT_REVERSE ) ? -ang : ang;
+}
+
+
+
+// Returns the relative angular velocity about the rotoide axis (0 without body 1).
+dReal dJointGetPRAngleRate( dJointID j )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dAASSERT( joint );
+    checktype( joint, PR );
+    dxBody *const b1 = joint->node[0].body;
+    if ( !b1 ) return 0;
+
+    dVector3 axis;  // axisR1 rotated by body 1's R
+    dMultiply0_331( axis, b1->posr.R, joint->axisR1 );
+    dReal rate = dCalcVectorDot3( axis, b1->avel );
+    if ( joint->node[1].body )
+        rate -= dCalcVectorDot3( axis, joint->node[1].body->avel );
+    return ( joint->flags & dJOINT_REVERSE ) ? -rate : rate;
+}
+
+
+
+
+// Upper bound on rows: getInfo1() uses 4 fixed rows plus at most 2 limot rows.
+void dxJointPR::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6;
+}
+
+
+
+// Counts the constraint rows needed this step and refreshes both limit states.
+//
+// Four rows are always present. The prismatic part and the rotoide part each
+// add one more row when limot.limit is non-zero or the motor has fmax > 0.
+void dxJointPR::getInfo1( dxJoint::Info1 *info )
+{
+    info->nub = 4;
+    info->m = 4;
+
+    // ---- prismatic part ----
+    // Stops are tested only if at least one is finite and lostop <= histop.
+    const bool testStopsP =
+        ( limotP.lostop > -dInfinity || limotP.histop < dInfinity ) &&
+        limotP.lostop <= limotP.histop;
+    limotP.limit = 0;
+    if ( testStopsP )
+    {
+        // N.B. testRotationalLimit() is ill named: it is given a position here.
+        limotP.testRotationalLimit( dJointGetPRPosition( this ) );
+    }
+    if ( limotP.limit || limotP.fmax > 0 )
+        info->m++;
+
+    // ---- rotoide part ----
+    const bool testStopsR =
+        ( limotR.lostop >= -M_PI || limotR.histop <= M_PI ) &&
+        limotR.lostop <= limotR.histop;
+    limotR.limit = 0;
+    if ( testStopsR )
+    {
+        const dReal angle = getHingeAngle( node[0].body, node[1].body, axisR1, qrel );
+        limotR.testRotationalLimit( angle );
+    }
+    if ( limotR.limit || limotR.fmax > 0 )
+        info->m++;
+}
+
+
+
+void
+dxJointPR::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    dReal k = worldFPS * worldERP;  // gain applied to every right hand side below
+    // Fills the 4 fixed rows (2 rotoide, then 2 prismatic) and afterwards the
+    // optional limit/motor rows of limotP and limotR, in that order.
+    dVector3 q;  // plane space of axP and after that axR
+
+    // pull out pos and R for both bodies. also get the `connection'
+    // vector pos2-pos1.
+
+    dReal *pos2 = NULL, *R2 = NULL;
+    
+    dReal *pos1 = node[0].body->posr.pos;
+    dReal *R1 = node[0].body->posr.R;
+    // N.B. despite its name, `body1` below is the joint's second body (node[1]).
+    dxBody *body1 = node[1].body;
+
+    if ( body1 )
+    {
+        pos2 = body1->posr.pos;
+        R2 = body1->posr.R;
+    }
+
+
+    dVector3 axP; // Axis of the prismatic joint in global frame
+    dMultiply0_331( axP, R1, axisP1 );
+
+    // distance between the body1 and the anchor2 in global frame
+    // Calculated in the same way as the offset
+    dVector3 wanchor2 = {0, 0, 0}, dist;
+
+    if ( body1 )
+    {
+        // Calculate anchor2 in world coordinate
+        dMultiply0_331( wanchor2, R2, anchor2 );
+        dist[0] = wanchor2[0] + pos2[0] - pos1[0];
+        dist[1] = wanchor2[1] + pos2[1] - pos1[1];
+        dist[2] = wanchor2[2] + pos2[2] - pos1[2];
+    }
+    else
+    {
+        if ( (flags & dJOINT_REVERSE) != 0 )
+        {
+            dSubtractVectors3(dist, pos1, anchor2); // reversed joint: dist = pos1 - anchor2
+        }
+        else
+        {
+            dSubtractVectors3(dist, anchor2, pos1); // normal case: dist = anchor2 - pos1
+        }
+    }
+
+
+    // ======================================================================
+    // Work on the Rotoide part (i.e. rows 0 and 1, plus a final row if limotR adds one)
+
+    // Set the two rotoide rows. The rotoide axis should be the only unconstrained
+    // rotational axis, the angular velocity of the two bodies perpendicular to
+    // the rotoide axis should be equal. Thus the constraint equations are
+    //    p*w1 - p*w2 = 0
+    //    q*w1 - q*w2 = 0
+    // where p and q are unit vectors normal to the rotoide axis, and w1 and w2
+    // are the angular velocity vectors of the two bodies.
+    dVector3 ax2;
+    dVector3 ax1;
+    dMultiply0_331( ax1, R1, axisR1 );
+    dCalcVectorCross3( q , ax1, axP );
+
+    dCopyVector3(J1 + GI2__JA_MIN, axP);
+
+    if ( body1 )
+    {
+        dCopyNegatedVector3(J2 + GI2__JA_MIN, axP);
+    }
+
+    dCopyVector3(J1 + rowskip + GI2__JA_MIN, q);
+
+    if ( body1 )
+    {
+        dCopyNegatedVector3(J2 + rowskip + GI2__JA_MIN, q);
+    }
+
+    // Compute the right hand side of the constraint equation set. Relative
+    // body velocities along p and q to bring the rotoide back into alignment.
+    // ax1,ax2 are the unit length rotoide axes of body1 and body2 in world frame.
+    // We need to rotate both bodies along the axis u = (ax1 x ax2).
+    // if `theta' is the angle between ax1 and ax2, we need an angular velocity
+    // along u to cover angle erp*theta in one step :
+    //   |angular_velocity| = angle/time = erp*theta / stepsize
+    //                      = (erp*fps) * theta
+    //    angular_velocity  = |angular_velocity| * (ax1 x ax2) / |ax1 x ax2|
+    //                      = (erp*fps) * theta * (ax1 x ax2) / sin(theta)
+    // ...as ax1 and ax2 are unit length. if theta is smallish,
+    // theta ~= sin(theta), so
+    //    angular_velocity  = (erp*fps) * (ax1 x ax2)
+    // ax1 x ax2 is in the plane space of ax1, so we project the angular
+    // velocity to p and q to find the right hand side.
+
+    if ( body1 )
+    {
+        dMultiply0_331( ax2, R2, axisR2 );
+    }
+    else
+    {
+        dCopyVector3(ax2, axisR2);
+    }
+
+    dVector3 b;
+    dCalcVectorCross3( b, ax1, ax2 );
+    pairRhsCfm[GI2_RHS] = k * dCalcVectorDot3( b, axP );
+    pairRhsCfm[pairskip + GI2_RHS] = k * dCalcVectorDot3( b, q );
+
+
+
+    // ==========================
+    // Work on the Prismatic part (i.e. rows 2 and 3, plus row 4 if limotP adds one;
+    // limotR's optional row then comes after it)
+
+    // two rows. we want: vel2 = vel1 + w1 x c ... but this would
+    // result in three equations, so we project along the planespace vectors
+    // so that sliding along the prismatic axis is disregarded. for symmetry we
+    // also substitute (w1+w2)/2 for w1, as w1 is supposed to equal w2.
+
+    // p1 + R1 dist' = p2 + R2 anchor2' ## OLD ## p1 + R1 anchor1' = p2 + R2 dist'
+    // v1 + w1 x R1 dist' + v_p = v2 + w2 x R2 anchor2'## OLD  v1 + w1 x R1 anchor1' = v2 + w2 x R2 dist' + v_p
+    // v_p is speed of prismatic joint (i.e. elongation rate)
+    // Since the constraints are perpendicular to v_p we have:
+    // p dot v_p = 0 and q dot v_p = 0
+    // ax1 dot ( v1 + w1 x dist = v2 + w2 x anchor2 )
+    // q dot ( v1 + w1 x dist = v2 + w2 x anchor2 )
+    // ==
+    // ax1 . v1 + ax1 . w1 x dist = ax1 . v2 + ax1 . w2 x anchor2 ## OLD ## ax1 . v1 + ax1 . w1 x anchor1 = ax1 . v2 + ax1 . w2 x dist
+    // since a . (b x c) = - b . (a x c) = - (a x c) . b
+    // and a x b = - b x a
+    // ax1 . v1 - ax1 x dist . w1 - ax1 . v2 - (- ax1 x anchor2 . w2) = 0
+    // ax1 . v1 + dist x ax1 . w1 - ax1 . v2 - anchor2 x ax1 . w2 = 0
+    // Coeff for 1st row of: J1l => ax1, J2l => -ax1
+    // Coeff for 2nd row of: J1l => q, J2l => -q
+    // Coeff for 1st row of: J1a => dist x ax1, J2a => - anchor2 x ax1
+    // Coeff for 2nd row of: J1a => dist x q,   J2a => - anchor2 x q
+
+    int currRowSkip = 2 * rowskip;
+    {
+        dCopyVector3( J1 + currRowSkip + GI2__JL_MIN, ax1 );
+        dCalcVectorCross3( J1 + currRowSkip + GI2__JA_MIN, dist, ax1 );
+
+        if ( body1 )
+        {
+            dCopyNegatedVector3( J2 + currRowSkip + GI2__JL_MIN, ax1 );
+            // ax2 x anchor2 instead of anchor2 x ax2 since we want the negative value
+            dCalcVectorCross3( J2 + currRowSkip + GI2__JA_MIN, ax2, wanchor2 );   // since ax1 == ax2
+        }
+    }
+
+    currRowSkip += rowskip;
+    {
+        dCopyVector3( J1 + currRowSkip + GI2__JL_MIN, q );
+        dCalcVectorCross3(J1 + currRowSkip + GI2__JA_MIN, dist, q );
+
+        if ( body1 )
+        {
+            dCopyNegatedVector3( J2 + currRowSkip + GI2__JL_MIN, q);
+            // The cross product is in reverse order since we want the negative value
+            dCalcVectorCross3( J2 + currRowSkip + GI2__JA_MIN, q, wanchor2 );
+        }
+    }
+
+    // We want to make correction for motion not in the line of the axisP
+    // We calculate the displacement w.r.t. the anchor pt.
+    //
+    // compute the elements 2 and 3 of right hand side.
+    // we want to align the offset point (in body 2's frame) with the center of body 1.
+    // The position should be the same when we are not along the prismatic axis
+    dVector3 err;
+    dMultiply0_331( err, R1, offset );
+    dSubtractVectors3(err, dist, err);
+
+    int currPairSkip = 2 * pairskip;
+    {
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3( ax1, err );
+    }
+
+    currPairSkip += pairskip;
+    {
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3( q, err );
+    }
+
+    currRowSkip += rowskip; currPairSkip += pairskip;
+
+    if (  body1 || (flags & dJOINT_REVERSE) == 0 )
+    {
+        if (limotP.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, axP, 0 ))
+        {
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+    }
+    else
+    {
+        dVector3 rAxP;
+        dCopyNegatedVector3(rAxP, axP);
+
+        if (limotP.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, rAxP, 0 ))
+        {
+            currRowSkip += rowskip; currPairSkip += pairskip;
+        }
+    }
+
+    limotR.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 );
+}
+
+
+// Stores in qrel the initial relative rotation body1 -> body2, or
+// env -> body1 when no second body is attached. No-op without a first body.
+void dxJointPR::computeInitialRelativeRotation()
+{
+    dxBody *const b1 = node[0].body;
+    if ( !b1 )
+        return;
+
+    if ( node[1].body )
+    {
+        dQMultiply1( qrel, b1->q, node[1].body->q );
+        return;
+    }
+
+    // No second body: qrel = body 1's q with elements 1..3 negated.
+    // WARNING (original author): unsure whether the negation is needed.
+    qrel[0] = b1->q[0];
+    for ( int i = 1; i < 4; i++ )
+        qrel[i] = -b1->q[i];
+}
+
+void dJointSetPRAnchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+    setAnchors( joint, x, y, z, joint->offset, joint->anchor2 );  // fills offset and anchor2
+}
+
+
+// Sets axisP1 (the prismatic axis) and recomputes the initial relative rotation.
+void dJointSetPRAxis1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+
+    setAxes( joint, x, y, z, joint->axisP1, 0 );
+    joint->computeInitialRelativeRotation();
+}
+
+
+void dJointSetPRAxis2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+    setAxes( joint, x, y, z, joint->axisR1, joint->axisR2 );  // rotoide axis, both bodies
+    joint->computeInitialRelativeRotation();
+}
+
+
+// Routes a parameter to the rotoide motor when its group bits equal 0x100
+// (only the low byte is passed on), otherwise to the prismatic motor unchanged.
+void dJointSetPRParam( dJointID j, int parameter, dReal value )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+
+    const bool forRotoide = ( parameter & 0xff00 ) == 0x100;
+    if ( forRotoide )
+        joint->limotR.set( parameter & 0xff, value );
+    else
+        joint->limotP.set( parameter, value );
+}
+
+// Writes the anchor to `result`: via getAnchor2() with a body 2, else a copy of anchor2.
+void dJointGetPRAnchor( dJointID j, dVector3 result )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PR );
+
+    if ( joint->node[1].body )
+        getAnchor2( joint, result, joint->anchor2 );
+    else
+    {
+        for ( int i = 0; i < 3; ++i )
+            result[i] = joint->anchor2[i];
+    }
+}
+
+void dJointGetPRAxis1( dJointID j, dVector3 result )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PR );
+    getAxis( joint, result, joint->axisP1 );  // axis 1 is the prismatic axis
+}
+
+void dJointGetPRAxis2( dJointID j, dVector3 result )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PR );
+    getAxis( joint, result, joint->axisR1 );  // axis 2 is the rotoide axis
+}
+
+// Reads a limit/motor parameter, using the same routing as dJointSetPRParam():
+// the rotoide motor for group 0x100, the prismatic motor for anything else.
+dReal dJointGetPRParam( dJointID j, int parameter )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PR );
+
+    // Group 0x100 -> rotoide; only the low byte is forwarded.
+    if (( parameter & 0xff00 ) == 0x100 )
+        return joint->limotR.get( parameter & 0xff );
+
+    return joint->limotP.get( parameter );
+}
+
+// Applies `torque` about the rotoide axis: +torque on body 1 and -torque on
+// body 2 (whichever are attached). The sign is flipped for reversed joints.
+void dJointAddPRTorque( dJointID j, dReal torque )
+{
+    dxJointPR* const joint = static_cast<dxJointPR*>( j );
+    dAASSERT( joint );
+    checktype( joint, PR );
+
+    const dReal signedTorque = ( joint->flags & dJOINT_REVERSE ) ? -torque : torque;
+
+    dVector3 axis;
+    getAxis( joint, axis, joint->axisR1 );
+    for ( int i = 0; i < 3; ++i )
+        axis[i] *= signedTorque;
+
+    if ( joint->node[0].body != 0 )
+        dBodyAddTorque( joint->node[0].body, axis[0], axis[1], axis[2] );
+    if ( joint->node[1].body != 0 )
+        dBodyAddTorque( joint->node[1].body, -axis[0], -axis[1], -axis[2] );
+}
+
+
+// Reports the joint type tag for PR joints.
+dJointType dxJointPR::type() const
+{
+    return dJointTypePR;
+}
+
+// Reports the size in bytes of this concrete joint object.
+sizeint dxJointPR::size() const
+{
+    return sizeof( *this );
+}
+
+
+// Re-runs the anchor/axis setters on the values the getters currently report, then refreshes qrel.
+void dxJointPR::setRelativeValues()
+{
+    dVector3 currentAnchor;
+    dJointGetPRAnchor( this, currentAnchor );
+    setAnchors( this, currentAnchor[0], currentAnchor[1], currentAnchor[2], offset, anchor2 );
+
+    dVector3 axis;
+    dJointGetPRAxis1( this, axis );
+    setAxes( this, axis[0], axis[1], axis[2], axisP1, 0 );
+
+    dJointGetPRAxis2( this, axis );
+    setAxes( this, axis[0], axis[1], axis[2], axisR1, axisR2 );
+
+    computeInitialRelativeRotation();
+}
+
+
+
+
+
diff --git a/libs/ode-0.16.1/ode/src/joints/pr.h b/libs/ode-0.16.1/ode/src/joints/pr.h
new file mode 100644
index 0000000..930c0cd
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/pr.h
@@ -0,0 +1,100 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_PR_H_
+#define _ODE_JOINT_PR_H_
+
+#include "joint.h"
+
+
+
+/**
+ * The prismatic axis (axisP1) must be perpendicular to the rotoide axis (axisR1/axisR2)
+ * <PRE>
+ *                                        +-------------+
+ *                                        |      x      |
+ *                                        +------------\+
+ * Prismatic articulation                   ..     ..
+ *                       |                ..     ..
+ *                      \/              ..      ..
+ * +--------------+    --|        __..      ..  anchor2
+ * |      x       | .....|.......(__)     ..
+ * +--------------+    --|         ^     <
+ *        |----------------------->|
+ *            Offset               |--- Rotoide articulation
+ * </PRE>
+ */
+struct dxJointPR : public dxJoint
+{
+
+    /// @brief Position of the rotoide articulation w.r.t second body.
+    /// @note Position of body 2 in world frame + anchor2 in world frame give
+    /// the position of the rotoide articulation
+    dVector3 anchor2;
+
+
+    /// Axis of the rotoide articulation w.r.t first body.
+    /// @note Set through dJointSetPRAxis2(), read back by dJointGetPRAxis2().
+    dVector3 axisR1;
+
+    /// Axis of the rotoide articulation w.r.t second body.
+    /// @note Also set through dJointSetPRAxis2().
+    dVector3 axisR2;
+
+    /// Axis of the prismatic articulation w.r.t first body.
+    /// @note Set through dJointSetPRAxis1(), read back by dJointGetPRAxis1().
+    dVector3 axisP1;
+
+
+    dQuaternion qrel;   ///< initial relative rotation body1 -> body2.
+
+
+    /// @brief vector between the body1 and the rotoide articulation.
+    ///
+    /// Going from the first to the second in the frame of body1.
+    /// That should be aligned with body1 center along axisP.
+    /// This is calculated when the anchor is set (see dJointSetPRAnchor()).
+    dVector3 offset;
+    dxJointLimitMotor limotR; ///< limit and motor information for the rotoide articulation.
+    dxJointLimitMotor limotP; ///< limit and motor information for the prismatic articulation.
+
+
+    void computeInitialRelativeRotation();
+
+
+    dxJointPR( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+    virtual void setRelativeValues();
+};
+
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/pu.cpp b/libs/ode-0.16.1/ode/src/joints/pu.cpp
new file mode 100644
index 0000000..42eaf4b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/pu.cpp
@@ -0,0 +1,756 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "pu.h"
+#include "joint_internal.h"
+
+
+//****************************************************************************
+// Prismatic and Universal
+
+dxJointPU::dxJointPU( dxWorld *w ) :
+    dxJointUniversal( w )
+{
+    // Default Position
+    //               Y                ^ Axis2
+    //              ^                 |
+    //             /                  |     ^ Axis1
+    // Z^         /                   |    /
+    //  |        / Body 2             |   /         Body 1
+    //  |       /  +---------+        |  /          +-----------+
+    //  |      /  /         /|        | /          /           /|
+    //  |     /  /         / +        _/     -    /           / +
+    //  |    /  /         /-/--------(_)----|--- /-----------/-------> AxisP
+    //  |   /  +---------+ /                 -  +-----------+ /
+    //  |  /   |         |/                     |           |/
+    //  | /    +---------+                      +-----------+
+    //  |/
+    //  .-----------------------------------------> X
+    //             |----------------->
+    //             Anchor2           <--------------|
+    //                               Anchor1
+    //
+
+    // First clear every vector and quaternion that is configured here ...
+    dSetZero( axisP1, 4 );
+    dSetZero( axis1, 4 );
+    dSetZero( axis2, 4 );
+    dSetZero( anchor2, 4 );
+    dSetZero( qrel1, 4 );
+    dSetZero( qrel2, 4 );
+
+    // ... then give each axis its unit component, as in the picture:
+    // prismatic axis along X, Axis1 along Y, Axis2 along Z.
+    axisP1[0] = 1;
+    axis1[1] = 1;
+    axis2[2] = 1;
+
+    // One limit/motor block per degree of freedom, each initialised
+    // with the joint's world.
+    limot1.init( world );
+    limot2.init( world );
+    limotP.init( world );
+}
+
+
+/**
+ * Position of the prismatic articulation.
+ * The world-frame vector between the two anchor points is projected onto the
+ * prismatic axis (axisP1 rotated by the first body's rotation).
+ * @param j The PU joint to query
+ * @return the signed projection described above
+ */
+dReal dJointGetPUPosition( dJointID j )
+{
+    dxJointPU* joint = ( dxJointPU* ) j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    // N.B. the first body is dereferenced unconditionally below.
+    dxBody *const firstBody = joint->node[0].body;
+    dxBody *const secondBody = joint->node[1].body;
+
+    // Rotate the prismatic axis and anchor1 by the first body's rotation.
+    dVector3 axisWorld, delta;
+    dMultiply0_331( axisWorld, firstBody->posr.R, joint->axisP1 );
+    dMultiply0_331( delta, firstBody->posr.R, joint->anchor1 );
+
+    if ( secondBody )
+    {
+        // Rotate anchor2 by the second body's rotation as well.
+        dVector3 anchor2World;
+        dMultiply0_331( anchor2World, secondBody->posr.R, joint->anchor2 );
+
+        // delta = (pos1 + anchor1) - (pos2 + anchor2), per component
+        for ( int i = 0; i < 3; ++i )
+        {
+            delta[i] = ( firstBody->posr.pos[i] + delta[i] ) -
+                ( secondBody->posr.pos[i] + anchor2World[i] );
+        }
+    }
+    else
+    {
+        // N.B. When there is no body 2 the joint->anchor2 is already in
+        //      global coordinates, so it is subtracted as stored.
+        const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+        for ( int i = 0; i < 3; ++i )
+        {
+            const dReal d = ( firstBody->posr.pos[i] + delta[i] ) - joint->anchor2[i];
+            // A reversed joint reports the separation with the opposite sign.
+            delta[i] = reversed ? -d : d;
+        }
+    }
+
+    return dCalcVectorDot3( axisWorld, delta );
+}
+
+
+dReal dJointGetPUPositionRate( dJointID j )
+{
+    dxJointPU* joint = ( dxJointPU* ) j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // Returns the rate of change of the prismatic position, or 0.0 when no first body is attached.
+    if ( joint->node[0].body )
+    {
+        // We want to find the rate of change of the prismatic part of the joint.
+        // We can find it by looking at the speed difference between body1 and the
+        // anchor point.
+
+        // r is the vector from the anchor point to the centre of body1 (world frame)
+        dVector3 r;
+        dVector3 anchor2 = {0,0,0}; // only filled in (and only read) when body 2 exists
+        if ( joint->node[1].body )
+        {
+            // Find joint->anchor2 in global coordinates
+            dMultiply0_331( anchor2, joint->node[1].body->posr.R, joint->anchor2 );
+
+            r[0] = ( joint->node[0].body->posr.pos[0] -
+                ( anchor2[0] + joint->node[1].body->posr.pos[0] ) );
+            r[1] = ( joint->node[0].body->posr.pos[1] -
+                ( anchor2[1] + joint->node[1].body->posr.pos[1] ) );
+            r[2] = ( joint->node[0].body->posr.pos[2] -
+                ( anchor2[2] + joint->node[1].body->posr.pos[2] ) );
+        }
+        else
+        {
+            //N.B. When there is no body 2 the joint->anchor2 is already in
+            //     global coordinates
+            // r = joint->node[0].body->posr.pos -  joint->anchor2;
+            dSubtractVectors3( r, joint->node[0].body->posr.pos, joint->anchor2 );
+        }
+
+        // The body1 can have velocity coming from the rotation of
+        // the rotoide axis. We need to remove this.
+
+        // N.B. We do vel = r X w instead of vel = w x r to have vel negative
+        //      since we want to remove it from the linear velocity of the body
+        dVector3 lvel1;
+        dCalcVectorCross3( lvel1, r, joint->node[0].body->avel );
+
+        // lvel1 += joint->node[0].body->lvel;
+        dAddVectors3( lvel1, lvel1, joint->node[0].body->lvel );
+
+        // Since we want the rate of change along the prismatic axis,
+        // get axisP1 in global coordinates and keep the component
+        // along this axis only
+        dVector3 axP1;
+        dMultiply0_331( axP1, joint->node[0].body->posr.R, joint->axisP1 );
+
+        if ( joint->node[1].body )
+        {
+            // Find the contribution of the angular rotation to the linear speed
+            // N.B. We do vel = r X w instead of vel = w x r to have vel negative
+            //      since we want to remove it from the linear velocity of the body
+            dVector3 lvel2;
+            dCalcVectorCross3( lvel2, anchor2, joint->node[1].body->avel );
+
+            // lvel1 -=  lvel2 + joint->node[1].body->lvel;
+            dVector3 tmp;
+            dAddVectors3( tmp, lvel2, joint->node[1].body->lvel );
+            dSubtractVectors3( lvel1, lvel1, tmp );
+
+            return dCalcVectorDot3( axP1, lvel1 );
+        }
+        else
+        {
+            dReal rate = dCalcVectorDot3( axP1, lvel1 );
+            return ( (joint->flags & dJOINT_REVERSE) ? -rate : rate); // sign flipped for a reversed joint
+        }
+    }
+
+    return 0.0;
+}
+
+
+
+void 
+dxJointPU::getSureMaxInfo( SureMaxInfo* info )
+{
+    info->max_m = 6; // getInfo1 reports 3 rows plus at most one each for limotP, limot1 and limot2
+}
+
+
+
+void
+dxJointPU::getInfo1( dxJoint::Info1 *info )
+{
+    // Three rows are always reported (m = nub = 3); every limit/motor block
+    // that is at a limit or powered (fmax > 0) adds one more row to m.
+    info->nub = 3;
+    info->m = 3;
+
+    // ---- prismatic articulation: test only when a stop is finite and lostop <= histop
+    const bool limitingP =
+        ( limotP.lostop > -dInfinity || limotP.histop < dInfinity ) &&
+        limotP.lostop <= limotP.histop;
+
+    limotP.limit = 0;
+    if ( limitingP )
+    {
+        // N.B. The function is ill named: it is given the linear joint position.
+        limotP.testRotationalLimit( dJointGetPUPosition( this ) );
+    }
+    if ( limotP.limit || limotP.fmax > 0 ) ++info->m;
+
+    // ---- universal articulation ----
+    const bool limiting1 =
+        ( limot1.lostop >= -M_PI || limot1.histop <= M_PI ) &&
+        limot1.lostop <= limot1.histop;
+    const bool limiting2 =
+        ( limot2.lostop >= -M_PI || limot2.histop <= M_PI ) &&
+        limot2.lostop <= limot2.histop;
+
+    // testRotationalLimit() has to run even for a motored axis, because it
+    // records its result.
+    limot1.limit = 0;
+    limot2.limit = 0;
+    if ( limiting1 || limiting2 )
+    {
+        dReal angle1, angle2;
+        getAngles( &angle1, &angle2 );
+        if ( limiting1 ) limot1.testRotationalLimit( angle1 );
+        if ( limiting2 ) limot2.testRotationalLimit( angle2 );
+    }
+    if ( limot1.limit || limot1.fmax > 0 ) ++info->m;
+    if ( limot2.limit || limot2.fmax > 0 ) ++info->m;
+}
+
+
+
+void
+dxJointPU::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex ) // findex is not touched by this function
+{
+    const dReal k = worldFPS * worldERP; // factor applied to every error term written to GI2_RHS below
+
+    // ======================================================================
+    // The angular constraint
+    // Row 0: angular row about uniPerp, driven by the dot product of the two universal axes.
+    dVector3 ax1, ax2; // Global axes of rotation
+    getAxis(this, ax1, axis1);
+    getAxis2(this,ax2, axis2);
+
+    dVector3 uniPerp;  // Axis perpendicular to axes of rotation
+    dCalcVectorCross3(uniPerp,ax1,ax2);
+    dNormalize3( uniPerp );
+
+    dCopyVector3( J1 + GI2__JA_MIN, uniPerp );
+    // N.B. despite its name, body1 is node[1].body, i.e. the joint's second body.
+    dxBody *body1 = node[1].body;
+
+    if ( body1 ) {
+        dCopyNegatedVector3( J2 + GI2__JA_MIN , uniPerp );
+    }
+    // Corrective velocity attempting to keep uni axes perpendicular
+    dReal val = dCalcVectorDot3( ax1, ax2 );
+    // Small angle approximation : 
+    // theta = asin(val)
+    // theta is approximately val when val is near zero.
+    pairRhsCfm[GI2_RHS] = -k * val; 
+    
+    // ==========================================================================
+    // Rows 1 and 2: linear rows along p and q, the directions dPlaneSpace derives from the prismatic axis
+    dVector3 an1, an2; // Global anchor positions
+    dVector3 axP, sep; // Prismatic axis and separation vector
+    getAnchor(this, an1, anchor1);
+    getAnchor2(this, an2, anchor2);
+    // A reversed joint reads axisP1 through getAxis2 instead of getAxis.
+    if (flags & dJOINT_REVERSE) {
+        getAxis2(this, axP, axisP1);
+    } else {
+        getAxis(this, axP, axisP1);
+    }
+    dSubtractVectors3(sep, an2, an1);
+
+    dVector3 p, q;
+    dPlaneSpace(axP, p, q);
+
+    dCopyVector3( J1 + rowskip + GI2__JL_MIN, p );
+    dCopyVector3( J1 + 2 * rowskip + GI2__JL_MIN, q );
+    // Re-express each anchor relative to its body's position (a plain subtraction, no rotation).
+    // Aliasing (an1 as both input and output) isn't a problem here.
+    dSubtractVectors3(an1, an1, node[0].body->posr.pos);
+    dCalcVectorCross3( J1 + rowskip + GI2__JA_MIN, an1, p );
+    dCalcVectorCross3( J1 + 2 * rowskip + GI2__JA_MIN, an1, q );
+
+    if (body1) {
+        dCopyNegatedVector3( J2 + rowskip + GI2__JL_MIN, p );
+        dCopyNegatedVector3( J2 + 2 * rowskip + GI2__JL_MIN, q );
+        dSubtractVectors3(an2, an2, body1->posr.pos);
+        dCalcVectorCross3( J2 + rowskip + GI2__JA_MIN, p, an2 );
+        dCalcVectorCross3( J2 + 2 * rowskip + GI2__JA_MIN, q, an2 );
+    }
+
+    pairRhsCfm[pairskip + GI2_RHS] = k * dCalcVectorDot3( p, sep );
+    pairRhsCfm[2 * pairskip + GI2_RHS] = k * dCalcVectorDot3( q, sep );
+    
+    // ==========================================================================
+    // Handle the limits/motors
+    int currRowSkip = 3 * rowskip, currPairSkip = 3 * pairskip;
+    // Rows 3 and up: the write position advances by one row only when addLimot returns nonzero.
+    if (limot1.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 )) {
+        currRowSkip += rowskip; currPairSkip += pairskip;
+    }
+
+    if (limot2.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax2, 1 )) {
+        currRowSkip += rowskip; currPairSkip += pairskip;
+    }
+    // The prismatic limot goes last; axP is negated only for a reversed joint without a second body.
+    if (  body1 || (flags & dJOINT_REVERSE) == 0 ) {
+        limotP.addTwoPointLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, axP, an1, an2 );
+    } else {
+        dNegateVector3(axP);
+        limotP.addTwoPointLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, axP, an1, an2  );
+    }
+}
+
+void dJointSetPUAnchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    setAnchors( joint, x, y, z, joint->anchor1, joint->anchor2 );   // (x, y, z) -> anchor1 / anchor2
+    joint->computeInitialRelativeRotations();                       // refresh after the anchors changed
+}
+
+/**
+ * Set the anchor point and the relative position of each body as if body 1
+ * had been at its current position + [dx,dy,dz] when the anchor was set.
+ * Ex:
+ * <PRE>
+ * dReal offset = 1;
+ * dVector3 dir;
+ * dJointGetPUAxis3(jId, dir);
+ * dJointSetPUAnchor(jId, 0, 0, 0);
+ * // If you request the position you will have: dJointGetPUPosition(jId) == 0
+ * dJointSetPUAnchorDelta(jId, 0, 0, 0, dir[X]*offset, dir[Y]*offset, dir[Z]*offset);
+ * // If you request the position you will have: dJointGetPUPosition(jId) == -offset
+ * </PRE>
+ *
+ * @param j The PU joint for which the anchor point will be set
+ * @param x The X position of the anchor point in world frame
+ * @param y The Y position of the anchor point in world frame
+ * @param z The Z position of the anchor point in world frame
+ * @param dx A delta to be added to the X position as if the anchor was set
+ *           when body1 was at current_position[X] + dx
+ * @param dy A delta to be added to the Y position as if the anchor was set
+ *           when body1 was at current_position[Y] + dy
+ * @param dz A delta to be added to the Z position as if the anchor was set
+ *           when body1 was at current_position[Z] + dz
+ * @note Should have the same meaning as dJointSetSliderAxisDelta
+ */
+void dJointSetPUAnchorDelta( dJointID j, dReal x, dReal y, dReal z,
+                            dReal dx, dReal dy, dReal dz )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    const dReal delta[3] = { dx, dy, dz };
+    dxBody *const firstBody = joint->node[0].body;
+
+    // Temporarily displace body 1 so that the anchors are computed from the
+    // shifted position ...
+    if ( firstBody )
+        for ( int i = 0; i < 3; ++i )
+            firstBody->posr.pos[i] += delta[i];
+
+    setAnchors( joint, x, y, z, joint->anchor1, joint->anchor2 );
+
+    // ... then undo the displacement (by subtraction, not by restoring a copy).
+    if ( firstBody )
+        for ( int i = 0; i < 3; ++i )
+            firstBody->posr.pos[i] -= delta[i];
+
+    joint->computeInitialRelativeRotations();
+}
+
+/**
+ * \brief Set the anchor and the relative position of each body such that
+ * dJointGetPUPosition will return the dot product of axis and [dx,dy,dz].
+ *
+ * Body 1 is moved by [-dx, -dy, -dz], then the anchor is set. This will be the
+ * position 0 for the prismatic part of the joint. Then body 1 is moved back by
+ * the same amount. For a joint flagged dJOINT_REVERSE the offset is negated first.
+ *
+ * Ex:
+ * <PRE>
+ * dReal offset = 1;
+ * dVector3 dir;
+ * dJointGetPUAxis3(jId, dir);
+ * dJointSetPUAnchor(jId, 0, 0, 0);
+ * // If you request the position you will have: dJointGetPUPosition(jId) == 0
+ * dJointSetPUAnchorOffset(jId, 0, 0, 0, dir[X]*offset, dir[Y]*offset, dir[Z]*offset);
+ * // If you request the position you will have: dJointGetPUPosition(jId) == offset
+ * </PRE>
+ *
+ * @param j The PU joint for which the anchor point will be set
+ * @param x The X position of the anchor point in world frame
+ * @param y The Y position of the anchor point in world frame
+ * @param z The Z position of the anchor point in world frame
+ * @param dx X component of the offset: the anchor is set as if
+ *           body1 was at current_position[X] - dx
+ * @param dy Y component of the offset: the anchor is set as if
+ *           body1 was at current_position[Y] - dy
+ * @param dz Z component of the offset: the anchor is set as if
+ *           body1 was at current_position[Z] - dz
+ * @note Should have the same meaning as dJointSetSliderAxisDelta
+ */
+void dJointSetPUAnchorOffset( dJointID j, dReal x, dReal y, dReal z,
+                             dReal dx, dReal dy, dReal dz )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    // Collect the offset; a reversed joint uses the opposite displacement.
+    dReal shift[3] = { dx, dy, dz };
+    if ( joint->flags & dJOINT_REVERSE )
+    {
+        for ( int i = 0; i < 3; ++i )
+            shift[i] = -shift[i];
+    }
+
+    dxBody *const firstBody = joint->node[0].body;
+
+    // Pull body 1 back by the offset while the anchors are recorded ...
+    if ( firstBody )
+        for ( int i = 0; i < 3; ++i )
+            firstBody->posr.pos[i] -= shift[i];
+
+    setAnchors( joint, x, y, z, joint->anchor1, joint->anchor2 );
+
+    // ... and push it forward again afterwards. The undo is arithmetic:
+    // no copy of the original position is kept.
+    if ( firstBody )
+        for ( int i = 0; i < 3; ++i )
+            firstBody->posr.pos[i] += shift[i];
+
+    joint->computeInitialRelativeRotations();
+}
+
+
+
+
+
+void dJointSetPUAxis1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // The caller's "axis 1" is written to the axis1 slot, or to the axis2
+    // slot when the joint is reversed; NULL is passed for the other slot.
+    const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+    setAxes( joint, x, y, z, reversed ? NULL : joint->axis1, reversed ? joint->axis2 : NULL );
+    joint->computeInitialRelativeRotations();
+}
+
+void dJointSetPUAxis2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // The caller's "axis 2" is written to the axis2 slot, or to the axis1
+    // slot when the joint is reversed; NULL is passed for the other slot.
+    const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+    setAxes( joint, x, y, z, reversed ? joint->axis1 : NULL, reversed ? NULL : joint->axis2 );
+    joint->computeInitialRelativeRotations();
+}
+
+
+void dJointSetPUAxisP( dJointID id, dReal x, dReal y, dReal z )
+{
+    dJointSetPUAxis3( id, x, y, z ); // convenience alias: forwards unchanged to dJointSetPUAxis3
+}
+
+
+
+void dJointSetPUAxis3( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // Axis 3 is the prismatic axis: only axisP1 is written, the second slot is null.
+    setAxes( joint, x, y, z, joint->axisP1, 0 );
+
+    joint->computeInitialRelativeRotations();
+}
+
+
+
+
+void dJointGetPUAngles( dJointID j, dReal *angle1, dReal *angle2 )
+{
+    dxJointUniversal* joint = static_cast<dxJointUniversal*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // A reversed joint hands the output pointers to getAngles in swapped
+    // order, so the caller sees the two angles exchanged.
+    const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+    joint->getAngles( reversed ? angle2 : angle1, reversed ? angle1 : angle2 );
+}
+
+
+dReal dJointGetPUAngle1( dJointID j )
+{
+    dxJointUniversal* joint = static_cast<dxJointUniversal*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // The caller's "angle 1" is getAngle1(), or getAngle2() when the joint
+    // is reversed.
+    const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+    return reversed ? joint->getAngle2() : joint->getAngle1();
+}
+
+
+dReal dJointGetPUAngle2( dJointID j )
+{
+    dxJointUniversal* joint = static_cast<dxJointUniversal*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+    // The caller's "angle 2" is getAngle2(), or getAngle1() when the joint
+    // is reversed.
+    const bool reversed = ( joint->flags & dJOINT_REVERSE ) != 0;
+    return reversed ? joint->getAngle1() : joint->getAngle2();
+}
+
+
+dReal dJointGetPUAngle1Rate( dJointID j )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    dxBody *const firstBody = joint->node[0].body;
+    dxBody *const secondBody = joint->node[1].body;
+    if ( !firstBody ) return 0;   // no first body: the rate is reported as zero
+
+    // Caller-facing "axis 1": axis1 normally, axis2 for a reversed joint.
+    dVector3 axis;
+    if ( joint->flags & dJOINT_REVERSE )
+        getAxis2( joint, axis, joint->axis2 );
+    else
+        getAxis( joint, axis, joint->axis1 );
+
+    // Angular velocity of body 1 along that axis, minus body 2's when present.
+    const dReal rate1 = dCalcVectorDot3( axis, firstBody->avel );
+    return secondBody ? rate1 - dCalcVectorDot3( axis, secondBody->avel ) : rate1;
+}
+
+
+dReal dJointGetPUAngle2Rate( dJointID j )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    dxBody *const firstBody = joint->node[0].body;
+    dxBody *const secondBody = joint->node[1].body;
+    if ( !firstBody ) return 0;   // no first body: the rate is reported as zero
+
+    // Caller-facing "axis 2": axis2 normally, axis1 for a reversed joint.
+    dVector3 axis;
+    if ( joint->flags & dJOINT_REVERSE )
+        getAxis( joint, axis, joint->axis1 );
+    else
+        getAxis2( joint, axis, joint->axis2 );
+
+    // Angular velocity of body 1 along that axis, minus body 2's when present.
+    const dReal rate2 = dCalcVectorDot3( axis, firstBody->avel );
+    return secondBody ? rate2 - dCalcVectorDot3( axis, secondBody->avel ) : rate2;
+}
+
+
+void dJointSetPUParam( dJointID j, int parameter, dReal value )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    // The 0xff00 bits select the limit/motor block: group 1 -> limot1,
+    // group 2 -> limot2, group 3 -> limotP (prismatic). Any other group is
+    // silently ignored.
+    const int group = parameter & 0xff00;
+    const int index = parameter & 0xff;
+
+    if ( group == dParamGroup1 )
+        joint->limot1.set( parameter, value );   // group 1 is passed through unmasked
+    else if ( group == dParamGroup2 )
+        joint->limot2.set( index, value );
+    else if ( group == dParamGroup3 )
+        joint->limotP.set( index, value );
+}
+
+void dJointGetPUAnchor( dJointID j, dVector3 result )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PU );
+
+    if ( !joint->node[1].body )
+    {
+        // No second body: anchor2 is copied out exactly as stored.
+        dCopyVector3( result, joint->anchor2 );
+        return;
+    }
+    getAnchor2( joint, result, joint->anchor2 );
+}
+
+void dJointGetPUAxis1( dJointID j, dVector3 result )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PU );
+    if ( !( joint->flags & dJOINT_REVERSE ) )
+        getAxis( joint, result, joint->axis1 );    // normal joint: axis1 slot
+    else
+        getAxis2( joint, result, joint->axis2 );   // reversed joint: axis2 slot
+}
+
+void dJointGetPUAxis2( dJointID j, dVector3 result )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PU );
+    if ( !( joint->flags & dJOINT_REVERSE ) )
+        getAxis2( joint, result, joint->axis2 );   // normal joint: axis2 slot
+    else
+        getAxis( joint, result, joint->axis1 );    // reversed joint: axis1 slot
+}
+
+/**
+ * @brief Get the prismatic axis
+ * @ingroup joints
+ * @param id     The PU joint to query
+ * @param result Receives the axis; it is filled in by dJointGetPUAxis3
+ * @note Added for convenience: it simply forwards to dJointGetPUAxis3
+ */
+void dJointGetPUAxisP( dJointID id, dVector3 result )
+{
+    dJointGetPUAxis3( id, result );
+}
+
+
+void dJointGetPUAxis3( dJointID j, dVector3 result )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( joint, PU );
+    getAxis( joint, result, joint->axisP1 );   // NOTE(review): no dJOINT_REVERSE handling here, unlike getInfo2 -- confirm intended
+}
+
+dReal dJointGetPUParam( dJointID j, int parameter )
+{
+    dxJointPU* joint = static_cast<dxJointPU*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, PU );
+
+    // The 0xff00 bits select the limit/motor block: group 1 -> limot1,
+    // group 2 -> limot2, group 3 -> limotP (prismatic).
+    const int group = parameter & 0xff00;
+    const int index = parameter & 0xff;
+
+    if ( group == dParamGroup1 )
+        return joint->limot1.get( parameter );   // group 1 is passed through unmasked
+    if ( group == dParamGroup2 )
+        return joint->limot2.get( index );
+    if ( group == dParamGroup3 )
+        return joint->limotP.get( index );
+
+    // Any other group yields zero.
+    return 0;
+}
+
+
+dJointType
+dxJointPU::type() const
+{
+    return dJointTypePU; // the tag that checktype( joint, PU ) compares against
+}
+
+
+sizeint
+dxJointPU::size() const
+{
+    return sizeof( *this ); // size in bytes of the dxJointPU object itself
+}
+
+
+void
+dxJointPU::setRelativeValues()
+{   // Reads the current anchor and axes through the public getters and stores them again.
+    dVector3 anchor;
+    dJointGetPUAnchor(this, anchor);
+    setAnchors( this, anchor[0], anchor[1], anchor[2], anchor1, anchor2 );
+    // All three axes are read back before any of them is rewritten.
+    dVector3 ax1, ax2, ax3;
+    dJointGetPUAxis1(this, ax1);
+    dJointGetPUAxis2(this, ax2);
+    dJointGetPUAxis3(this, ax3);
+    // Axis 1 / axis 2 go back into the same slots the getters read them from (swapped when reversed).
+    if ( flags & dJOINT_REVERSE )
+    {
+        setAxes( this, ax1[0], ax1[1], ax1[2], NULL, axis2 );
+        setAxes( this, ax2[0], ax2[1], ax2[2], axis1, NULL );
+    }
+    else
+    {
+        setAxes( this, ax1[0], ax1[1], ax1[2], axis1, NULL );
+        setAxes( this, ax2[0], ax2[1], ax2[2], NULL, axis2 );
+    }
+
+    // The prismatic axis always goes into axisP1, reversed or not.
+    setAxes( this, ax3[0], ax3[1], ax3[2], axisP1, NULL );
+
+    computeInitialRelativeRotations();
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/pu.h b/libs/ode-0.16.1/ode/src/joints/pu.h
new file mode 100644
index 0000000..34f7392
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/pu.h
@@ -0,0 +1,88 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_PU_H_
+#define _ODE_JOINT_PU_H_
+
+#include "universal.h"
+
+
+
+/**
+ * Component of a Prismatic -- Universal joint.
+ * The axisP must be perpendicular to axis1.
+ * The second axis of the universal joint is perpendicular to axis1.
+ *
+ * Since the PU joint is derived from the Universal joint, some variables
+ * are reused.
+ *
+ * anchor1: Vector from body1 to the anchor point
+ *          This vector is calculated when the bodies are attached or
+ *          when the anchor point is set. It is like the offset of the Slider
+ *          joint. Since there is a prismatic articulation between the anchor
+ *          and body1, the distance might change as the simulation goes on.
+ * anchor2: Vector from body2 to the anchor point.
+ * <PRE>
+ *                                                 Body 2
+ *                                                 +-------------+
+ *                                                 |      x      |
+ *                                                 +------------\+
+ *          Prismatic articulation                   ..     ..
+ *                                |                ..     ..
+ *          Body 1                v             ..      ..
+ *          +--------------+    --|        __..      ..  anchor2
+ * <--------|      x       | .....|.......(__)     ..
+ * axisP    +--------------+    --|         ^     <
+ *                 |----------------------->|
+ *                     anchor1              |--- Universal articulation
+ *                                               axis1 going out of the plane
+ *                                               axis2 is perpendicular to axis1
+ *                                               (i.e. 2 rotoides)
+ * </PRE>
+ */
+struct dxJointPU : public dxJointUniversal
+{
+
+    /// @brief Axis for the prismatic articulation w.r.t first body.
+    /// @note This is considered as axis3 from the parameter view (dParamGroup3, dJointSetPUAxis3)
+    dVector3 axisP1;
+
+    dxJointLimitMotor limotP; ///< limit and motor information for the prismatic articulation.
+
+    // Constructor and virtual joint interface; the definitions live in pu.cpp.
+    dxJointPU( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+
+    virtual void setRelativeValues();
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/slider.cpp b/libs/ode-0.16.1/ode/src/joints/slider.cpp
new file mode 100644
index 0000000..2c9b008
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/slider.cpp
@@ -0,0 +1,423 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "slider.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// slider
+
+/// Create a slider joint in world @p w: axis1 defaults to (1,0,0), qrel and
+/// offset are cleared, and the limit/motor block is initialised.
+dxJointSlider::dxJointSlider( dxWorld *w )
+    : dxJoint( w )
+{
+    // Wipe all four slots of every vector/quaternion member first.
+    dSetZero( offset, 4 );
+    dSetZero( qrel, 4 );
+    dSetZero( axis1, 4 );
+
+    // Default sliding direction: unit X (axis1 is stored w.r.t. the first body).
+    axis1[0] = 1;
+
+    limot.init( world );
+}
+
+
+/// Return the slider displacement: the projection, onto the world-space
+/// joint axis, of the vector from the stored reference point to body 1.
+dReal dJointGetSliderPosition( dJointID j )
+{
+    dxJointSlider *joint = static_cast<dxJointSlider*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Slider );
+
+    dxBody *const first = joint->node[0].body;
+    dxBody *const second = joint->node[1].body;
+
+    // Joint axis expressed in global coordinates.
+    dVector3 worldAxis;
+    dMultiply0_331( worldAxis, first->posr.R, joint->axis1 );
+
+    dVector3 delta;
+    if ( second )
+    {
+        // Rotate the stored offset by body 2's orientation, then form
+        // pos1 - rotatedOffset - pos2 component by component.
+        dVector3 rotated;
+        dMultiply0_331( rotated, second->posr.R, joint->offset );
+        delta[0] = first->posr.pos[0] - rotated[0] - second->posr.pos[0];
+        delta[1] = first->posr.pos[1] - rotated[1] - second->posr.pos[1];
+        delta[2] = first->posr.pos[2] - rotated[2] - second->posr.pos[2];
+    }
+    else
+    {
+        // Single-body joint: offset is used directly as a global point.
+        for ( int k = 0; k < 3; ++k )
+            delta[k] = first->posr.pos[k] - joint->offset[k];
+
+        // A reversed joint measures along the opposite direction. The axis
+        // is flipped here (rather than the final dot product) so that the
+        // common path does not pay for the check.
+        if ( joint->flags & dJOINT_REVERSE )
+        {
+            for ( int k = 0; k < 3; ++k )
+                worldAxis[k] = -worldAxis[k];
+        }
+    }
+
+    return dCalcVectorDot3( worldAxis, delta );
+}
+
+
+/// Return the time derivative of the slider position: the relative linear
+/// velocity of the attached bodies projected onto the world-space joint axis.
+dReal dJointGetSliderPositionRate( dJointID j )
+{
+    dxJointSlider *joint = static_cast<dxJointSlider*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Slider );
+
+    dxBody *const first = joint->node[0].body;
+    dxBody *const second = joint->node[1].body;
+
+    // Joint axis expressed in global coordinates.
+    dVector3 worldAxis;
+    dMultiply0_331( worldAxis, first->posr.R, joint->axis1 );
+
+    if ( !second )
+    {
+        // Single-body joint: only body 1 moves; honour the reverse flag.
+        dReal speed = dCalcVectorDot3( worldAxis, first->lvel );
+        return ( joint->flags & dJOINT_REVERSE ) ? -speed : speed;
+    }
+
+    return dCalcVectorDot3( worldAxis, first->lvel ) -
+        dCalcVectorDot3( worldAxis, second->lvel );
+}
+
+
+/// Report the upper bound on constraint rows this joint can request:
+/// getInfo1() sets m to 5 or 6, so the bound is 6.
+void
+dxJointSlider::getSureMaxInfo( SureMaxInfo *info )
+{
+    info->max_m = 6;
+}
+
+
+/// Report the row counts for this step (m = 5, or 6 when the joint is
+/// powered or sitting on a stop; nub = 5) and refresh limot.limit /
+/// limot.limit_err from the current slider position.
+void
+dxJointSlider::getInfo1( dxJoint::Info1 *info )
+{
+    info->nub = 5;
+
+    // A powered slider needs one extra constraint row.
+    info->m = ( limot.fmax > 0 ) ? 6 : 5;
+
+    // Assume no stop is hit until the position test below says otherwise.
+    limot.limit = 0;
+
+    // Limits are only evaluated when at least one stop is finite and the
+    // stops are ordered lostop <= histop.
+    const bool anyStopSet = limot.lostop > -dInfinity || limot.histop < dInfinity;
+    if ( !anyStopSet || !( limot.lostop <= limot.histop ) )
+        return;
+
+    // Measure joint position and compare against each stop.
+    const dReal pos = dJointGetSliderPosition( this );
+    if ( pos <= limot.lostop )
+    {
+        limot.limit = 1;
+        limot.limit_err = pos - limot.lostop;
+        info->m = 6;
+    }
+    else if ( pos >= limot.histop )
+    {
+        limot.limit = 2;
+        limot.limit_err = pos - limot.histop;
+        info->m = 6;
+    }
+}
+
+
+/**
+ * Fill in the slider's constraint rows.
+ *
+ * Row layout produced here:
+ *   - rows 0..2: written by setFixedOrientation() (body rotations equal);
+ *   - rows 3..4: no relative motion along p and q, the two vectors that
+ *     dPlaneSpace() derives from the world-space slider axis;
+ *   - row 5:     handed to limot.addLimot() (motor / limit along the axis).
+ *
+ * @param worldFPS,worldERP  multiplied together to scale the positional
+ *                           error in the right-hand side of rows 3 and 4.
+ * @param rowskip            stride between consecutive rows of J1 / J2.
+ * @param pairskip           stride between consecutive entries of
+ *                           pairRhsCfm / pairLoHi.
+ * @param findex             not referenced by this function.
+ *
+ * NOTE(review): when there is no second body, J2 and the angular parts of
+ * J1 for rows 3-4 are left untouched - presumably the caller pre-zeroes
+ * the Jacobian; confirm against the stepper.
+ */
+void
+dxJointSlider::getInfo2 ( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    // 3 rows to make body rotations equal
+    setFixedOrientation ( this, worldFPS, worldERP, rowskip, J1, J2, pairskip, pairRhsCfm, qrel );
+
+    // pull out pos and R for both bodies. also get the `connection'
+    // vector pos2-pos1.
+    dVector3 c;
+    dReal *pos2 = NULL, *R2 = NULL;
+
+    dReal *pos1 = node[0].body->posr.pos;
+    dReal *R1 = node[0].body->posr.R;
+
+    dVector3 ax1; // joint axis in global coordinates (unit length)
+    dVector3 p, q; // plane space of ax1
+    dMultiply0_331 ( ax1, R1, axis1 );
+    dPlaneSpace ( ax1, p, q );
+
+    // N.B. despite its name, body1 is the joint's SECOND body (node[1]);
+    // it is null when the joint is attached to the static environment.
+    dxBody *body1 = node[1].body;
+    
+    if ( body1 )
+    {
+        R2 = body1->posr.R;
+        pos2 = body1->posr.pos;
+        dSubtractVectors3( c, pos2, pos1 );
+    }
+
+    // remaining two rows. we want: vel2 = vel1 + w1 x c ... but this would
+    // result in three equations, so we project along the planespace vectors
+    // so that sliding along the slider axis is disregarded. for symmetry we
+    // also substitute (w1+w2)/2 for w1, as w1 is supposed to equal w2.
+    int currRowSkip = 3 * rowskip, currPairSkip = 3 * pairskip;
+    {
+        // row 3: linear part along p; angular part 0.5 * (c x p) on both bodies
+        dCopyVector3( J1 + currRowSkip + GI2__JL_MIN, p );
+
+        if ( body1 )
+        {
+            dVector3 tmp;
+
+            dCopyNegatedVector3(J2 + currRowSkip + GI2__JL_MIN, p);
+
+            dCalcVectorCross3( tmp, c, p );
+            dCopyScaledVector3( J1 + currRowSkip + GI2__JA_MIN, tmp, REAL(0.5) );
+            dCopyVector3( J2 + currRowSkip + GI2__JA_MIN, J1 + currRowSkip + GI2__JA_MIN );
+        }
+    }
+
+    currRowSkip += rowskip;
+    {
+        // row 4: same construction as row 3, using q instead of p
+        dCopyVector3( J1 + currRowSkip + GI2__JL_MIN, q );
+
+        if ( body1 )
+        {
+            dVector3 tmp;
+
+            dCopyNegatedVector3(J2 + currRowSkip + GI2__JL_MIN, q);
+
+            dCalcVectorCross3( tmp, c, q );
+            dCopyScaledVector3( J1 + currRowSkip + GI2__JA_MIN, tmp, REAL(0.5) );
+            dCopyVector3( J2 + currRowSkip + GI2__JA_MIN, J1 + currRowSkip + GI2__JA_MIN );
+        }
+    }
+
+    // compute last two elements of right hand side. we want to align the offset
+    // point (in body 2's frame) with the center of body 1.
+    dReal k = worldFPS * worldERP;
+
+    if ( body1 )
+    {
+        dVector3 ofs;  // offset point in global coordinates
+        dMultiply0_331 ( ofs, R2, offset );
+        // c is reused from here on: it becomes (pos2 - pos1) + R2*offset,
+        // so this must stay after the cross products above.
+        dAddVectors3(c, c, ofs);
+        
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( p, c );
+
+        currPairSkip += pairskip;
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( q, c );
+    }
+    else
+    {
+        // single-body case: offset is used directly as a global point
+        dVector3 ofs;  // offset point in global coordinates
+        dSubtractVectors3(ofs, offset, pos1);
+        
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( p, ofs );
+        
+        currPairSkip += pairskip;
+        pairRhsCfm[currPairSkip + GI2_RHS] = k * dCalcVectorDot3 ( q, ofs );
+
+        // p and q were derived before this flip, so only the limit/motor
+        // row below sees the reversed axis.
+        if ( (flags & dJOINT_REVERSE) != 0 )
+        {
+            dNegateVector3(ax1);
+        }
+    }
+
+    // if the slider is powered, or has joint limits, add in the extra row
+    currRowSkip += rowskip; currPairSkip += pairskip;
+    limot.addLimot ( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 0 );
+}
+
+
+/// Set the slider axis from the vector (x, y, z) and then refresh the
+/// stored offset and initial relative rotation from the current body poses.
+void dJointSetSliderAxis( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointSlider *slider = static_cast<dxJointSlider*>( j );
+    dUASSERT( slider, "bad joint argument" );
+    checktype( slider, Slider );
+
+    // Only axis1 is stored; the second-axis slot of setAxes() is unused (0).
+    setAxes( slider, x, y, z, slider->axis1, 0 );
+
+    slider->computeOffset();
+    slider->computeInitialRelativeRotation();
+}
+
+
+/// Same as dJointSetSliderAxis(), but when the joint has no second body the
+/// freshly computed offset is additionally shifted by (dx, dy, dz).
+void dJointSetSliderAxisDelta( dJointID j, dReal x, dReal y, dReal z, dReal dx, dReal dy, dReal dz )
+{
+    dxJointSlider *slider = static_cast<dxJointSlider*>( j );
+    dUASSERT( slider, "bad joint argument" );
+    checktype( slider, Slider );
+
+    setAxes( slider, x, y, z, slider->axis1, 0 );
+
+    // Center of body1 w.r.t. body 2 (or body1's position if there is no body 2).
+    slider->computeOffset();
+
+    // The delta is applied only for a joint without a second body.
+    if ( slider->node[1].body == 0 )
+    {
+        const dReal shift[3] = { dx, dy, dz };
+        for ( int k = 0; k < 3; ++k )
+            slider->offset[k] += shift[k];
+    }
+
+    // Initial relative rotation body1 -> body2, or env -> body1.
+    slider->computeInitialRelativeRotation();
+}
+
+
+
+/// Write the slider axis into @p result via getAxis(), using the stored axis1.
+void dJointGetSliderAxis( dJointID j, dVector3 result )
+{
+    dxJointSlider *slider = static_cast<dxJointSlider*>( j );
+
+    dUASSERT( slider, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( slider, Slider );
+
+    getAxis( slider, result, slider->axis1 );
+}
+
+
+/// Forward a parameter write to the slider's limit/motor block.
+void dJointSetSliderParam( dJointID j, int parameter, dReal value )
+{
+    dxJointSlider *slider = static_cast<dxJointSlider*>( j );
+
+    dUASSERT( slider, "bad joint argument" );
+    checktype( slider, Slider );
+
+    slider->limot.set( parameter, value );
+}
+
+
+/// Read a parameter back from the slider's limit/motor block.
+dReal dJointGetSliderParam( dJointID j, int parameter )
+{
+    dxJointSlider *slider = static_cast<dxJointSlider*>( j );
+
+    dUASSERT( slider, "bad joint argument" );
+    checktype( slider, Slider );
+
+    return slider->limot.get( parameter );
+}
+
+
+/// Apply @p force along the slider axis: +F on body 1, -F on body 2 (each
+/// only if present). With two bodies, an extra torque is added to both so
+/// that the force pair does not introduce a net rotation when the body
+/// centers are not aligned along the axis.
+void dJointAddSliderForce( dJointID j, dReal force )
+{
+    dxJointSlider *joint = static_cast<dxJointSlider*>( j );
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Slider );
+
+    // A reversed joint pushes the other way.
+    if ( joint->flags & dJOINT_REVERSE )
+        force = -force;
+
+    // Force vector = axis scaled by the (signed) magnitude.
+    dVector3 push;
+    getAxis( joint, push, joint->axis1 );
+    for ( int k = 0; k < 3; ++k )
+        push[k] *= force;
+
+    dxBody *const first = joint->node[0].body;
+    dxBody *const second = joint->node[1].body;
+
+    if ( first != 0 )
+        dBodyAddForce( first, push[0], push[1], push[2] );
+    if ( second != 0 )
+        dBodyAddForce( second, -push[0], -push[1], -push[2] );
+
+    // Torque compensation only applies when both bodies exist.
+    if ( first == 0 || second == 0 )
+        return;
+
+    // Linear torque decoupling: half the center-to-center vector crossed
+    // with the force gives the torque applied to each body.
+    dVector3 halfSpan;
+    for ( int k = 0; k < 3; ++k )
+        halfSpan[k] = REAL ( 0.5 ) * ( second->posr.pos[k] - first->posr.pos[k] );
+
+    dVector3 ltd;
+    dCalcVectorCross3( ltd, halfSpan, push );
+
+    dBodyAddTorque( first, ltd[0], ltd[1], ltd[2] );
+    dBodyAddTorque( second, ltd[0], ltd[1], ltd[2] );
+}
+
+
+/// Identify this joint as a slider.
+dJointType dxJointSlider::type() const
+{
+    return dJointTypeSlider;
+}
+
+
+/// Size in bytes of the concrete joint object.
+sizeint dxJointSlider::size() const
+{
+    return sizeof( dxJointSlider );
+}
+
+
+/// Recompute both cached relative quantities (offset, then qrel) from the
+/// current body poses.
+void dxJointSlider::setRelativeValues()
+{
+    this->computeOffset();
+    this->computeInitialRelativeRotation();
+}
+
+
+
+/// Store in qrel the initial relative rotation body1 -> body2, or
+/// env -> body1 when there is no second body. Does nothing if the joint
+/// has no first body.
+void
+dxJointSlider::computeInitialRelativeRotation()
+{
+    dxBody *const first = node[0].body;
+    if ( !first )
+        return;
+
+    if ( dxBody *const second = node[1].body )
+    {
+        dQMultiply1( qrel, first->q, second->q );
+        return;
+    }
+
+    // No second body: qrel becomes the conjugate ("transpose") of the
+    // first body's quaternion - scalar part kept, vector part negated.
+    qrel[0] = first->q[0];
+    for ( int k = 1; k < 4; ++k )
+        qrel[k] = -first->q[k];
+}
+
+
+/// Store in offset the center of body1 expressed in body 2's frame, or
+/// body1's world position when there is no second body.
+void
+dxJointSlider::computeOffset()
+{
+    dxBody *const second = node[1].body;
+
+    if ( second )
+    {
+        // World-space vector from body 2's center to body 1's center ...
+        const dReal *const p1 = node[0].body->posr.pos;
+        const dReal *const p2 = second->posr.pos;
+
+        dVector3 span;
+        for ( int k = 0; k < 3; ++k )
+            span[k] = p1[k] - p2[k];
+
+        // ... rotated into body 2's frame (multiply by R2 transposed).
+        dMultiply1_331( offset, second->posr.R, span );
+    }
+    else if ( node[0].body )
+    {
+        const dReal *const p1 = node[0].body->posr.pos;
+        for ( int k = 0; k < 3; ++k )
+            offset[k] = p1[k];
+    }
+}
diff --git a/libs/ode-0.16.1/ode/src/joints/slider.h b/libs/ode-0.16.1/ode/src/joints/slider.h
new file mode 100644
index 0000000..de6201e
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/slider.h
@@ -0,0 +1,59 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_SLIDER_H_
+#define _ODE_JOINT_SLIDER_H_
+
+#include "joint.h"
+
+
+// Slider joint. If body2 is 0 (the joint is attached to the static
+// environment), qrel holds the conjugate of body1's rotation quaternion and
+// offset holds body1's world position, both captured when they were computed.
+
+struct dxJointSlider : public dxJoint
+{
+    dVector3 axis1;     // slider axis, stored w.r.t. the first body
+    dQuaternion qrel;   // initial relative rotation body1 -> body2
+                        // (written by computeInitialRelativeRotation())
+    dVector3 offset;    // point relative to body2 that should be
+    // aligned with body1 center along axis1
+    // (written by computeOffset())
+    dxJointLimitMotor limot; // limit and motor information
+
+    // Creates the joint in world w.
+    dxJointSlider ( dxWorld *w );
+
+    // Virtual joint interface implemented by the slider.
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1 ( Info1* info );
+    virtual void getInfo2 ( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;
+    virtual sizeint size() const;
+
+    // Recomputes offset and qrel from the current body poses.
+    virtual void setRelativeValues();
+
+    // Helpers used by setRelativeValues() and the axis setters.
+    void computeInitialRelativeRotation();
+
+    void computeOffset();
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/joints/transmission.cpp b/libs/ode-0.16.1/ode/src/joints/transmission.cpp
new file mode 100644
index 0000000..825b9e2
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/transmission.cpp
@@ -0,0 +1,698 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "transmission.h"
+#include "joint_internal.h"
+
+namespace {
+    /// Restrict x to [minX, maxX]: values below minX yield minX, values
+    /// above maxX yield maxX, anything else is returned unchanged.
+    static inline dReal clamp(dReal x, dReal minX, dReal maxX)
+    {
+        if (x < minX) {
+            return minX;
+        }
+
+        if (x > maxX) {
+            return maxX;
+        }
+
+        return x;
+    }
+}
+
+/*
+ * Transmission joint
+ */
+
+/// Create a transmission joint in world @p w. Defaults: parallel-axes mode,
+/// the world's global CFM/ERP, zero anchors and radii, both axes (1,0,0),
+/// no backlash, ratio 1, and a pending update of the derived state.
+dxJointTransmission::dxJointTransmission(dxWorld* w)
+    : dxJoint(w)
+{
+    flags |= dJOINT_TWOBODIES;
+    mode = dTransmissionParallelAxes;
+
+    // Start from the world-wide solver parameters.
+    erp = world->global_erp;
+    cfm = world->global_cfm;
+
+    // Per-wheel state.
+    for (int wheel = 0 ; wheel < 2 ; ++wheel) {
+        radii[wheel] = 0;
+
+        dSetZero( anchors[wheel], 4 );
+
+        dSetZero( axes[wheel], 4 );
+        axes[wheel][0] = 1;
+    }
+
+    ratio = 1;
+    backlash = 0;
+
+    // Makes the next getInfo2() rebuild the reference frames.
+    update = 1;
+}
+
+/// Report the upper bound on constraint rows: the transmission always uses one.
+void dxJointTransmission::getSureMaxInfo( SureMaxInfo *info )
+{
+    info->max_m = 1;
+}
+
+/// Report one constraint row. With backlash the constraint must be
+/// unilateral (the driving gear can only push the driven gear in one
+/// direction and has to cross the backlash gap to push the other way), so
+/// the row is not counted as unbounded in that case.
+void
+dxJointTransmission::getInfo1( dxJoint::Info1* info )
+{
+    info->m = 1;
+
+    if (backlash > 0) {
+        info->nub = 0;
+    } else {
+        info->nub = 1;
+    }
+}
+
+/**
+ * Build the single transmission constraint row.
+ *
+ * Steps, in order:
+ *   1. transform anchors (a), axes (n), body positions (p) and angular
+ *      velocities (omega) of both bodies to the global frame;
+ *   2. derive, per wheel, the contact point c[i] and tangent l[i] for the
+ *      current mode;
+ *   3. if `update` is set, rebuild the per-wheel reference frames and
+ *      radii, reset both phases, recompute `ratio` and clear `update`;
+ *   4. track each wheel's phase and compute the phase error `delta`;
+ *   5. unless inside the backlash gap, write the Jacobian and the
+ *      right-hand side; always write the CFM and cache the contact points.
+ *
+ * @param worldFPS    divides omega.n in the phase estimate (i.e. acts as
+ *                    1/dt) and scales the error term of the right-hand side.
+ * @param rowskip     not referenced (only one row is written).
+ * @param J1,J2       receive the linear (GI2__JL_MIN) and angular
+ *                    (GI2__JA_MIN) entries of the row.
+ * @param pairskip    not referenced.
+ * @param pairRhsCfm  GI2_RHS is written when the row is active, GI2_CFM always.
+ * @param pairLoHi    GI2_LO / GI2_HI are written only when backlash > 0.
+ * @param findex      not referenced.
+ *
+ * The world ERP argument is ignored; the joint's own `erp` member is used.
+ *
+ * NOTE(review): inside the backlash gap nothing is written to J1/J2 or to
+ * GI2_RHS - presumably the caller pre-zeroes them; confirm against the stepper.
+ */
+void
+dxJointTransmission::getInfo2( dReal worldFPS, dReal /*worldERP*/,
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+ {
+    dVector3 a[2], n[2], l[2], r[2], c[2], s, t, O, d, z, u, v;
+    dReal theta, delta, nn, na_0, na_1, cosphi, sinphi, m;
+    const dReal *p[2], *omega[2];
+    int i;
+
+    // Transform all needed quantities to the global frame.
+
+    for (i = 0 ; i < 2 ; i += 1) {
+        dBodyGetRelPointPos(node[i].body,
+                            anchors[i][0], anchors[i][1], anchors[i][2],
+                            a[i]);
+
+        dBodyVectorToWorld(node[i].body, axes[i][0], axes[i][1], axes[i][2],
+                           n[i]);
+
+        p[i] = dBodyGetPosition(node[i].body);
+        omega[i] = dBodyGetAngularVel(node[i].body);
+    }
+
+    if (update) {
+        // Make sure both gear reference frames end up with the same
+        // handedness.
+    
+        // Note: this flips the stored body-local axis of wheel 0 as well
+        // as its world-space copy.
+        if (dCalcVectorDot3(n[0], n[1]) < 0) {
+            dNegateVector3(axes[0]);
+            dNegateVector3(n[0]);
+        }
+    }
+
+    // Calculate the mesh geometry based on the current mode.
+    
+    switch (mode) {
+    case dTransmissionParallelAxes:
+        // Simply calculate the contact point as the point on the
+        // baseline that will yield the correct ratio.
+
+        dIASSERT (ratio > 0);
+        
+        dSubtractVectors3(d, a[1], a[0]);
+        dAddVectorScaledVector3(c[0], a[0], d, ratio / (1 + ratio));
+        dCopyVector3(c[1], c[0]);
+        
+        dNormalize3(d);
+        
+        for (i = 0 ; i < 2 ; i += 1) {
+            dCalcVectorCross3(l[i], d, n[i]);
+        }
+
+        break;
+    case dTransmissionIntersectingAxes:
+        // Calculate the line of intersection between the planes of the
+        // gears.
+
+        dCalcVectorCross3(l[0], n[0], n[1]);
+        dCopyVector3(l[1], l[0]);
+
+        nn = dCalcVectorDot3(n[0], n[1]);
+        dIASSERT(fabs(nn) != 1);
+        
+        na_0 = dCalcVectorDot3(n[0], a[0]);
+        na_1 = dCalcVectorDot3(n[1], a[1]);
+
+        // O is a point on the intersection line, expressed as a linear
+        // combination of the two plane normals.
+        dAddScaledVectors3(O, n[0], n[1],
+                           (na_0 - na_1 * nn) / (1 - nn * nn),
+                           (na_1 - na_0 * nn) / (1 - nn * nn));
+
+        // Find the contact point as:
+        //
+        // c = ((r_a - O) . l) l + O
+        //
+        // where r_a the anchor point of either gear and l, O the tangent
+        // line direction and origin.
+
+        for (i = 0 ; i < 2 ; i += 1) {
+            dSubtractVectors3(d, a[i], O);
+            m = dCalcVectorDot3(d, l[i]);        
+            dAddVectorScaledVector3(c[i], O, l[i], m);
+        }
+
+        break;
+    case dTransmissionChainDrive:
+        dSubtractVectors3(d, a[0], a[1]);
+        m = dCalcVectorLength3(d);
+
+        dIASSERT(m > 0);
+        
+        // Calculate the angle of the contact point relative to the
+        // baseline.
+
+        cosphi = clamp((radii[1] - radii[0]) / m, REAL(-1.0), REAL(1.0)); // Force into range to fix possible computation errors
+        sinphi = dSqrt (REAL(1.0) - cosphi * cosphi);
+
+        dNormalize3(d);
+
+        for (i = 0 ; i < 2 ; i += 1) {
+            // Calculate the contact radius in the local reference
+            // frame of the chain.  This has axis x pointing along the
+            // baseline, axis y pointing along the sprocket axis and
+            // the remaining axis normal to both.
+
+            u[0] = radii[i] * cosphi;
+            u[1] = 0;
+            u[2] = radii[i] * sinphi;
+
+            // Transform the contact radius into the global frame.
+
+            dCalcVectorCross3(z, d, n[i]);
+            
+            v[0] = dCalcVectorDot3(d, u);
+            v[1] = dCalcVectorDot3(n[i], u);
+            v[2] = dCalcVectorDot3(z, u);
+
+            // Finally calculate contact points and l.
+            
+            dAddVectors3(c[i], a[i], v);
+            dCalcVectorCross3(l[i], v, n[i]);
+            dNormalize3(l[i]);
+
+            // printf ("%d: %f, %f, %f\n",
+            //      i, l[i][0], l[i][1], l[i][2]);
+        }
+
+        break;
+    }
+
+    if (update) {
+        // We need to calculate an initial reference frame for each
+        // wheel which we can measure the current phase against.  This
+        // frame will have the initial contact radius as the x axis,
+        // the wheel axis as the z axis and their cross product as the
+        // y axis.
+
+        for (i = 0 ; i < 2 ; i += 1) {
+            dSubtractVectors3 (r[i], c[i], a[i]);
+            radii[i] = dCalcVectorLength3(r[i]);
+            dIASSERT(radii[i] > 0);
+            
+            // reference[i] is filled at offsets 0, 4 and 8: x axis,
+            // y axis and z axis respectively.
+            dBodyVectorFromWorld(node[i].body, r[i][0], r[i][1], r[i][2],
+                                 reference[i]);
+            dNormalize3(reference[i]);
+            dCopyVector3(reference[i] + 8, axes[i]);
+            dCalcVectorCross3(reference[i] + 4, reference[i] + 8, reference[i]);
+
+            // printf ("%f\n", dDOT(r[i], n[i]));
+            // printf ("(%f, %f, %f,\n %f, %f, %f,\n %f, %f, %f)\n",
+            //      reference[i][0],reference[i][1],reference[i][2],
+            //      reference[i][4],reference[i][5],reference[i][6],
+            //      reference[i][8],reference[i][9],reference[i][10]);
+
+            phase[i] = 0;
+        }
+
+        ratio = radii[0] / radii[1];
+        update = 0;
+    }
+    
+    for (i = 0 ; i < 2 ; i += 1) {
+        dReal phase_hat;
+
+        dSubtractVectors3 (r[i], c[i], a[i]);
+        
+        // Transform the (global) contact radius into the gear's
+        // reference frame.
+
+        dBodyVectorFromWorld (node[i].body, r[i][0], r[i][1], r[i][2], s);
+        dMultiply0_331(t, reference[i], s);
+
+        // Now simply calculate its angle on the plane relative to the
+        // x-axis which is the initial contact radius.  This will be
+        // an angle between -pi and pi that is coterminal with the
+        // actual phase of the wheel.  To find the real phase we
+        // estimate it by adding omega * dt to the old phase and then
+        // find the closest angle to that, that is coterminal to
+        // theta.
+
+        theta = atan2(t[1], t[0]);
+        phase_hat = phase[i] + dCalcVectorDot3(omega[i], n[i]) / worldFPS;
+
+        if (phase_hat > M_PI_2) {
+            if (theta < 0) {
+                theta += (dReal)(2 * M_PI);
+            }
+
+            theta += (dReal)(floor(phase_hat / (2 * M_PI)) * (2 * M_PI));
+        } else if (phase_hat < -M_PI_2) {
+            if (theta > 0) {
+                theta -= (dReal)(2 * M_PI);
+            }
+
+            theta += (dReal)(ceil(phase_hat / (2 * M_PI)) * (2 * M_PI));
+        }
+                
+        // Final correction by one full turn if the candidate is still
+        // more than pi away from the estimate.
+        if (phase_hat - theta > M_PI) {
+            phase[i] = theta + (dReal)(2 * M_PI);
+        } else if (phase_hat - theta < -M_PI) {
+            phase[i] = theta - (dReal)(2 * M_PI);
+        } else {
+            phase[i] = theta;
+        }
+
+        dIASSERT(fabs(phase_hat - phase[i]) < M_PI);
+    }
+
+    // Calculate the phase error.  Depending on the mode the condition
+    // is that the distances traveled by each contact point must be
+    // either equal (chain and sprockets) or opposite (gears).
+
+    if (mode == dTransmissionChainDrive) {
+        delta = (dCalcVectorLength3(r[0]) * phase[0] -
+                 dCalcVectorLength3(r[1]) * phase[1]);
+    } else {
+        delta = (dCalcVectorLength3(r[0]) * phase[0] +
+                 dCalcVectorLength3(r[1]) * phase[1]);
+    }
+
+    // When in chain mode a torque reversal, signified by the change
+    // in sign of the wheel phase difference, has the added effect of
+    // switching the active chain branch.  We must therefore reflect
+    // the contact points and tangents across the baseline.
+    
+    if (mode == dTransmissionChainDrive && delta < 0) {
+        // N.B. the locals declared in this block (d, and inside the loop
+        // nn and a) shadow the function-scope variables of the same names.
+        // The anchors a[0], a[1] are read below before the scalar `a`
+        // comes into scope.
+        dVector3 d;
+
+        dSubtractVectors3(d, a[0], a[1]);
+        
+        for (i = 0 ; i < 2 ; i += 1) {
+            dVector3 nn;
+            dReal a;
+            
+            dCalcVectorCross3(nn, n[i], d);
+            a = dCalcVectorDot3(nn, nn);
+            dIASSERT(a > 0);
+            
+            dAddScaledVectors3(c[i], c[i], nn,
+                               1, -2 * dCalcVectorDot3(c[i], nn) / a);
+            dAddScaledVectors3(l[i], l[i], nn,
+                               -1, 2 * dCalcVectorDot3(l[i], nn) / a);
+        }
+    }
+
+    // Do not add the constraint if there's backlash and we're in the
+    // backlash gap.
+
+    if (backlash == 0 || fabs(delta) > backlash) {
+        // The constraint is satisfied if the absolute velocity of the
+        // contact point projected onto the tangent of the wheels is equal
+        // for both gears.  This velocity can be calculated as:
+        // 
+        // u = v + omega x r_c
+        // 
+        // The constraint therefore becomes:
+        // (v_1 + omega_1 x r_c1) . l = (v_2 + omega_2 x r_c2) . l <=>
+        // (v_1 . l + (r_c1 x l) . omega_1 = v_2 . l + (r_c2 x l) . omega_2
+
+        // From here on r[i] is the contact point relative to the body
+        // position p[i] (it was relative to the anchor above).
+        for (i = 0 ; i < 2 ; i += 1) {
+            dSubtractVectors3 (r[i], c[i], p[i]);
+        }
+
+        dCopyVector3(J1 + GI2__JL_MIN, l[0]);
+        dCalcVectorCross3(J1 + GI2__JA_MIN, r[0], l[0]);
+
+        dCopyNegatedVector3(J2 + GI2__JL_MIN, l[1]);
+        dCalcVectorCross3(J2 + GI2__JA_MIN, l[1], r[1]);
+
+        // The sign of delta selects which one-sided bound applies (only
+        // with backlash) and on which side of the gap the error is measured.
+        if (delta > 0) {
+            if (backlash > 0) {
+                pairLoHi[GI2_LO] = -dInfinity;
+                pairLoHi[GI2_HI] = 0;
+            }
+
+            pairRhsCfm[GI2_RHS] = -worldFPS * erp * (delta - backlash);
+        } else {
+            if (backlash > 0) {
+                pairLoHi[GI2_LO] = 0;
+                pairLoHi[GI2_HI] = dInfinity;
+            }
+
+            pairRhsCfm[GI2_RHS] = -worldFPS * erp * (delta + backlash);
+        }
+    }
+
+    pairRhsCfm[GI2_CFM] = cfm;
+
+    // printf ("%f, %f, %f, %f, %f\n", delta, phase[0], phase[1], -phase[1] / phase[0], ratio);
+
+    // Cache the contact point (in world coordinates) to avoid
+    // recalculation if requested by the user.
+
+    dCopyVector3(contacts[0], c[0]);
+    dCopyVector3(contacts[1], c[1]);
+}
+
+/// Set the common axis of both wheels from the world-space vector (x, y, z).
+/// Only valid in parallel-axes or chain-drive mode. Each attached body gets
+/// the vector converted to its own frame and normalized; the joint is then
+/// flagged for an update.
+void dJointSetTransmissionAxis( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT(joint->mode == dTransmissionParallelAxes ||
+             joint->mode == dTransmissionChainDrive ,
+             "axes must be set individualy in current mode" );
+
+    for (int wheel = 0 ; wheel < 2 ; ++wheel) {
+        dxBody *const body = joint->node[wheel].body;
+
+        // Wheels without a body keep their previous axis.
+        if (!body) {
+            continue;
+        }
+
+        dBodyVectorFromWorld(body, x, y, z, joint->axes[wheel]);
+        dNormalize3(joint->axes[wheel]);
+    }
+
+    joint->update = 1;
+}
+
+/// Set the first wheel's axis from the world-space vector (x, y, z).
+/// Only valid in intersecting-axes mode. Flags the joint for an update.
+void dJointSetTransmissionAxis1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT(joint->mode == dTransmissionIntersectingAxes,
+             "can't set individual axes in current mode" );
+
+    // Stored in the first body's frame, normalized.
+    if (dxBody *const body = joint->node[0].body) {
+        dReal *const axis = joint->axes[0];
+
+        dBodyVectorFromWorld(body, x, y, z, axis);
+        dNormalize3(axis);
+    }
+
+    joint->update = 1;
+}
+
+/// Set the second wheel's axis from the world-space vector (x, y, z).
+/// Only valid in intersecting-axes mode. Flags the joint for an update.
+void dJointSetTransmissionAxis2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT(joint->mode == dTransmissionIntersectingAxes,
+             "can't set individual axes in current mode" );
+
+    // Stored in the second body's frame, normalized.
+    if (dxBody *const body = joint->node[1].body) {
+        dReal *const axis = joint->axes[1];
+
+        dBodyVectorFromWorld(body, x, y, z, axis);
+        dNormalize3(axis);
+    }
+
+    joint->update = 1;
+}
+
+/// Set the first wheel's anchor from the world-space point (x, y, z); it is
+/// stored relative to the first body. Flags the joint for an update.
+void dJointSetTransmissionAnchor1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+
+    if (dxBody *const body = joint->node[0].body) {
+        dBodyGetPosRelPoint(body, x, y, z, joint->anchors[0]);
+    }
+
+    joint->update = 1;
+}
+
+/// Set the second wheel's anchor from the world-space point (x, y, z); it
+/// is stored relative to the second body. Flags the joint for an update.
+void dJointSetTransmissionAnchor2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+
+    if (dxBody *const body = joint->node[1].body) {
+        dBodyGetPosRelPoint(body, x, y, z, joint->anchors[1]);
+    }
+
+    joint->update = 1;
+}
+
+/// Copy the first wheel's cached contact point (world coordinates, as last
+/// stored by getInfo2()) into @p result.
+void dJointGetTransmissionContactPoint1( dJointID j, dVector3 result )
+{
+    dxJointTransmission* joint = static_cast<dxJointTransmission*>(j);
+
+    dUASSERT( joint, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+
+    const dReal *const cached = joint->contacts[0];
+    dCopyVector3(result, cached);
+}
+
+void dJointGetTransmissionContactPoint2( dJointID j, dVector3 result )
+{
+    // Copies the stored second contact point (contacts[1]) into result.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dCopyVector3(result, transmission->contacts[1]);
+}
+
+void dJointGetTransmissionAxis( dJointID j, dVector3 result )
+{
+    // Reports axes[0] in world coordinates; asserts parallel-axes mode.
+    // result is left untouched when node[0] has no body.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dUASSERT( transmission->mode == dTransmissionParallelAxes,
+              "axes must be queried individualy in current mode" );
+    dBodyID const body = transmission->node[0].body;
+    if (body) {
+        const dReal* const axis = transmission->axes[0];
+        // Convert the stored axis back to world coordinates.
+        dBodyVectorToWorld(body, axis[0], axis[1], axis[2], result);
+    }
+}
+
+void dJointGetTransmissionAxis1( dJointID j, dVector3 result )
+{
+    // Reports axes[0] in world coordinates via node[0]'s body.
+    // result is left untouched when that body is absent.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dBodyID const body = transmission->node[0].body;
+    if (body) {
+        const dReal* const axis = transmission->axes[0];
+        // Convert the stored axis back to world coordinates.
+        dBodyVectorToWorld(body, axis[0], axis[1], axis[2], result);
+    }
+}
+
+void dJointGetTransmissionAxis2( dJointID j, dVector3 result )
+{
+    // Reports axes[1] in world coordinates via node[1]'s body.
+    // result is left untouched when that body is absent.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dBodyID const body = transmission->node[1].body;
+    if (body) {
+        const dReal* const axis = transmission->axes[1];
+        // Convert the stored axis back to world coordinates.
+        dBodyVectorToWorld(body, axis[0], axis[1], axis[2], result);
+    }
+}
+
+void dJointGetTransmissionAnchor1( dJointID j, dVector3 result )
+{
+    // Reports anchors[0] as a world-space point via node[0]'s body.
+    // result is left untouched when that body is absent.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dBodyID const body = transmission->node[0].body;
+    if (body) {
+        const dReal* const anchor = transmission->anchors[0];
+        // Convert the stored anchor back to a world position.
+        dBodyGetRelPointPos(body, anchor[0], anchor[1], anchor[2], result);
+    }
+}
+
+void dJointGetTransmissionAnchor2( dJointID j, dVector3 result )
+{
+    // Reports anchors[1] as a world-space point via node[1]'s body.
+    // result is left untouched when that body is absent.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    dBodyID const body = transmission->node[1].body;
+    if (body) {
+        const dReal* const anchor = transmission->anchors[1];
+        // Convert the stored anchor back to a world position.
+        dBodyGetRelPointPos(body, anchor[0], anchor[1], anchor[2], result);
+    }
+}
+
+void dJointSetTransmissionParam( dJointID j, int parameter, dReal value )
+{
+    // Only dParamCFM and dParamERP are stored; any other
+    // parameter id is silently ignored.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+
+    if ( parameter == dParamCFM ) {
+        transmission->cfm = value;
+    }
+    else if ( parameter == dParamERP ) {
+        transmission->erp = value;
+    }
+}
+
+
+dReal dJointGetTransmissionParam( dJointID j, int parameter )
+{
+    // Returns the stored CFM or ERP; any other parameter id yields 0.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+
+    if ( parameter == dParamCFM ) {
+        return transmission->cfm;
+    }
+    if ( parameter == dParamERP ) {
+        return transmission->erp;
+    }
+    return 0;
+}
+
+void dJointSetTransmissionMode( dJointID j, int mode )
+{
+    // Stores the mode; only the three dTransmission* modes pass the assert.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( mode == dTransmissionChainDrive ||
+              mode == dTransmissionIntersectingAxes ||
+              mode == dTransmissionParallelAxes, "invalid joint mode" );
+    transmission->mode = mode;
+}
+
+
+int dJointGetTransmissionMode( dJointID j )
+{
+    // Returns the joint's current mode field.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->mode;
+}
+
+void dJointSetTransmissionRatio( dJointID j, dReal ratio )
+{
+    // Stores the ratio; asserts parallel-axes mode and a strictly positive value.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( transmission->mode == dTransmissionParallelAxes,
+              "can't set ratio explicitly in current mode" );
+    dUASSERT( ratio > 0, "ratio must be positive" );
+    transmission->ratio = ratio;
+}
+
+
+dReal dJointGetTransmissionRatio( dJointID j )
+{
+    // Returns the joint's stored ratio field.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->ratio;
+}
+
+dReal dJointGetTransmissionAngle1( dJointID j )
+{
+    // Returns the stored phase of the first side (phase[0]).
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->phase[0];
+}
+
+dReal dJointGetTransmissionAngle2( dJointID j )
+{
+    // Returns the stored phase of the second side (phase[1]).
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->phase[1];
+}
+
+dReal dJointGetTransmissionRadius1( dJointID j )
+{
+    // Returns the stored first radius (radii[0]).
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->radii[0];
+}
+
+dReal dJointGetTransmissionRadius2( dJointID j )
+{
+    // Returns the stored second radius (radii[1]).
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->radii[1];
+}
+
+void dJointSetTransmissionRadius1( dJointID j, dReal radius )
+{
+    // Stores radii[0]; asserts chain-drive mode. The value itself is not validated.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( transmission->mode == dTransmissionChainDrive,
+              "can't set wheel radius explicitly in current mode" );
+    transmission->radii[0] = radius;
+}
+
+void dJointSetTransmissionRadius2( dJointID j, dReal radius )
+{
+    // Stores radii[1]; asserts chain-drive mode. The value itself is not validated.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    dUASSERT( transmission->mode == dTransmissionChainDrive,
+              "can't set wheel radius explicitly in current mode" );
+    transmission->radii[1] = radius;
+}
+
+dReal dJointGetTransmissionBacklash( dJointID j )
+{
+    // Returns the joint's stored backlash field.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    return transmission->backlash;
+}
+
+void dJointSetTransmissionBacklash( dJointID j, dReal backlash )
+{
+    // Stores the backlash value as given; no range check is performed.
+    dxJointTransmission* const transmission = static_cast<dxJointTransmission*>(j);
+    dUASSERT( transmission, "bad joint argument" );
+    transmission->backlash = backlash;
+}
+
+dJointType dxJointTransmission::type() const
+{
+    // Type tag reported for transmission joints.
+    return dJointTypeTransmission;
+}
+
+sizeint dxJointTransmission::size() const
+{
+    // Size in bytes of the dxJointTransmission object.
+    return sizeof( dxJointTransmission );
+}
diff --git a/libs/ode-0.16.1/ode/src/joints/transmission.h b/libs/ode-0.16.1/ode/src/joints/transmission.h
new file mode 100644
index 0000000..fae3f4c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/transmission.h
@@ -0,0 +1,51 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_TRANSMISSION_
+#define _ODE_JOINT_TRANSMISSION_
+
+#include "joint.h"
+
+struct dxJointTransmission : public dxJoint 
+{
+    int mode, update;   // mode: one of the dTransmission* constants; update: set to 1 by the axis/anchor setters
+    dVector3 contacts[2], axes[2], anchors[2];  // index i pairs with node[i].body; axes/anchors are converted from world coordinates by the setters
+    dMatrix3 reference[2];  // NOTE(review): not used by the accessors visible here; meaning to be confirmed
+    dReal phase[2], radii[2], backlash; // phase[] is returned by dJointGetTransmissionAngle1/2, radii[] by ...Radius1/2
+    dReal ratio;        // transmission ratio
+    dReal erp;          // error reduction
+    dReal cfm;          // constraint force mix in
+    
+    dxJointTransmission(dxWorld *w);
+
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex );
+    virtual dJointType type() const;    // returns dJointTypeTransmission
+    virtual sizeint size() const;       // returns sizeof( *this )
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/joints/universal.cpp b/libs/ode-0.16.1/ode/src/joints/universal.cpp
new file mode 100644
index 0000000..1ef00a7
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/universal.cpp
@@ -0,0 +1,803 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+
+#include <ode/odeconfig.h>
+#include "config.h"
+#include "universal.h"
+#include "joint_internal.h"
+
+
+
+//****************************************************************************
+// universal
+
+// I just realized that the universal joint is equivalent to a hinge 2 joint with
+// perfectly stiff suspension.  By comparing the hinge 2 implementation to
+// the universal implementation, you may be able to improve this
+// implementation (or, less likely, the hinge2 implementation).
+
+dxJointUniversal::dxJointUniversal( dxWorld *w ) : dxJoint( w )
+{
+    // Anchors and initial relative rotations start out zeroed.
+    dSetZero( anchor1, 4 );
+    dSetZero( anchor2, 4 );
+    dSetZero( qrel1, 4 );
+    dSetZero( qrel2, 4 );
+    // Default axes: axis1 = (1,0,0), axis2 = (0,1,0).
+    dSetZero( axis1, 4 ); axis1[0] = 1;
+    dSetZero( axis2, 4 ); axis2[1] = 1;
+    // Both limit/motor blocks are initialised against the joint's world.
+    limot1.init( world );
+    limot2.init( world );
+}
+
+
+void
+dxJointUniversal::getAxes( dVector3 ax1, dVector3 ax2 )
+{
+    // ax1 = R(body 1) * axis1. node[0].body is dereferenced unconditionally,
+    // so the caller must make sure the first body is attached.
+    dMultiply0_331( ax1, node[0].body->posr.R, axis1 );
+
+    dxBody* const second = node[1].body;
+    if ( !second )
+    {
+        // No second body: axis2 is copied through unchanged.
+        for ( int k = 0; k < 3; ++k ) ax2[k] = axis2[k];
+        return;
+    }
+
+    dMultiply0_331( ax2, second->posr.R, axis2 );
+}
+
+void
+dxJointUniversal::getAngles( dReal *angle1, dReal *angle2 )
+{
+    if ( node[0].body )
+    {
+        // Computes both joint angles; without a first body both are set to 0.
+        dVector3 ax1, ax2;
+        dMatrix3 R;
+        dQuaternion qcross, qq, qrel;
+        // length 1 joint axis in global coordinates, from each body
+        getAxes( ax1, ax2 );
+
+        // It should be possible to get both angles without explicitly
+        // constructing the rotation matrix of the cross.  Basically,
+        // orientation of the cross about axis1 comes from body 2,
+        // about axis 2 comes from body 1, and the perpendicular
+        // axis can come from the two bodies somehow.  (We don't really
+        // want to assume it's 90 degrees, because in general the
+        // constraints won't be perfectly satisfied, or even very well
+        // satisfied.)
+        //
+        // However, we'd need a version of getHingeAngleFromRelativeQuat()
+        // that CAN handle when its relative quat is rotated along a direction
+        // other than the given axis.  What I have here works,
+        // although it's probably much slower than need be.
+
+        dRFrom2Axes( R, ax1[0], ax1[1], ax1[2], ax2[0], ax2[1], ax2[2] );
+
+        dRtoQ( R, qcross );
+
+
+        // This code is essentially the same as getHingeAngle(), see the comments
+        // there for details.
+
+        // get qrel = relative rotation between node[0] and the cross
+        dQMultiply1( qq, node[0].body->q, qcross );
+        dQMultiply2( qrel, qq, qrel1 );
+
+        *angle1 = getHingeAngleFromRelativeQuat( qrel, axis1 );
+
+        // This is equivalent to
+        // dRFrom2Axes(R, ax2[0], ax2[1], ax2[2], ax1[0], ax1[1], ax1[2]);
+        // You see that the R is constructed from the same 2 axes as for angle1
+        // but the first and second axis are swapped.
+        // So we can take the first R and reapply a rotation to it.
+        // The rotation is around the axis between the 2 axes (ax1 and ax2).
+        // We do a rotation of 180deg.
+
+        dQuaternion qcross2;
+        // Find the vector between ax1 and ax2 (i.e. in the middle)
+        // We need to turn around this vector by 180deg
+
+        // The 2 axes should be normalized, so to find the vector between the 2:
+        // add and divide by 2 then normalize, or simply normalize
+        //    ax2
+        //    ^
+        //    |
+        //    |
+        //    *------------> ax1
+        //    We want the vector at 45deg
+        //
+        // N.B. We don't need to normalize ax1 and ax2 since they are
+        //      normalized when we set them.
+
+        // We set the quaternion q = [cos(theta), dir*sin(theta)] = [w, x, y, z]
+        qrel[0] = 0;                // equivalent to cos(Pi/2)
+        qrel[1] = ax1[0] + ax2[0];  // equivalent to x*sin(Pi/2); since sin(Pi/2) = 1
+        qrel[2] = ax1[1] + ax2[1];
+        qrel[3] = ax1[2] + ax2[2];
+        // NOTE(review): the length below is zero when ax1 == -ax2; there is no guard for that case.
+        dReal l = dRecip( sqrt( qrel[1] * qrel[1] + qrel[2] * qrel[2] + qrel[3] * qrel[3] ) );
+        qrel[1] *= l;
+        qrel[2] *= l;
+        qrel[3] *= l;
+
+        dQMultiply0( qcross2, qrel, qcross );
+
+        if ( node[1].body )
+        {
+            dQMultiply1( qq, node[1].body->q, qcross2 );
+            dQMultiply2( qrel, qq, qrel2 );
+        }
+        else
+        {
+            // pretend joint->node[1].body->q is the identity
+            dQMultiply2( qrel, qcross2, qrel2 );
+        }
+
+        *angle2 = - getHingeAngleFromRelativeQuat( qrel, axis2 );
+    }
+    else
+    {
+        *angle1 = 0;
+        *angle2 = 0;
+    }
+}
+
+dReal
+dxJointUniversal::getAngle1()
+{
+    // Hinge angle about axis1 between the first body and the joint's
+    // virtual cross piece. Returns 0 when no first body is attached.
+    if ( !node[0].body )
+        return 0;
+
+    // Length 1 joint axes in global coordinates, one from each body.
+    dVector3 worldAxis1, worldAxis2;
+    getAxes( worldAxis1, worldAxis2 );
+
+    // It should be possible to obtain the angle without explicitly
+    // building the rotation matrix of the cross: its orientation about
+    // axis 1 comes from body 2, about axis 2 from body 1, and the
+    // perpendicular axis could be derived from the two bodies. (Assuming
+    // exactly 90 degrees is not safe, because the constraints are in
+    // general not perfectly satisfied.)
+    //
+    // That would require a version of getHingeAngleFromRelativeQuat()
+    // that copes with a relative quaternion rotated about a direction
+    // other than the given axis. The approach below works, although it
+    // is probably slower than necessary.
+    dMatrix3 crossR;
+    dRFrom2Axes( crossR,
+                 worldAxis1[0], worldAxis1[1], worldAxis1[2],
+                 worldAxis2[0], worldAxis2[1], worldAxis2[2] );
+    dQuaternion crossQ;
+    dRtoQ( crossR, crossQ );
+
+    // Essentially the same computation as getHingeAngle(); see the
+    // comments there for details.
+    // relQ = relative rotation between node[0]'s body and the cross,
+    // combined with the stored initial rotation qrel1.
+    dQuaternion bodyToCross, relQ;
+    dQMultiply1( bodyToCross, node[0].body->q, crossQ );
+    dQMultiply2( relQ, bodyToCross, qrel1 );
+
+    return getHingeAngleFromRelativeQuat( relQ, axis1 );
+}
+
+
+dReal
+dxJointUniversal::getAngle2()
+{
+    // Negated hinge angle about axis2 between the second body (identity if
+    // absent) and the cross. Returns 0 when no first body is attached.
+    if ( !node[0].body )
+        return 0;
+
+    // Length 1 joint axes in global coordinates, one from each body.
+    dVector3 worldAxis1, worldAxis2;
+    getAxes( worldAxis1, worldAxis2 );
+
+    // It should be possible to obtain the angle without explicitly
+    // building the rotation matrix of the cross: its orientation about
+    // axis 1 comes from body 2, about axis 2 from body 1, and the
+    // perpendicular axis could be derived from the two bodies. (Assuming
+    // exactly 90 degrees is not safe, because the constraints are in
+    // general not perfectly satisfied.)
+    //
+    // That would require a version of getHingeAngleFromRelativeQuat()
+    // that copes with a relative quaternion rotated about a direction
+    // other than the given axis. The approach below works, although it
+    // is probably slower than necessary.
+    // Note the swapped axis order compared with getAngle1().
+    dMatrix3 crossR;
+    dRFrom2Axes( crossR,
+                 worldAxis2[0], worldAxis2[1], worldAxis2[2],
+                 worldAxis1[0], worldAxis1[1], worldAxis1[2] );
+    dQuaternion crossQ;
+    dRtoQ( crossR, crossQ );
+    dQuaternion relQ;
+    if ( dxBody* const second = node[1].body )
+    {
+        dQuaternion bodyToCross;
+        dQMultiply1( bodyToCross, second->q, crossQ );
+        dQMultiply2( relQ, bodyToCross, qrel2 );
+    }
+    else
+    {
+        // Without a second body its orientation is taken as the identity.
+        dQMultiply2( relQ, crossQ, qrel2 );
+    }
+    return - getHingeAngleFromRelativeQuat( relQ, axis2 );
+}
+
+
+void dxJointUniversal::getSureMaxInfo( SureMaxInfo* info )
+{
+    // Upper bound on rows: getInfo1() reports 4 plus at most one per limot.
+    info->max_m = 6;
+}
+
+
+void
+dxJointUniversal::getInfo1( dxJoint::Info1 *info )
+{
+    // Four rows are always present; each limot may add one more below.
+    info->m = 4;
+    info->nub = 4;
+
+    // A stop pair is tested when it is ordered (lo <= hi) and either
+    // lostop >= -pi or histop <= pi.
+    const bool active1 = limot1.lostop <= limot1.histop &&
+        ( limot1.lostop >= -M_PI || limot1.histop <= M_PI );
+    const bool active2 = limot2.lostop <= limot2.histop &&
+        ( limot2.lostop >= -M_PI || limot2.histop <= M_PI );
+
+    // testRotationalLimit() must run even for a motored axis, because it
+    // records its result; both recorded limit states are reset first.
+    limot1.limit = 0;
+    limot2.limit = 0;
+    if ( active1 || active2 )
+    {
+        dReal angle1, angle2;
+        getAngles( &angle1, &angle2 );
+        if ( active1 ) limot1.testRotationalLimit( angle1 );
+        if ( active2 ) limot2.testRotationalLimit( angle2 );
+    }
+
+    if ( limot1.limit || limot1.fmax > 0 ) info->m++;
+    if ( limot2.limit || limot2.fmax > 0 ) info->m++;
+}
+
+
+void
+dxJointUniversal::getInfo2( dReal worldFPS, dReal worldERP, 
+    int rowskip, dReal *J1, dReal *J2,
+    int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+    int *findex )
+{
+    // rows 0-2: set the three ball-and-socket rows
+    setBall( this, worldFPS, worldERP, rowskip, J1, J2, pairskip, pairRhsCfm, anchor1, anchor2 );
+
+    // row 3: set the universal joint row. the angular velocity about an axis
+    // perpendicular to both joint axes should be equal. thus the constraint
+    // equation is
+    //    p*w1 - p*w2 = 0
+    // where p is a vector normal to both joint axes, and w1 and w2
+    // are the angular velocity vectors of the two bodies.
+
+    // length 1 joint axis in global coordinates, from each body
+    dVector3 ax1, ax2;
+    // length 1 vector perpendicular to ax1 and ax2. Neither body can rotate
+    // about this.
+    dVector3 p;
+    // Since axis1 and axis2 may not be exactly perpendicular, ax2_temp
+    // (computed below) is ax2 with its component along ax1 removed: it is
+    // perpendicular to ax1 and lies in the plane of ax1 and ax2.
+    // p is then the normalized cross product of ax1 and ax2_temp.
+    getAxes( ax1, ax2 );
+
+    dReal k = dCalcVectorDot3( ax1, ax2 );
+
+    dVector3 ax2_temp;
+    dAddVectorScaledVector3(ax2_temp, ax2, ax1, -k);
+    dCalcVectorCross3( p, ax1, ax2_temp );
+    dNormalize3( p );
+
+    int currRowSkip = 3 * rowskip;
+    {
+        dCopyVector3( J1 + currRowSkip + GI2__JA_MIN, p);
+
+        if ( node[1].body )
+        {
+            dCopyNegatedVector3( J2 + currRowSkip + GI2__JA_MIN, p);
+        }
+    }
+
+    // compute the right hand side of the constraint equation. set relative
+    // body velocities along p to bring the axes back to perpendicular.
+    // If ax1, ax2 are unit length joint axes as computed from body1 and
+    // body2, we need to rotate both bodies along the axis p.  If theta
+    // is the angle between ax1 and ax2, we need an angular velocity
+    // along p to cover the angle erp * (theta - Pi/2) in one step:
+    //
+    //   |angular_velocity| = angle/time = erp*(theta - Pi/2) / stepsize
+    //                      = (erp*fps) * (theta - Pi/2)
+    //
+    // if theta is close to Pi/2,
+    // theta - Pi/2 ~= cos(theta), so
+    //    |angular_velocity|  ~= (erp*fps) * (ax1 dot ax2)
+
+    int currPairSkip = 3 * pairskip;
+    {
+        pairRhsCfm[currPairSkip + GI2_RHS] = worldFPS * worldERP * (-k);
+    }
+
+    currRowSkip += rowskip; currPairSkip += pairskip;
+
+    // if the first angle is powered, or has joint limits, add in the stuff
+    if (limot1.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax1, 1 ))
+    {
+        currRowSkip += rowskip; currPairSkip += pairskip;
+    }
+
+    // if the second angle is powered, or has joint limits, add in more stuff
+    limot2.addLimot( this, worldFPS, J1 + currRowSkip, J2 + currRowSkip, pairRhsCfm + currPairSkip, pairLoHi + currPairSkip, ax2, 1 );
+}
+
+
+void
+dxJointUniversal::computeInitialRelativeRotations()
+{
+    // Captures qrel1/qrel2 from the current body orientations and axes.
+    // Nothing is recomputed while the first body is missing.
+    if ( !node[0].body )
+        return;
+
+    // Joint axes in global coordinates, one from each body.
+    dVector3 worldAxis1, worldAxis2;
+    getAxes( worldAxis1, worldAxis2 );
+
+    dMatrix3 crossR;
+    dQuaternion crossQ;
+
+    // Axis 1: cross frame built from (axis1, axis2), combined with body 1.
+    dRFrom2Axes( crossR, worldAxis1[0], worldAxis1[1], worldAxis1[2],
+                 worldAxis2[0], worldAxis2[1], worldAxis2[2] );
+    dRtoQ( crossR, crossQ );
+    dQMultiply1( qrel1, node[0].body->q, crossQ );
+
+    // Axis 2: same construction with the axes swapped, combined with body 2.
+    dRFrom2Axes( crossR, worldAxis2[0], worldAxis2[1], worldAxis2[2],
+                 worldAxis1[0], worldAxis1[1], worldAxis1[2] );
+    dRtoQ( crossR, crossQ );
+    if ( node[1].body )
+        dQMultiply1( qrel2, node[1].body->q, crossQ );
+    else    // no second body: qrel2 is the cross quaternion itself
+        for ( int i = 0; i < 4; i++ ) qrel2[i] = crossQ[i];
+}
+
+
+void dJointSetUniversalAnchor( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    setAnchors( universal, x, y, z, universal->anchor1, universal->anchor2 );
+    universal->computeInitialRelativeRotations();   // refresh qrel1/qrel2
+}
+
+
+void dJointSetUniversalAxis1( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    // A reversed joint stores the caller's first axis in its axis2 slot.
+    const bool reversed = ( universal->flags & dJOINT_REVERSE ) != 0;
+    setAxes( universal, x, y, z, reversed ? NULL : universal->axis1,
+             reversed ? universal->axis2 : NULL );
+    universal->computeInitialRelativeRotations();
+}
+
+void dJointSetUniversalAxis1Offset( dJointID j, dReal x, dReal y, dReal z,
+                                   dReal offset1, dReal offset2 )
+{
+    dxJointUniversal* joint = ( dxJointUniversal* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Universal );
+    if ( joint->flags & dJOINT_REVERSE )
+    {
+        setAxes( joint, x, y, z, NULL, joint->axis2 );
+        offset1 = -offset1;
+        offset2 = -offset2;
+    }
+    else
+        setAxes( joint, x, y, z, joint->axis1, NULL );
+
+    joint->computeInitialRelativeRotations();
+    // qrel1/qrel2 as stored by the call above are replaced below by versions
+    // that include the two angle offsets.
+    dVector3 ax2;
+    getAxis2( joint, ax2, joint->axis2 );
+    // getAxes() overwrites ax2 again; its ax1 output is discarded.
+    {
+        dVector3 ax1;
+        joint->getAxes(ax1, ax2);
+    }
+
+    // First offset: a rotation of offset1 about (x, y, z) is multiplied with
+    // the cross quaternion built from (x, y, z) and ax2.
+    dQuaternion qAngle;
+    dQFromAxisAndAngle(qAngle, x, y, z, offset1);
+
+    dMatrix3 R;
+    dRFrom2Axes( R, x, y, z, ax2[0], ax2[1], ax2[2] );
+
+    dQuaternion qcross;
+    dRtoQ( R, qcross );
+
+    dQuaternion qOffset;
+    dQMultiply0(qOffset, qAngle, qcross);
+    // NOTE(review): node[0].body is dereferenced without a null check.
+    dQMultiply1( joint->qrel1, joint->node[0].body->q, qOffset );
+
+    // Calculating the second offset
+    dQFromAxisAndAngle(qAngle, ax2[0], ax2[1], ax2[2], offset2);
+
+    dRFrom2Axes( R, ax2[0], ax2[1], ax2[2], x, y, z );
+    dRtoQ( R, qcross );
+    // NOTE(review): without a second body qrel2 is set to qcross, so offset2 has no effect - confirm intended.
+    dQMultiply1(qOffset, qAngle, qcross);
+    if ( joint->node[1].body )
+    {
+        dQMultiply1( joint->qrel2, joint->node[1].body->q, qOffset );
+    }
+    else
+    {
+        joint->qrel2[0] = qcross[0];
+        joint->qrel2[1] = qcross[1];
+        joint->qrel2[2] = qcross[2];
+        joint->qrel2[3] = qcross[3];
+    }
+}
+
+
+void dJointSetUniversalAxis2( dJointID j, dReal x, dReal y, dReal z )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    // A reversed joint stores the caller's second axis in its axis1 slot.
+    const bool reversed = ( universal->flags & dJOINT_REVERSE ) != 0;
+    setAxes( universal, x, y, z, reversed ? universal->axis1 : NULL,
+             reversed ? NULL : universal->axis2 );
+    universal->computeInitialRelativeRotations();
+}
+
+void dJointSetUniversalAxis2Offset( dJointID j, dReal x, dReal y, dReal z,
+                                   dReal offset1, dReal offset2 )
+{
+    // Sets the second axis from the vector (x, y, z) and rebuilds
+    // qrel1/qrel2 so that they include the given angle offsets.
+    dxJointUniversal* joint = ( dxJointUniversal* )j;
+    dUASSERT( joint, "bad joint argument" );
+    checktype( joint, Universal );
+
+    if ( joint->flags & dJOINT_REVERSE )
+    {
+        setAxes( joint, x, y, z, joint->axis1, NULL );
+        // Reversed joint: the offsets trade places and change sign (the same
+        // mapping dJointAddUniversalTorques() uses). offset1 is saved first so
+        // that the second assignment does not read the overwritten value.
+        const dReal callerOffset1 = offset1;
+        offset1 = -offset2;
+        offset2 = -callerOffset1;
+    }
+    else
+        setAxes( joint, x, y, z, NULL, joint->axis2 );
+
+    joint->computeInitialRelativeRotations();
+
+    // It is easier to retrieve the two axes here, because when only body
+    // B2 is attached the axes switch position. Doing it this way avoids
+    // writing the code differently for the two cases.
+    dVector3 ax1, ax2;
+    joint->getAxes(ax1, ax2 );
+
+    // First offset: a rotation of offset1 about ax1 multiplied with the cross quaternion.
+    dQuaternion qAngle;
+    dQFromAxisAndAngle(qAngle, ax1[0], ax1[1], ax1[2], offset1);
+
+    dMatrix3 R;
+    dRFrom2Axes( R, ax1[0], ax1[1], ax1[2], ax2[0], ax2[1], ax2[2]);
+
+    dQuaternion qcross;
+    dRtoQ( R, qcross );
+
+    dQuaternion qOffset;
+    dQMultiply0(qOffset, qAngle, qcross);
+    dQMultiply1( joint->qrel1, joint->node[0].body->q, qOffset );
+
+    // Calculating the second offset
+    dQFromAxisAndAngle(qAngle, ax2[0], ax2[1], ax2[2], offset2);
+
+    dRFrom2Axes( R, ax2[0], ax2[1], ax2[2], ax1[0], ax1[1], ax1[2]);
+    dRtoQ( R, qcross );
+
+    dQMultiply1(qOffset, qAngle, qcross);
+    if ( joint->node[1].body )
+    {
+        dQMultiply1( joint->qrel2, joint->node[1].body->q, qOffset );
+    }
+    else
+    {
+        // NOTE(review): offset2 is not applied here (qcross, not qOffset) - confirm intended.
+        joint->qrel2[0] = qcross[0];
+        joint->qrel2[1] = qcross[1];
+        joint->qrel2[2] = qcross[2];
+        joint->qrel2[3] = qcross[3];
+    }
+}
+
+
+void dJointGetUniversalAnchor( dJointID j, dVector3 result )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( universal, Universal );
+    if ( !( universal->flags & dJOINT_REVERSE ) )
+        getAnchor( universal, result, universal->anchor1 );
+    else    // reversed joint: answered from the anchor2 side
+        getAnchor2( universal, result, universal->anchor2 );
+}
+
+
+void dJointGetUniversalAnchor2( dJointID j, dVector3 result )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( universal, Universal );
+    if ( !( universal->flags & dJOINT_REVERSE ) )
+        getAnchor2( universal, result, universal->anchor2 );
+    else    // reversed joint: answered from the anchor1 side
+        getAnchor( universal, result, universal->anchor1 );
+}
+
+
+void dJointGetUniversalAxis1( dJointID j, dVector3 result )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( universal, Universal );
+    if ( !( universal->flags & dJOINT_REVERSE ) )
+        getAxis( universal, result, universal->axis1 );
+    else    // reversed joint: the first axis is answered from the axis2 side
+        getAxis2( universal, result, universal->axis2 );
+}
+
+
+void dJointGetUniversalAxis2( dJointID j, dVector3 result )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    dUASSERT( result, "bad result argument" );
+    checktype( universal, Universal );
+    if ( !( universal->flags & dJOINT_REVERSE ) )
+        getAxis2( universal, result, universal->axis2 );
+    else    // reversed joint: the second axis is answered from the axis1 side
+        getAxis( universal, result, universal->axis1 );
+}
+
+
+void dJointSetUniversalParam( dJointID j, int parameter, dReal value )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+
+    // Ids whose bits 8..15 equal 0x01 address the second limot and are
+    // passed on reduced to their low byte; all others go to the first.
+    const bool forSecondLimot = ( parameter & 0xff00 ) == 0x100;
+    if ( forSecondLimot )
+        universal->limot2.set( parameter & 0xff, value );
+    else
+        universal->limot1.set( parameter, value );
+}
+
+
+dReal dJointGetUniversalParam( dJointID j, int parameter )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+
+    // Ids whose bits 8..15 equal 0x01 are read from the second limot
+    // (reduced to their low byte); all others are read from the first.
+    const bool forSecondLimot = ( parameter & 0xff00 ) == 0x100;
+    if ( forSecondLimot )
+        return universal->limot2.get( parameter & 0xff );
+
+    return universal->limot1.get( parameter );
+}
+
+void dJointGetUniversalAngles( dJointID j, dReal *angle1, dReal *angle2 )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    if ( !( universal->flags & dJOINT_REVERSE ) )
+    {
+        universal->getAngles( angle1, angle2 );
+        return;
+    }
+    // Reversed joint: the two angles trade places and angle2 flips sign.
+    universal->getAngles( angle2, angle1 );
+    *angle2 = -(*angle2);
+}
+
+
+dReal dJointGetUniversalAngle1( dJointID j )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+
+    // A reversed joint answers with its internal second angle.
+    const bool reversed = ( universal->flags & dJOINT_REVERSE ) != 0;
+    return reversed ? universal->getAngle2() : universal->getAngle1();
+}
+
+
+dReal dJointGetUniversalAngle2( dJointID j )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+
+    // A reversed joint answers with its negated internal first angle.
+    const bool reversed = ( universal->flags & dJOINT_REVERSE ) != 0;
+    return reversed ? -universal->getAngle1() : universal->getAngle2();
+}
+
+
+dReal dJointGetUniversalAngle1Rate( dJointID j )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    // Without a first body the reported rate is 0.
+    dxBody* const first = universal->node[0].body;
+    if ( !first )
+        return 0;
+    // Axis of the caller's first angle (taken from the axis2 side if reversed).
+    dVector3 axis;
+    if ( universal->flags & dJOINT_REVERSE )
+        getAxis2( universal, axis, universal->axis2 );
+    else
+        getAxis( universal, axis, universal->axis1 );
+
+    // Angular velocity of body 1 minus that of body 2, projected on the axis.
+    dReal rate = dCalcVectorDot3( axis, first->avel );
+    if ( dxBody* const second = universal->node[1].body )
+        rate -= dCalcVectorDot3( axis, second->avel );
+    return rate;
+}
+
+
+dReal dJointGetUniversalAngle2Rate( dJointID j )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dUASSERT( universal, "bad joint argument" );
+    checktype( universal, Universal );
+    dxBody* const first = universal->node[0].body;
+    if ( !first )
+        return 0;
+
+    // Axis of the caller's second angle (taken from the axis1 side if reversed).
+    dVector3 axis;
+    if ( universal->flags & dJOINT_REVERSE )
+        getAxis( universal, axis, universal->axis1 );
+    else
+        getAxis2( universal, axis, universal->axis2 );
+    // Angular velocity of body 1 minus that of body 2, projected on the axis.
+    dReal rate = dCalcVectorDot3( axis, first->avel );
+    if ( dxBody* const second = universal->node[1].body )
+        rate -= dCalcVectorDot3( axis, second->avel );
+    return rate;
+}
+
+
+void dJointAddUniversalTorques( dJointID j, dReal torque1, dReal torque2 )
+{
+    dxJointUniversal* const universal = static_cast<dxJointUniversal*>( j );
+    dAASSERT( universal );
+    checktype( universal, Universal );
+
+    // For a reversed joint the two torques trade places and change sign.
+    const bool reversed = ( universal->flags & dJOINT_REVERSE ) != 0;
+    const dReal t1 = reversed ? -torque2 : torque1;
+    const dReal t2 = reversed ? -torque1 : torque2;
+
+    dVector3 a1, a2;
+    getAxis( universal, a1, universal->axis1 );
+    getAxis2( universal, a2, universal->axis2 );
+
+    // Combined torque; applied with opposite signs to the two bodies.
+    dVector3 torque;
+    for ( int k = 0; k < 3; ++k )
+        torque[k] = a1[k] * t1 + a2[k] * t2;
+
+    if ( dxBody* const first = universal->node[0].body )
+        dBodyAddTorque( first, torque[0], torque[1], torque[2] );
+    if ( dxBody* const second = universal->node[1].body )
+        dBodyAddTorque( second, -torque[0], -torque[1], -torque[2] );
+}
+
+
+dJointType dxJointUniversal::type() const
+{
+    // Type tag reported for universal joints.
+    return dJointTypeUniversal;
+}
+
+
+sizeint dxJointUniversal::size() const
+{
+    // Size in bytes of the dxJointUniversal object.
+    return sizeof( dxJointUniversal );
+}
+
+
+
+void
+dxJointUniversal::setRelativeValues()
+{
+    // Reads the current anchor and axes back through the public getters,
+    // stores them again, then refreshes the initial relative rotations.
+    dVector3 anchor;
+    dJointGetUniversalAnchor(this, anchor);
+    setAnchors( this, anchor[0], anchor[1], anchor[2], anchor1, anchor2 );
+
+    dVector3 first, second;
+    dJointGetUniversalAxis1(this, first);
+    dJointGetUniversalAxis2(this, second);
+
+    // Both axes were read above before either one is written back.
+    // A reversed joint stores the first public axis in axis2 and the
+    // second in axis1; otherwise they map straight through.
+    const bool reversed = ( flags & dJOINT_REVERSE ) != 0;
+    setAxes( this, first[0], first[1], first[2],
+             reversed ? NULL : axis1, reversed ? axis2 : NULL );
+    setAxes( this, second[0], second[1], second[2],
+             reversed ? axis1 : NULL, reversed ? NULL : axis2 );
+
+    computeInitialRelativeRotations();
+}
+
diff --git a/libs/ode-0.16.1/ode/src/joints/universal.h b/libs/ode-0.16.1/ode/src/joints/universal.h
new file mode 100644
index 0000000..98e5468
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/joints/universal.h
@@ -0,0 +1,64 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_JOINT_UNIVERSAL_H_
+#define _ODE_JOINT_UNIVERSAL_H_
+
+#include "joint.h"
+
+// universal
+
+struct dxJointUniversal : public dxJoint
+{
+    dVector3 anchor1;   // anchor w.r.t first body
+    dVector3 anchor2;   // anchor w.r.t second body
+    dVector3 axis1;     // axis 1 w.r.t first body
+    dVector3 axis2;     // axis 2 w.r.t second body
+    dQuaternion qrel1;  // initial relative rotation body1 -> virtual cross piece
+    dQuaternion qrel2;  // initial relative rotation virtual cross piece -> body2
+    dxJointLimitMotor limot1; // limit and motor information for axis1
+    dxJointLimitMotor limot2; // limit and motor information for axis2
+
+    // helpers specific to this joint type (declared here, defined out of line)
+    void getAxes( dVector3 ax1, dVector3 ax2 );
+    void getAngles( dReal *angle1, dReal *angle2 );
+    dReal getAngle1();
+    dReal getAngle2();
+    void computeInitialRelativeRotations(); // called at the end of setRelativeValues()
+
+    // construction and the virtual joint interface
+    dxJointUniversal( dxWorld *w );
+    virtual void getSureMaxInfo( SureMaxInfo* info );
+    virtual void getInfo1( Info1* info );
+    virtual void getInfo2( dReal worldFPS, dReal worldERP, 
+        int rowskip, dReal *J1, dReal *J2,
+        int pairskip, dReal *pairRhsCfm, dReal *pairLoHi, 
+        int *findex);
+    virtual dJointType type() const;    // returns dJointTypeUniversal
+    virtual sizeint size() const;       // returns sizeof the joint object
+
+    virtual void setRelativeValues();   // refreshes anchor1/2 and axis1/2, then the initial relative rotations
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/lcp.cpp b/libs/ode-0.16.1/ode/src/lcp.cpp
new file mode 100644
index 0000000..58db0bd
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/lcp.cpp
@@ -0,0 +1,1317 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+
+THE ALGORITHM
+-------------
+
+solve A*x = b+w, with x and w subject to certain LCP conditions.
+each x(i),w(i) must lie on one of the three line segments in the following
+diagram. each line segment corresponds to one index set :
+
+     w(i)
+     /|\      |           :
+      |       |           :
+      |       |i in N     :
+  w>0 |       |state[i]=0 :
+      |       |           :
+      |       |           :  i in C
+  w=0 +       +-----------------------+
+      |                   :           |
+      |                   :           |
+  w<0 |                   :           |i in N
+      |                   :           |state[i]=1
+      |                   :           |
+      |                   :           |
+      +-------|-----------|-----------|----------> x(i)
+             lo           0           hi
+
+the Dantzig algorithm proceeds as follows:
+  for i=1:n
+    * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
+      negative towards the line. as this is done, the other (x(j),w(j))
+      for j<i are constrained to be on the line. if any (x,w) reaches the
+      end of a line segment then it is switched between index sets.
+    * i is added to the appropriate index set depending on what line segment
+      it hits.
+
+we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
+simpler, because the starting point for x(i),w(i) is always on the dotted
+line x=0 and x will only ever increase in one direction, so it can only hit
+two out of the three line segments.
+
+
+NOTES
+-----
+
+this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
+the implementation is split into an LCP problem object (dLCP) and an LCP
+driver function. most optimization occurs in the dLCP object.
+
+a naive implementation of the algorithm requires either a lot of data motion
+or a lot of permutation-array lookup, because we are constantly re-ordering
+rows and columns. to avoid this and make a more optimized algorithm, a
+non-trivial data structure is used to represent the matrix A (this is
+implemented in the fast version of the dLCP object).
+
+during execution of this algorithm, some indexes in A are clamped (set C),
+some are non-clamped (set N), and some are "don't care" (where x=0).
+A,x,b,w (and other problem vectors) are permuted such that the clamped
+indexes are first, the unclamped indexes are next, and the don't-care
+indexes are last. this permutation is recorded in the array `p'.
+initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
+the corresponding elements of p are swapped.
+
+because the C and N elements are grouped together in the rows of A, we can do
+lots of work with a fast dot product function. if A,x,etc were not permuted
+and we only had a permutation array, then those dot products would be much
+slower as we would have a permutation array lookup in some inner loops.
+
+A is accessed through an array of row pointers, so that element (i,j) of the
+permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
+we still have to actually move the data.
+
+during execution of this algorithm we maintain an L*D*L' factorization of
+the clamped submatrix of A (call it `AC') which is the top left nC*nC
+submatrix of A. there are two ways we could arrange the rows/columns in AC.
+
+(1) AC is always permuted such that L*D*L' = AC. this causes a problem
+when a row/column is removed from C, because then all the rows/columns of A
+between the deleted index and the end of C need to be rotated downward.
+this results in a lot of data motion and slows things down.
+(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
+itself a permutation of the underlying A). this is what we do - the
+permutation is recorded in the vector C. call this permutation A[C,C].
+when a row/column is removed from C, all we have to do is swap two
+rows/columns and manipulate C.
+
+*/
+
+#include <ode/common.h>
+#include <ode/misc.h>
+#include <ode/timer.h>		// for testing
+#include "config.h"
+#include "lcp.h"
+#include "util.h"
+#include "matrix.h"
+#include "mat.h"		// for testing
+#include "threaded_solver_ldlt.h"
+
+#include "fastdot_impl.h"
+#include "fastldltfactor_impl.h"
+#include "fastldltsolve_impl.h"
+
+
+//***************************************************************************
+// code generation parameters
+
+// LCP debugging (mostly for fast dLCP) - this slows things down a lot
+//#define DEBUG_LCP
+
+#define dLCP_FAST		// use fast dLCP object
+
+#define NUB_OPTIMIZATIONS // use NUB optimizations
+
+
+// option 1 : matrix row pointers (less data copying)
+#define ROWPTRS // selects the "# ifdef ROWPTRS" code paths further down
+#define ATYPE dReal ** // matrix handle type: an array of row pointers
+#define AROW(i) (m_A[i]) // row i of the matrix; expands to a use of a variable named m_A
+
+// option 2 : no matrix row pointers (slightly faster inner loops)
+//#define NOROWPTRS
+//#define ATYPE dReal *
+//#define AROW(i) (m_A+(i)*m_nskip)
+
+
+//***************************************************************************
+
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A)) // smaller of A and B; an argument may be evaluated twice
+#define dMAX(A,B)  ((B)>(A) ? (B) : (A)) // larger of A and B; an argument may be evaluated twice
+// on ties both macros yield A. avoid arguments with side effects.
+
+#define LMATRIX_ALIGNMENT       dMAX(64, EFFICIENT_ALIGNMENT) // at least 64; passed when the L matrix is allocated
+
+//***************************************************************************
+
+
+// transfer b-values to x-values (and, when zero_b is set, clear the b-values)
+template<bool zero_b>
+inline 
+void transfer_b_to_x(dReal pairsbx[PBX__MAX], unsigned n)
+{
+    dReal *pair = pairsbx;
+    for (unsigned remaining = n; remaining != 0; --remaining, pair += PBX__MAX) {
+        pair[PBX_X] = pair[PBX_B];
+        if (zero_b) {
+            pair[PBX_B] = REAL(0.0);
+        }
+    }
+}
+
+// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
+// A is nskip. this only references and swaps the lower triangle.
+// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
+// rows will be swapped by exchanging row pointers. otherwise the data will
+// be copied.
+
+static 
+void swapRowsAndCols (ATYPE A, unsigned n, unsigned i1, unsigned i2, unsigned nskip, 
+                             int do_fast_row_swaps)
+{   // requires i1 < i2 < n and nskip >= n; only the lower triangle of A is read and written
+    dAASSERT (A && n > 0 && i1 < n && i2 < n && // indexes are unsigned, so no ">= 0" checks
+        nskip >= n && i1 < i2);
+
+# ifdef ROWPTRS
+    dReal *A_i1 = A[i1];
+    dReal *A_i2 = A[i2];
+    for (unsigned i=i1+1; i<i2; ++i) { // rows strictly between i1 and i2
+        dReal *A_i_i1 = A[i] + i1;
+        A_i1[i] = *A_i_i1; // stash A(i,i1) in row i1; that row's data ends up as row i2
+        *A_i_i1 = A_i2[i]; // A(i2,i) takes the place of A(i,i1)
+    }
+    A_i1[i2] = A_i1[i1]; // old A(i1,i1) will become the new A(i2,i2)
+    A_i1[i1] = A_i2[i1]; // A(i2,i1) keeps its position after the row exchange
+    A_i2[i1] = A_i2[i2]; // old A(i2,i2) will become the new A(i1,i1)
+    // swap rows, by swapping row pointers
+    if (do_fast_row_swaps) {
+        A[i1] = A_i2;
+        A[i2] = A_i1;
+    }
+    else {
+        // Only swap till i2 column to match A plain storage variant.
+        for (unsigned k = 0; k <= i2; ++k) {
+            dxSwap(A_i1[k], A_i2[k]);
+        }
+    }
+    // swap columns the hard way
+    for (unsigned j = i2 + 1; j < n; ++j) {
+        dReal *A_j = A[j];
+        dxSwap(A_j[i1], A_j[i2]);
+    }
+# else
+    dReal *A_i1 = A + (sizeint)nskip * i1;
+    dReal *A_i2 = A + (sizeint)nskip * i2;
+    // columns left of i1: exchange the two row segments
+    for (unsigned k = 0; k < i1; ++k) {
+        dxSwap(A_i1[k], A_i2[k]);
+    }
+    // rows between i1 and i2: exchange A(i2,i) with A(i,i1)
+    dReal *A_i = A_i1 + nskip;
+    for (unsigned i= i1 + 1; i < i2; A_i += nskip, ++i) {
+        dxSwap(A_i2[i], A_i[i1]);
+    }
+    // the two diagonal entries trade places; A(i2,i1) stays where it is
+    dxSwap(A_i1[i1], A_i2[i2]);
+    // rows below i2: exchange columns i1 and i2
+    dReal *A_j = A_i2 + nskip;
+    for (unsigned j = i2 + 1; j < n; A_j += nskip, ++j) {
+        dxSwap(A_j[i1], A_j[i2]);
+    }
+# endif
+}
+
+
+// swap two indexes in the n*n LCP problem. i1 must be <= i2.
+
+static 
+void swapProblem (ATYPE A, dReal pairsbx[PBX__MAX], dReal *w, dReal pairslh[PLH__MAX],
+                         unsigned *p, bool *state, int *findex,
+                         unsigned n, unsigned i1, unsigned i2, unsigned nskip,
+                         int do_fast_row_swaps)
+{
+    // exchange entries i1 and i2 in the matrix and in every per-index array
+    dIASSERT (n>0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
+
+    if (i1 == i2) return; // nothing to exchange
+
+    swapRowsAndCols (A, n, i1, i2, nskip, do_fast_row_swaps);
+
+    dReal *const bx1 = pairsbx + (sizeint)i1 * PBX__MAX, *const bx2 = pairsbx + (sizeint)i2 * PBX__MAX;
+    dxSwap(bx1[PBX_B], bx2[PBX_B]);
+    dxSwap(bx1[PBX_X], bx2[PBX_X]);
+    dSASSERT(PBX__MAX == 2);
+
+    dxSwap(w[i1], w[i2]);
+
+    dReal *const lh1 = pairslh + (sizeint)i1 * PLH__MAX, *const lh2 = pairslh + (sizeint)i2 * PLH__MAX;
+    dxSwap(lh1[PLH_LO], lh2[PLH_LO]);
+    dxSwap(lh1[PLH_HI], lh2[PLH_HI]);
+    dSASSERT(PLH__MAX == 2);
+
+    dxSwap(p[i1], p[i2]);
+    dxSwap(state[i1], state[i2]);
+    if (findex != NULL) dxSwap(findex[i1], findex[i2]); // findex is optional
+}
+
+
+// for debugging - check that L,d is the factorization of A[C,C].
+// A[C,C] has size nC*nC and leading dimension nskip.
+// L has size nC*nC and leading dimension nskip.
+// d has size nC.
+
+#ifdef DEBUG_LCP
+
+static 
+void checkFactorization (ATYPE A, dReal *_L, dReal *_d,
+                                unsigned nC, unsigned *C, unsigned nskip)
+{   // debug aid: rebuilds L*D*L' from _L/_d and reports when it differs from A[C,C] by more than 1e-8
+    unsigned i, j;
+    if (nC == 0) return;
+    ATYPE const m_A = A; const unsigned m_nskip = nskip; (void)m_nskip; // AROW() is written in terms of m_A (and m_nskip), which do not otherwise exist in this free function
+    // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
+    dMatrix A1 (nC, nC);
+    for (i=0; i < nC; i++) {
+        for (j = 0; j <= i; j++) A1(i, j) = A1(j, i) = AROW(i)[j];
+    }
+    dMatrix A2 = A1.select (nC, C, nC, C);
+
+    // printf ("A1=\n"); A1.print(); printf ("\n");
+    // printf ("A2=\n"); A2.print(); printf ("\n");
+
+    // compute A3 = L*D*L'
+    dMatrix L (nC, nC, _L, nskip, 1);
+    dMatrix D (nC, nC);
+    for (i = 0; i < nC; i++) D(i, i) = 1.0 / _d[i]; // _d holds the reciprocals of D's diagonal
+    L.clearUpperTriangle();
+    for (i = 0; i < nC; i++) L(i, i) = 1;
+    dMatrix A3 = L * D * L.transpose();
+
+    // printf ("L=\n"); L.print(); printf ("\n");
+    // printf ("D=\n"); D.print(); printf ("\n");
+    // printf ("A3=\n"); A3.print(); printf ("\n");
+
+    // compare A2 and A3
+    dReal diff = A2.maxDifference (A3);
+    if (diff > 1e-8)
+        dDebug (0, "L*D*L' check, maximum difference = %.6e\n", diff);
+}
+
+#endif
+
+
+// for debugging
+
+#ifdef DEBUG_LCP
+
+static 
+void checkPermutations (unsigned i, unsigned n, unsigned nC, unsigned nN, unsigned *p, unsigned *C)
+{   // debug aid: asserts that p and C are consistent once indexes 0..i-1 have been placed in C or N
+    unsigned j,k;
+    dIASSERT (/*nC >= 0 && nN >= 0 && */(nC + nN) == i && i < n);
+    for (k=0; k<i; k++) dIASSERT (p[k] < i); // p is unsigned, so only the upper bound can fail
+    for (k=i; k<n; k++) dIASSERT (p[k] == k); // entries from i onward must still be unpermuted
+    for (j=0; j<nC; j++) { // every value 0..nC-1 must occur somewhere in C
+        int C_is_bad = 1;
+        for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
+        dIASSERT (C_is_bad==0);
+    }
+}
+
+#endif
+
+//***************************************************************************
+// dLCP manipulator object. this represents an n*n LCP problem.
+//
+// two index sets C and N are kept. each set holds a subset of
+// the variable indexes 0..n-1. an index can only be in one set.
+// initially both sets are empty.
+//
+// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
+
+//***************************************************************************
+// fast implementation of dLCP. see the above definition of dLCP for
+// interface comments.
+//
+// `p' records the permutation of A,x,b,w,etc. p is initially 0..n-1 and is
+// permuted as the other vectors/matrices are permuted.
+//
+// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
+// contiguous indexes. the don't-care indexes follow N.
+//
+// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
+// added or removed from the set C the factorization is updated.
+// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
+// the leading dimension of the matrix L is always `nskip'.
+//
+// at the start there may be other indexes that are unbounded but are not
+// included in `nub'. dLCP will permute the matrix so that absolutely all
+// unbounded vectors are at the start. thus there may be some initial
+// permutation.
+//
+// the algorithms here assume certain patterns, particularly with respect to
+// index transfer.
+
+#ifdef dLCP_FAST
+
+struct dLCP {
+    const unsigned m_n;             // problem size (the problem is n*n)
+    const unsigned m_nskip;         // leading dimension of A and L
+    unsigned m_nub;                 // count of leading unbounded variables; the constructor may raise it
+    unsigned m_nC, m_nN;				// size of each index set
+    ATYPE const m_A;				// A rows
+    dReal *const m_pairsbx, *const m_w, *const m_pairslh;	// permuted LCP problem data
+    dReal *const m_L, *const m_d;				// L*D*L' factorization of set C
+    dReal *const m_Dell, *const m_ell, *const m_tmp;    // work vectors; Dell/ell are filled by solve1()
+    bool *const m_state;            // per-index state flags, permuted along with the problem
+    int *const m_findex;            // may be NULL; permuted along with the problem
+    unsigned *const m_p, *const m_C;    // p: permutation record; C: ordering of the factorized indexes
+
+    dLCP (unsigned _n, unsigned _nskip, unsigned _nub, dReal *_Adata, dReal *_pairsbx, dReal *_w,
+        dReal *_pairslh, dReal *_L, dReal *_d,
+        dReal *_Dell, dReal *_ell, dReal *_tmp,
+        bool *_state, int *_findex, unsigned *_p, unsigned *_C, dReal **Arows);
+    unsigned getNub() const { return m_nub; }
+    void transfer_i_to_C (unsigned i);
+    void transfer_i_to_N (unsigned /*i*/) { m_nN++; }			// because we can assume C and N span 1:i-1
+    void transfer_i_from_N_to_C (unsigned i);
+    void transfer_i_from_C_to_N (unsigned i, void *tmpbuf);
+    static sizeint estimate_transfer_i_from_C_to_N_mem_req(unsigned nC, unsigned nskip) { return dEstimateLDLTRemoveTmpbufSize(nC, nskip); }
+    unsigned numC() const { return m_nC; }
+    unsigned numN() const { return m_nN; }
+    unsigned indexC (unsigned i) const { return i; }            // C occupies positions 0..nC-1
+    unsigned indexN (unsigned i) const { return i+m_nC; }       // N follows directly after C
+    dReal Aii (unsigned i) const  { return AROW(i)[i]; }        // diagonal entry of the permuted A
+    template<unsigned q_stride>
+    dReal AiC_times_qC (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i), q, m_nC); }
+    template<unsigned q_stride>
+    dReal AiN_times_qN (unsigned i, dReal *q) const { return calculateLargeVectorDot<q_stride> (AROW(i) + m_nC, q + (sizeint)m_nC * q_stride, m_nN); }
+    void pN_equals_ANC_times_qC (dReal *p, dReal *q);
+    void pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive);
+    template<unsigned p_stride>
+    void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q);
+    void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q);
+    void solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer=0);
+    void unpermute_X();     // puts the x-values back into the caller's original index order
+    void unpermute_W();     // puts the w-values back into the caller's original index order
+};
+
+
+dLCP::dLCP (unsigned _n, unsigned _nskip, unsigned _nub, dReal *_Adata, dReal *_pairsbx, dReal *_w,
+            dReal *_pairslh, dReal *_L, dReal *_d,
+            dReal *_Dell, dReal *_ell, dReal *_tmp,
+            bool *_state, int *_findex, unsigned *_p, unsigned *_C, dReal **Arows):
+    m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
+# ifdef ROWPTRS
+    m_A(Arows),
+#else
+    m_A(_Adata),
+#endif
+    m_pairsbx(_pairsbx), m_w(_w), m_pairslh(_pairslh), 
+    m_L(_L), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp),
+    m_state(_state), m_findex(_findex), m_p(_p), m_C(_C)
+{   // stores the caller's buffers, then permutes the problem and factorizes the unbounded leading part
+    dxtSetZero<PBX__MAX>(m_pairsbx + PBX_X, m_n); // start from x = 0 (assuming dxtSetZero zeroes m_n strided entries)
+
+    {
+# ifdef ROWPTRS
+        // make matrix row pointers
+        dReal *aptr = _Adata;
+        ATYPE A = m_A;
+        const unsigned n = m_n, nskip = m_nskip;
+        for (unsigned k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
+# endif
+    }
+
+    {
+        unsigned *p = m_p;
+        const unsigned n = m_n;
+        for (unsigned k=0; k != n; ++k) p[k] = k;		// initially unpermuted
+    }
+
+    /*
+    // for testing, we can do some random swaps in the area i > nub
+    {
+    const unsigned n = m_n;
+    const unsigned nub = m_nub;
+    if (nub < n) {
+    for (unsigned k=0; k<100; k++) {
+    unsigned i1,i2;
+    do {
+    i1 = dRandInt(n-nub)+nub;
+    i2 = dRandInt(n-nub)+nub;
+    }
+    while (i1 > i2); 
+    //printf ("--> %d %d\n",i1,i2);
+    swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, n, i1, i2, m_nskip, 0);
+    }
+    }
+    */
+
+    // permute the problem so that *all* the unbounded variables are at the
+    // start, i.e. look for unbounded variables not included in `nub'. we can
+    // potentially push up `nub' this way and get a bigger initial factorization.
+    // note that when we swap rows/cols here we must not just swap row pointers,
+    // as the initial factorization relies on the data being all in one chunk.
+    // variables that have findex >= 0 are *not* considered to be unbounded even
+    // if lo=-inf and hi=inf - this is because these limits may change during the
+    // solution process.
+
+    {
+        int *findex = m_findex;
+        dReal *pairslh = m_pairslh;
+        const unsigned n = m_n;
+        for (unsigned k = m_nub; k < n; ++k) {
+            if (findex && findex[k] >= 0) continue;
+            if ((pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) {
+                swapProblem (m_A, m_pairsbx, m_w, pairslh, m_p, m_state, findex, n, m_nub, k, m_nskip, 0); // copying swap, see the note above
+                m_nub++;
+            }
+        }
+    }
+
+    // if there are unbounded variables at the start, factorize A up to that
+    // point and solve for x. this puts all indexes 0..nub-1 into C.
+    if (m_nub > 0) {
+        const unsigned nub = m_nub;
+        {
+            dReal *Lrow = m_L;
+            const unsigned nskip = m_nskip;
+            for (unsigned j = 0; j < nub; Lrow += nskip, ++j) memcpy(Lrow, AROW(j), (j + 1) * sizeof(dReal)); // lower triangle only: row j has j+1 entries
+        }
+        transfer_b_to_x<false> (m_pairsbx, nub); // x = b for the first nub pairs; b is kept
+        factorMatrixAsLDLT<1> (m_L, m_d, nub, m_nskip);
+        solveEquationSystemWithLDLT<1, PBX__MAX> (m_L, m_d, m_pairsbx + PBX_X, nub, m_nskip);
+        dSetZero (m_w, nub);
+        {
+            unsigned *C = m_C;
+            for (unsigned k = 0; k < nub; ++k) C[k] = k;
+        }
+        m_nC = nub;
+    }
+
+    // permute the indexes > nub such that all findex variables are at the end
+    if (m_findex) {
+        const unsigned nub = m_nub;
+        int *findex = m_findex;
+        unsigned num_at_end = 0;
+        for (unsigned k = m_n; k > nub; ) { // walks k from m_n-1 down to nub
+            --k;
+            if (findex[k] >= 0) {
+                swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, findex, m_n, k, m_n - 1 - num_at_end, m_nskip, 1);
+                num_at_end++;
+            }
+        }
+    }
+
+    // print info about indexes
+    /*
+    {
+    const unsigned n = m_n;
+    const unsigned nub = m_nub;
+    for (unsigned k=0; k<n; k++) {
+    if (k<nub) printf ("C");
+    else if ((m_pairslh + (sizeint)k * PLH__MAX)[PLH_LO] == -dInfinity && (m_pairslh + (sizeint)k * PLH__MAX)[PLH_HI] == dInfinity) printf ("c");
+    else printf (".");
+    }
+    printf ("\n");
+    }
+    */
+}
+
+
+void dLCP::transfer_i_to_C (unsigned i)
+{   // adds index i to C: extends the L*D*L' factorization by one row, then moves i to position nC
+    {
+        const unsigned nC = m_nC;
+
+        if (nC > 0) {
+            // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
+            dReal *const Ltgt = m_L + (sizeint)m_nskip * nC, *ell = m_ell;
+            memcpy(Ltgt, ell, nC * sizeof(dReal)); // ell becomes row nC of L
+
+            dReal ell_Dell_dot = dxDot(m_ell, m_Dell, nC);
+            dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
+            m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
+        }
+        else {
+            m_d[0] = dRecip (AROW(i)[i]); // C was empty: the factorization is the single diagonal entry
+        }
+
+        swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1); // bring index i to position nC
+
+        m_C[nC] = nC;
+        m_nC = nC + 1; // nC value is outdated after this line
+    }
+
+# ifdef DEBUG_LCP
+    checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
+    if (i < (m_n-1)) checkPermutations (i+1, m_n, m_nC, m_nN, m_p, m_C);
+# endif
+}
+
+
+void dLCP::transfer_i_from_N_to_C (unsigned i)
+{   // moves index i from N to C; unlike transfer_i_to_C, ell and Dell are recomputed here
+    {
+        const unsigned nC = m_nC;
+        if (nC > 0) {
+            {
+                dReal *const aptr = AROW(i);
+                dReal *Dell = m_Dell;
+                const unsigned *C = m_C;
+#   ifdef NUB_OPTIMIZATIONS
+                // if nub>0, initial part of aptr unpermuted
+                const unsigned nub = m_nub;
+                unsigned j=0;
+                for ( ; j<nub; ++j) Dell[j] = aptr[j];
+                for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   else
+                for (unsigned j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
+#   endif
+            }
+            solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
+
+            dReal ell_Dell_dot = REAL(0.0);
+            dReal *const Ltgt = m_L + (sizeint)m_nskip * nC; // row nC of L
+            dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
+            for (unsigned j = 0; j < nC; ++j) {
+                dReal ell_j, Dell_j = Dell[j];
+                Ltgt[j] = ell[j] = ell_j = Dell_j * d[j]; // ell = Dell scaled by d, stored in both ell and L
+                ell_Dell_dot += ell_j * Dell_j;
+            }
+            
+            dReal AROW_i_i = AROW(i)[i] != ell_Dell_dot ? AROW(i)[i] : dNextAfter(AROW(i)[i], dInfinity); // A hack to avoid getting a zero in the denominator
+            m_d[nC] = dRecip (AROW_i_i - ell_Dell_dot);
+        }
+        else {
+            m_d[0] = dRecip (AROW(i)[i]); // C was empty: the factorization is the single diagonal entry
+        }
+
+        swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, nC, i, m_nskip, 1); // bring index i to position nC
+
+        m_C[nC] = nC;
+        m_nN--;
+        m_nC = nC + 1; // nC value is outdated after this line
+    }
+
+    // @@@ TO DO LATER
+    // if we just finish here then we'll go back and re-solve for
+    // delta_x. but actually we can be more efficient and incrementally
+    // update delta_x here. but if we do this, we won't have ell and Dell
+    // to use in updating the factorization later.
+
+# ifdef DEBUG_LCP
+    checkFactorization (m_A,m_L,m_d,m_nC,m_C,m_nskip);
+# endif
+}
+
+
+void dLCP::transfer_i_from_C_to_N (unsigned i, void *tmpbuf)
+{   // moves index i from C to N; tmpbuf is scratch memory handed on to dxLDLTRemove
+    {
+        unsigned *C = m_C;
+        // remove a row/column from the factorization, and adjust the
+        // indexes (black magic!)
+        int last_idx = -1; // position in C that holds nC-1, once seen (-1: not seen yet)
+        const unsigned nC = m_nC;
+        unsigned j = 0;
+        for ( ; j < nC; ++j) {
+            if (C[j] == nC - 1) {
+                last_idx = j;
+            }
+            if (C[j] == i) {
+                dxLDLTRemove (m_A, C, m_L, m_d, m_n, nC, j, m_nskip, tmpbuf);
+                unsigned k;
+                if (last_idx == -1) {
+                    for (k = j + 1 ; k < nC; ++k) { // nC-1 was not seen before position j, so search after it
+                        if (C[k] == nC - 1) {
+                            break;
+                        }
+                    }
+                    dIASSERT (k < nC);
+                }
+                else {
+                    k = last_idx;
+                }
+                C[k] = C[j]; // the entry that referred to nC-1 now refers to i (the two are swapped below)
+                if (j != (nC - 1)) memmove (C + j, C + j + 1, (nC - j - 1) * sizeof(C[0])); // close the gap at position j
+                break;
+            }
+        }
+        dIASSERT (j < nC); // i must have been found in C
+
+        swapProblem (m_A, m_pairsbx, m_w, m_pairslh, m_p, m_state, m_findex, m_n, i, nC - 1, m_nskip, 1); // i becomes the first index after the shrunken C
+
+        m_nN++;
+        m_nC = nC - 1; // nC value is outdated after this line
+    }
+
+# ifdef DEBUG_LCP
+    checkFactorization (m_A, m_L, m_d, m_nC, m_C, m_nskip);
+# endif
+}
+
+
+void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
+{
+    // p(N) = A(N,C) * q(C), computed one row at a time with a plain dot
+    // product. an outer-product formulation (e.g. via the dMultidotX()
+    // functions) was tried and turned out slower on random 100x100
+    // problems because of its overhead, so the simple method stays.
+    const unsigned clamped = m_nC;
+    const unsigned row_end = clamped + m_nN;
+
+    // the rows of N directly follow the rows of C
+    for (unsigned row = clamped; row != row_end; ++row) {
+        p[row] = dxDot (AROW(row), q, clamped);
+    }
+}
+
+
+void dLCP::pN_plusequals_ANi (dReal *p, unsigned i, bool dir_positive)
+{
+    // adds (dir_positive) or subtracts (otherwise) the N part of row i of A
+    // to/from the N part of p, i.e. positions m_nC .. m_nC+m_nN-1
+    const unsigned first = m_nC, count = m_nN;
+    const dReal *src = AROW(i) + first;
+    dReal *dst = p + first;
+
+    if (!dir_positive) {
+        for (unsigned k = 0; k != count; ++k) dst[k] -= src[k];
+        return;
+    }
+    for (unsigned k = 0; k != count; ++k) dst[k] += src[k];
+}
+
+template<unsigned p_stride>
+void dLCP::pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
+{
+    // p(C) += s * q(C); p is stepped by p_stride, q is contiguous
+    const unsigned count = m_nC;
+    for (unsigned k = 0; k != count; ++k) {
+        p[(sizeint)k * p_stride] += s * q[k];
+    }
+}
+
+void dLCP::pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
+{
+    // p(N) += s * q(N); the N entries sit at positions m_nC .. m_nC+m_nN-1
+    dReal *dst = p + m_nC;
+    const dReal *src = q + m_nC;
+    for (dReal *const dst_end = dst + m_nN; dst != dst_end; ++dst, ++src) {
+        *dst += s * (*src);
+    }
+}
+
+void dLCP::solve1 (dReal *a, unsigned i, bool dir_positive, int only_transfer)
+{   // fills Dell/ell from row i of A and, unless only_transfer is set, writes the C part of `a'
+    // the `Dell' and `ell' that are computed here are saved. if index i is
+    // later added to the factorization then they can be reused.
+    //
+    // @@@ question: do we need to solve for entire delta_x??? yes, but
+    //     only if an x goes below 0 during the step.
+
+    const unsigned nC = m_nC;
+    if (nC > 0) { // with an empty C nothing is computed and `a' is left untouched
+        {
+            dReal *Dell = m_Dell;
+            unsigned *C = m_C;
+            dReal *aptr = AROW(i);
+#   ifdef NUB_OPTIMIZATIONS
+            // if nub>0, initial part of aptr[] is guaranteed unpermuted
+            const unsigned nub = m_nub;
+            unsigned j = 0;
+            for ( ; j < nub; ++j) Dell[j] = aptr[j];
+            for ( ; j < nC; ++j) Dell[j] = aptr[C[j]];
+#   else
+            for (unsigned j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
+#   endif
+        }
+        solveL1Straight<1>(m_L, m_Dell, nC, m_nskip);
+        {
+            dReal *ell = m_ell, *Dell = m_Dell, *d = m_d;
+            for (unsigned j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
+        }
+
+        if (!only_transfer) {
+            dReal *tmp = m_tmp, *ell = m_ell;
+            {
+                for (unsigned j = 0; j < nC; ++j) tmp[j] = ell[j]; // work on a copy so that ell stays available
+            }
+            solveL1Transposed<1>(m_L, tmp, nC, m_nskip);
+            if (dir_positive) {
+                unsigned *C = m_C;
+                dReal *tmp = m_tmp;
+                for (unsigned j = 0; j < nC; ++j) a[C[j]] = -tmp[j]; // scatter through C with the sign flipped
+            } else {
+                unsigned *C = m_C;
+                dReal *tmp = m_tmp;
+                for (unsigned j = 0; j < nC; ++j) a[C[j]] = tmp[j]; // scatter through C with the sign kept
+            }
+        }
+    }
+}
+
+
+void dLCP::unpermute_X()
+{   // restores the x-values to their original order in place, one permutation cycle at a time; m_p is overwritten
+    unsigned *p = m_p;
+    dReal *pairsbx = m_pairsbx;
+    const unsigned n = m_n;
+    for (unsigned j = 0; j < n; ++j) {
+        unsigned k = p[j];
+        if (k != j) { // j starts a cycle that has not been processed yet
+            // p[j] = j; -- not going to be checked anymore anyway
+            dReal x_j = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
+            for (;;) {
+                dxSwap(x_j, (pairsbx + (sizeint)k * PBX__MAX)[PBX_X]); // drop the carried value at k, pick up the one displaced
+
+                unsigned orig_k = p[k];
+                p[k] = k; // mark position k as done
+                if (orig_k == j) {
+                    break; // the cycle has come back to j
+                }
+                k = orig_k;
+            }
+            (pairsbx + (sizeint)j * PBX__MAX)[PBX_X] = x_j;
+        }
+    }
+}
+
+void dLCP::unpermute_W()
+{
+    // snapshot the permuted w in m_tmp, then scatter entry j back to slot p[j]
+    dReal *const snapshot = m_tmp, *const w = m_w;
+    const unsigned count = m_n;
+    memcpy (snapshot, w, count * sizeof(dReal));
+
+    const unsigned *perm = m_p;
+    for (unsigned j = 0; j != count; ++j) {
+        w[perm[j]] = snapshot[j];
+    }
+}
+
+#endif // dLCP_FAST
+
+
+static void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX]); // chosen by dxSolveLCP when nub >= n
+static void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX], 
+                                dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex); // chosen by dxSolveLCP when nub < n
+
+/*extern */
+void dxSolveLCP (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
+    dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
+{
+    // Dispatcher: the bounded-variable machinery is only needed when nub < n.
+    const bool everyVariableUnbounded = !(nub < n);
+    if (everyVariableUnbounded) {
+        // outer_w, pairslh and findex are not consulted on this path
+        dxSolveLCP_AllUnbounded (memarena, n, A, pairsbx);
+        return;
+    }
+    dxSolveLCP_Generic (memarena, n, A, pairsbx, outer_w, nub, pairslh, findex);
+}
+
+//***************************************************************************
+// if all the variables are unbounded then we can just factor, solve, and return
+
+static
+void dxSolveLCP_AllUnbounded (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX])
+{
+    dAASSERT(A != NULL);
+    dAASSERT(pairsbx != NULL);
+    dAASSERT(n != 0);
+    // memarena is not used on this path. The B slots are handed to both the factorization
+    // and the solve (presumably as the diagonal work vector -- confirm against the helpers).
+    dReal *const bColumn = pairsbx + PBX_B, *const xColumn = pairsbx + PBX_X;
+    const unsigned rowStride = dPAD(n);
+    transfer_b_to_x<true>(pairsbx, n);
+    factorMatrixAsLDLT<PBX__MAX> (A, bColumn, n, rowStride);
+    solveEquationSystemWithLDLT<PBX__MAX, PBX__MAX> (A, bColumn, xColumn, n, rowStride);
+}
+
+//***************************************************************************
+// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
+
+static 
+void dxSolveLCP_Generic (dxWorldProcessMemArena *memarena, unsigned n, dReal *A, dReal pairsbx[PBX__MAX],
+    dReal *outer_w/*=NULL*/, unsigned nub, dReal pairslh[PLH__MAX], int *findex)
+{ // Pivoting driver for the lo/hi LCP: handles indices adj_nub..n-1 one at a time; x ends up in the X slots of pairsbx, w in outer_w when that is non-NULL
+    dAASSERT (n > 0 && A && pairsbx && pairslh && nub >= 0 && nub < n); // NOTE(review): nub is unsigned, so "nub >= 0" is always true
+# ifndef dNODEBUG
+    {
+        // check restrictions on lo and hi
+        dReal *endlh = pairslh + (sizeint)n * PLH__MAX;
+        for (dReal *currlh = pairslh; currlh != endlh; currlh += PLH__MAX) dIASSERT (currlh[PLH_LO] <= 0 && currlh[PLH_HI] >= 0);
+    }
+# endif
+
+    const unsigned nskip = dPAD(n); // padded row length; L is sized as nskip * n
+    dReal *L = memarena->AllocateOveralignedArray<dReal> ((sizeint)nskip * n, LMATRIX_ALIGNMENT);
+    dReal *d = memarena->AllocateArray<dReal> (n);
+    dReal *w = outer_w != NULL ? outer_w : memarena->AllocateArray<dReal> (n); // caller's buffer when provided, arena scratch otherwise
+    dReal *delta_w = memarena->AllocateArray<dReal> (n);
+    dReal *delta_x = memarena->AllocateArray<dReal> (n);
+    dReal *Dell = memarena->AllocateArray<dReal> (n);
+    dReal *ell = memarena->AllocateArray<dReal> (n);
+#ifdef ROWPTRS
+    dReal **Arows = memarena->AllocateArray<dReal *> (n);
+#else
+    dReal **Arows = NULL;
+#endif
+    unsigned *p = memarena->AllocateArray<unsigned> (n);
+    unsigned *C = memarena->AllocateArray<unsigned> (n);
+
+    // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
+    bool *state = memarena->AllocateArray<bool> (n);
+
+    // create LCP object. note that tmp is set to delta_w to save space, this
+    // optimization relies on knowledge of how tmp is used, so be careful!
+    dLCP lcp(n, nskip, nub, A, pairsbx, w, pairslh, L, d, Dell, ell, delta_w, state, findex, p, C, Arows);
+    unsigned adj_nub = lcp.getNub(); // first index handled by the main loop below
+
+    // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
+    // LCP conditions then i is added to the appropriate index set. otherwise
+    // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
+    // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
+    // while driving x(i) we maintain the LCP conditions on the other variables
+    // 0..i-1. we do this by watching out for other x(i),w(i) values going
+    // outside the valid region, and then switching them between index sets
+    // when that happens.
+
+    bool hit_first_friction_index = false; // becomes true once the lo/hi rescaling below has run
+    for (unsigned i = adj_nub; i < n; ++i) {
+        bool s_error = false; // set when the step size degenerates (s <= 0) and the solve is abandoned
+        // the index i is the driving index and indexes i+1..n-1 are "dont care",
+        // i.e. when we make changes to the system those x's will be zero and we
+        // don't care what happens to those w's. in other words, we only consider
+        // an (i+1)*(i+1) sub-problem of A*x=b+w.
+
+        // if we've hit the first friction index, we have to compute the lo and
+        // hi values based on the values of x already computed. we have been
+        // permuting the indexes, so the values stored in the findex vector are
+        // no longer valid. thus we have to temporarily unpermute the x vector. 
+        // for the purposes of this computation, 0*infinity = 0 ... so if the
+        // contact constraint's normal force is 0, there should be no tangential
+        // force applied.
+
+        if (!hit_first_friction_index && findex && findex[i] >= 0) { // runs at most once, at the first i whose findex is non-negative
+            // un-permute x into delta_w, which is not being used at the moment
+            for (unsigned j = 0; j < n; ++j) delta_w[p[j]] = (pairsbx + (sizeint)j * PBX__MAX)[PBX_X];
+
+            // set lo and hi values
+            for (unsigned k = i; k < n; ++k) { // NOTE(review): indexes with findex[k] for every k >= i -- assumes all of them are >= 0; confirm against the ordering set up by dLCP
+                dReal *currlh = pairslh + (sizeint)k * PLH__MAX;
+                dReal wfk = delta_w[findex[k]]; // un-permuted x of the constraint that findex[k] refers to
+                if (wfk == 0) {
+                    currlh[PLH_HI] = 0;
+                    currlh[PLH_LO] = 0;
+                }
+                else {
+                    currlh[PLH_HI] = dFabs (currlh[PLH_HI] * wfk);
+                    currlh[PLH_LO] = -currlh[PLH_HI];
+                }
+            }
+            hit_first_friction_index = true;
+        }
+
+        // thus far we have not even been computing the w values for indexes
+        // greater than i, so compute w[i] now.
+        dReal wPrep = lcp.AiC_times_qC<PBX__MAX> (i, pairsbx + PBX_X) + lcp.AiN_times_qN<PBX__MAX> (i, pairsbx + PBX_X);
+
+        dReal *currbx = pairsbx + (sizeint)i * PBX__MAX; // the (b, x) pair of index i
+
+        w[i] = wPrep - currbx[PBX_B];
+
+        // if lo=hi=0 (which can happen for tangential friction when normals are
+        // 0) then the index will be assigned to set N with some state. however,
+        // set C's line has zero size, so the index will always remain in set N.
+        // with the "normal" switching logic, if w changed sign then the index
+        // would have to switch to set C and then back to set N with an inverted
+        // state. this is pointless, and also computationally expensive. to
+        // prevent this from happening, we use the rule that indexes with lo=hi=0
+        // will never be checked for set changes. this means that the state for
+        // these indexes may be incorrect, but that doesn't matter.
+
+        dReal *currlh = pairslh + (sizeint)i * PLH__MAX; // the (lo, hi) pair of index i
+
+        // see if x(i),w(i) is in a valid region
+        if (currlh[PLH_LO] == 0 && w[i] >= 0) {
+            lcp.transfer_i_to_N (i);
+            state[i] = false;
+        }
+        else if (currlh[PLH_HI] == 0 && w[i] <= 0) {
+            lcp.transfer_i_to_N (i);
+            state[i] = true;
+        }
+        else if (w[i] == 0) {
+            // this is a degenerate case. by the time we get to this test we know
+            // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
+            // and similarly that hi > 0. this means that the line segment
+            // corresponding to set C is at least finite in extent, and we are on it.
+            // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
+            lcp.solve1 (delta_x, i, false, 1);
+
+            lcp.transfer_i_to_C (i);
+        }
+        else {
+            // we must push x(i) and w(i)
+            for (;;) {
+                // find direction to push on x(i)
+                bool dir_positive = (w[i] <= 0);
+
+                // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
+                lcp.solve1 (delta_x, i, dir_positive);
+
+                // note that delta_x[i] = (dir_positive ? 1 : -1), but we wont bother to set it
+
+                // compute: delta_w = A*delta_x ... note we only care about
+                // delta_w(N) and delta_w(i), the rest is ignored
+                lcp.pN_equals_ANC_times_qC (delta_w, delta_x);
+                lcp.pN_plusequals_ANi (delta_w, i, dir_positive);
+                delta_w[i] = dir_positive 
+                    ? lcp.AiC_times_qC<1> (i, delta_x) + lcp.Aii(i)
+                    : lcp.AiC_times_qC<1> (i, delta_x) - lcp.Aii(i);
+
+                // find largest step we can take (size=s), either to drive x(i),w(i)
+                // to the valid LCP region or to drive an already-valid variable
+                // outside the valid region.
+
+                int cmd = 1;		// index switching command
+                unsigned si = 0;		// si = index to switch if cmd>3
+
+                dReal s = delta_w[i] != REAL(0.0)
+                    ? -w[i] / delta_w[i]
+                    : (w[i] != REAL(0.0) ? dCopySign(dInfinity, -w[i]) : REAL(0.0)); // delta_w[i] == 0: infinite step with the sign of -w[i], or 0 when w[i] == 0
+                    
+                if (dir_positive) {
+                    if (currlh[PLH_HI] < dInfinity) {
+                        dReal s2 = (currlh[PLH_HI] - currbx[PBX_X]);	// was (hi[i]-x[i])/dirf	// step to x(i)=hi(i)
+                        if (s2 < s) {
+                            s = s2;
+                            cmd = 3;
+                        }
+                    }
+                }
+                else {
+                    if (currlh[PLH_LO] > -dInfinity) {
+                        dReal s2 = (currbx[PBX_X] - currlh[PLH_LO]); // was (lo[i]-x[i])/dirf	// step to x(i)=lo(i)
+                        if (s2 < s) {
+                            s = s2;
+                            cmd = 2;
+                        }
+                    }
+                }
+
+                {
+                    const unsigned numN = lcp.numN();
+                    for (unsigned k = 0; k < numN; ++k) { // cmd 4 candidates: an index in N whose w would reach zero first
+                        const unsigned indexN_k = lcp.indexN(k);
+                        if (!state[indexN_k] ? delta_w[indexN_k] < 0 : delta_w[indexN_k] > 0) {
+                            // don't bother checking if lo=hi=0
+                            dReal *indexlh = pairslh + (sizeint)indexN_k * PLH__MAX;
+                            if (indexlh[PLH_LO] == 0 && indexlh[PLH_HI] == 0) continue;
+                            dReal s2 = -w[indexN_k] / delta_w[indexN_k];
+                            if (s2 < s) {
+                                s = s2;
+                                cmd = 4;
+                                si = indexN_k;
+                            }
+                        }
+                    }
+                }
+
+                {
+                    const unsigned numC = lcp.numC();
+                    for (unsigned k = adj_nub; k < numC; ++k) { // cmd 5/6 candidates: an index in C whose x would reach lo/hi first; entries below adj_nub are skipped
+                        const unsigned indexC_k = lcp.indexC(k);
+                        dReal *indexlh = pairslh + (sizeint)indexC_k * PLH__MAX;
+                        if (delta_x[indexC_k] < 0 && indexlh[PLH_LO] > -dInfinity) {
+                            dReal s2 = (indexlh[PLH_LO] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
+                            if (s2 < s) {
+                                s = s2;
+                                cmd = 5;
+                                si = indexC_k;
+                            }
+                        }
+                        if (delta_x[indexC_k] > 0 && indexlh[PLH_HI] < dInfinity) {
+                            dReal s2 = (indexlh[PLH_HI] - (pairsbx + (sizeint)indexC_k * PBX__MAX)[PBX_X]) / delta_x[indexC_k];
+                            if (s2 < s) {
+                                s = s2;
+                                cmd = 6;
+                                si = indexC_k;
+                            }
+                        }
+                    }
+                }
+
+                //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
+                //			     "C->NL","C->NH"};
+                //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
+
+                // if s <= 0 then we've got a problem. if we just keep going then
+                // we're going to get stuck in an infinite loop. instead, just cross
+                // our fingers and exit with the current solution.
+                if (s <= REAL(0.0)) {
+                    dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",(double)s);
+                    if (i < n) { // NOTE(review): always true here, the enclosing loop already requires i < n
+                        dxtSetZero<PBX__MAX>(currbx + PBX_X, n - i); // presumably zeroes the X slots of indices i..n-1 (strided) -- confirm against dxtSetZero
+                        dxSetZero (w + i, n - i);
+                    }
+                    s_error = true;
+                    break;
+                }
+
+                // apply x = x + s * delta_x
+                lcp.pC_plusequals_s_times_qC<PBX__MAX> (pairsbx + PBX_X, s, delta_x);
+                currbx[PBX_X] = dir_positive 
+                    ? currbx[PBX_X] + s
+                    : currbx[PBX_X] - s;
+
+                // apply w = w + s * delta_w
+                lcp.pN_plusequals_s_times_qN (w, s, delta_w);
+                w[i] += s * delta_w[i];
+
+                void *tmpbuf; // arena remainder handed to transfer_i_from_C_to_N as scratch (cmd 5 and 6 only)
+                // switch indexes between sets if necessary
+                switch (cmd) {
+                case 1:		// done
+                    w[i] = 0;
+                    lcp.transfer_i_to_C (i);
+                    break;
+                case 2:		// done
+                    currbx[PBX_X] = currlh[PLH_LO];
+                    state[i] = false;
+                    lcp.transfer_i_to_N (i);
+                    break;
+                case 3:		// done
+                    currbx[PBX_X] = currlh[PLH_HI];
+                    state[i] = true;
+                    lcp.transfer_i_to_N (i);
+                    break;
+                case 4:		// keep going
+                    w[si] = 0;
+                    lcp.transfer_i_from_N_to_C (si);
+                    break;
+                case 5:		// keep going
+                    (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_LO];
+                    state[si] = false;
+                    tmpbuf = memarena->PeekBufferRemainder();
+                    lcp.transfer_i_from_C_to_N (si, tmpbuf);
+                    break;
+                case 6:		// keep going
+                    (pairsbx + (sizeint)si * PBX__MAX)[PBX_X] = (pairslh + (sizeint)si * PLH__MAX)[PLH_HI];
+                    state[si] = true;
+                    tmpbuf = memarena->PeekBufferRemainder();
+                    lcp.transfer_i_from_C_to_N (si, tmpbuf);
+                    break;
+                }
+
+                if (cmd <= 3) break; // cmd 1..3 settle index i; cmd 4..6 only moved index si, so push again
+            } // for (;;)
+        } // else
+
+        if (s_error) {
+            break;
+        }
+    } // for (unsigned i = adj_nub; i < n; ++i)
+
+    // now we have to un-permute x and w
+    if (outer_w != NULL) { // w is only un-permuted when the caller supplied the buffer
+        lcp.unpermute_W();
+    }
+    lcp.unpermute_X(); // This destroys p[] and must be done last
+}
+
+sizeint dxEstimateSolveLCPMemoryReq(unsigned n, bool outer_w_avail)
+{
+    // Upper bound, in bytes, on the arena space dxSolveLCP_Generic() allocates
+    // for an n-row problem. outer_w_avail == true means the caller passes its
+    // own w buffer, so no arena space is reserved for it.
+    const unsigned nskip = dPAD(n);
+    const sizeint realVectorBytes = dEFFICIENT_SIZE(sizeof(dReal) * n);
+    const unsigned realVectorCount = outer_w_avail ? 5 : 6; // d, delta_w, delta_x, Dell, ell (+ w)
+
+    sizeint total = dOVERALIGNED_SIZE(sizeof(dReal) * ((sizeint)n * nskip), LMATRIX_ALIGNMENT); // L
+    total += realVectorCount * realVectorBytes;
+#ifdef ROWPTRS
+    total += dEFFICIENT_SIZE(sizeof(dReal *) * n); // Arows
+#endif
+    total += 2 * dEFFICIENT_SIZE(sizeof(unsigned) * n); // p, C
+    total += dEFFICIENT_SIZE(sizeof(bool) * n); // state
+
+    // nC changes while the solver runs but never exceeds n, so n is used as the bound
+    const sizeint transferScratchBytes = dLCP::estimate_transfer_i_from_C_to_N_mem_req(n, nskip);
+    total += dEFFICIENT_SIZE(transferScratchBytes); // scratch for dLCP::transfer_i_from_C_to_N
+
+    return total;
+}
+
+
+//***************************************************************************
+// accuracy and timing test
+
+static sizeint EstimateTestSolveLCPMemoryReq(unsigned n)
+{
+    // Arena bytes needed by dTestSolveLCP(): its own buffers plus the solver's
+    // requirement when the w vector is supplied from outside.
+    const unsigned nskip = dPAD(n);
+
+    const sizeint matrixBytes = dEFFICIENT_SIZE(sizeof(dReal) * ((sizeint)n * nskip)); // each of A, A2
+    const sizeint vectorBytes = dEFFICIENT_SIZE(sizeof(dReal) * n); // each of x, b, w, lo, hi, tmp1, tmp2
+    const sizeint pairsbxBytes = dEFFICIENT_SIZE(sizeof(dReal) * PBX__MAX * n);
+    const sizeint pairslhBytes = dEFFICIENT_SIZE(sizeof(dReal) * PLH__MAX * n);
+    const sizeint solverBytes = dxEstimateSolveLCPMemoryReq(n, true);
+
+    // two matrices, seven plain vectors, the two pair arrays, then the solver itself
+    return 2 * matrixBytes + 7 * vectorBytes + pairsbxBytes + pairslhBytes + solverBytes;
+}
+
+extern "C" ODE_API int dTestSolveLCP()
+{ // Accuracy and timing check of dxSolveLCP on 1000 random 100-row problems with nub = 50; returns 0 only when the arena cannot be allocated, 1 otherwise
+    const unsigned n = 100;
+
+    sizeint memreq = EstimateTestSolveLCPMemoryReq(n);
+    dxWorldProcessMemArena *arena = dxAllocateTemporaryWorldProcessMemArena(memreq, NULL, NULL);
+    if (arena == NULL) {
+        return 0;
+    }
+    arena->ResetState();
+
+    unsigned i,nskip = dPAD(n);
+#ifdef dDOUBLE
+    const dReal tol = REAL(1e-9);
+#endif
+#ifdef dSINGLE
+    const dReal tol = REAL(1e-4);
+#endif
+    printf ("dTestSolveLCP()\n");
+
+    // buffers that live for the whole test; the count and sizes match EstimateTestSolveLCPMemoryReq()
+    dReal *A = arena->AllocateArray<dReal> (n*nskip);
+    dReal *x = arena->AllocateArray<dReal> (n);
+    dReal *b = arena->AllocateArray<dReal> (n);
+    dReal *w = arena->AllocateArray<dReal> (n);
+    dReal *lo = arena->AllocateArray<dReal> (n);
+    dReal *hi = arena->AllocateArray<dReal> (n);
+    dReal *A2 = arena->AllocateArray<dReal> (n*nskip);
+    dReal *pairsbx = arena->AllocateArray<dReal> (n * PBX__MAX);
+    dReal *pairslh = arena->AllocateArray<dReal> (n * PLH__MAX);
+
+    dReal *tmp1 = arena->AllocateArray<dReal> (n);
+    dReal *tmp2 = arena->AllocateArray<dReal> (n);
+
+    double total_time = 0;
+    for (unsigned count=0; count < 1000; count++) {
+        BEGIN_STATE_SAVE(arena, saveInner) { // the solver's own arena allocations happen inside this saved region
+
+            // form (A,b) = a random positive definite LCP problem
+            dMakeRandomMatrix (A2,n,n,1.0);
+            dMultiply2 (A,A2,A2,n,n,n);
+            dMakeRandomMatrix (x,n,1,1.0);
+            dMultiply0 (b,A,x,n,n,1);
+            for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
+
+            // choose `nub' in the range 0..n-1
+            unsigned nub = 50; //dRandInt (n);
+
+            // make limits
+            for (i=0; i<nub; i++) lo[i] = -dInfinity;
+            for (i=0; i<nub; i++) hi[i] = dInfinity;
+            //for (i=nub; i<n; i++) lo[i] = 0;
+            //for (i=nub; i<n; i++) hi[i] = dInfinity;
+            //for (i=nub; i<n; i++) lo[i] = -dInfinity;
+            //for (i=nub; i<n; i++) hi[i] = 0;
+            for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
+            for (i=nub; i<n; i++) hi[i] =  (dRandReal()*REAL(1.0))+REAL(0.01);
+
+            // set a few limits to lo=hi=0
+            /*
+            for (i=0; i<10; i++) {
+            unsigned j = dRandInt (n-nub) + nub;
+            lo[j] = 0;
+            hi[j] = 0;
+            }
+            */
+
+            // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
+            // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
+            // ensure that it doesn't get referenced (if it does, the answer will be
+            // wrong).
+
+            memcpy (A2, A, n * nskip * sizeof(dReal));
+            dClearUpperTriangle (A2, n);
+            for (i = 0; i != n; ++i) { // pack b (and a zero x) into the interleaved pairsbx array
+                dReal *currbx = pairsbx + i * PBX__MAX;
+                currbx[PBX_B] = b[i];
+                currbx[PBX_X] = 0;
+            }
+            for (i = 0; i != n; ++i) { // pack lo/hi into the interleaved pairslh array
+                dReal *currlh = pairslh + i * PLH__MAX;
+                currlh[PLH_LO] = lo[i];
+                currlh[PLH_HI] = hi[i];
+            }
+            dSetZero (w,n);
+
+            dStopwatch sw;
+            dStopwatchReset (&sw);
+            dStopwatchStart (&sw);
+
+            dxSolveLCP (arena,n,A2,pairsbx,w,nub,pairslh,0);
+
+            dStopwatchStop (&sw);
+            double time = dStopwatchTime(&sw);
+            total_time += time;
+            double average = total_time / double(count+1) * 1000.0;
+
+            for (i = 0; i != n; ++i) { // pull the solution back out of pairsbx
+                const dReal *currbx = pairsbx + i * PBX__MAX;
+                x[i] = currbx[PBX_X];
+            }
+
+            // check the solution
+
+            dMultiply0 (tmp1,A,x,n,n,1);
+            for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
+            dReal diff = dMaxDifference (tmp1,tmp2,n,1);
+            // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
+            //	    diff > tol ? "FAILED" : "passed");
+            if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
+            unsigned n1=0,n2=0,n3=0; // counts of indices ending at lo, at hi, and strictly inside with w == 0
+            for (i=0; i<n; i++) {
+                if (x[i]==lo[i] && w[i] >= 0) {
+                    n1++;	// ok
+                }
+                else if (x[i]==hi[i] && w[i] <= 0) {
+                    n2++;	// ok
+                }
+                else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
+                    n3++;	// ok
+                }
+                else {
+                    dDebug (0,"FAILED: i=%u x=%.4e w=%.4e lo=%.4e hi=%.4e",i, // i is unsigned, hence %u
+                        x[i],w[i],lo[i],hi[i]);
+                }
+            }
+
+            // pacifier
+            printf ("passed: NL=%3u NH=%3u C=%3u   ",n1,n2,n3); // the counters are unsigned, hence %u
+            printf ("time=%10.3f ms  avg=%10.4f\n",time * 1000.0,average);
+
+        } END_STATE_SAVE(arena, saveInner);
+    }
+
+    dxFreeTemporaryWorldProcessMemArena(arena);
+    return 1;
+}
diff --git a/libs/ode-0.16.1/ode/src/lcp.h b/libs/ode-0.16.1/ode/src/lcp.h
new file mode 100644
index 0000000..da65d6f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/lcp.h
@@ -0,0 +1,81 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+given (A,b,lo,hi), solve the LCP problem: A*x = b+w, where each x(i),w(i)
+satisfies one of
+    (1) x = lo, w >= 0
+    (2) x = hi, w <= 0
+    (3) lo < x < hi, w = 0
+A is a matrix of dimension n*n, everything else is a vector of size n*1.
+lo and hi can be +/- dInfinity as needed. the first `nub' variables are
+unbounded, i.e. hi and lo are assumed to be +/- dInfinity.
+
+we restrict lo(i) <= 0 and hi(i) >= 0.
+
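+the original data (A,b) may be modified by this function. b and x are passed interleaved in one array (see dxLCPBXElement), and so are lo and hi (see dxLCPLHElement).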
+
+if the `findex' (friction index) parameter is nonzero, it points to an array
+of index values. in this case constraints that have findex[i] >= 0 are
+special. all non-special constraints are solved for, then the lo and hi values
+for the special constraints are set:
+    hi[i] = abs( hi[i] * x[findex[i]] )
+    lo[i] = -hi[i]
+and the solution continues. this mechanism allows a friction approximation
+to be implemented. the first `nub' variables are assumed to have findex < 0.
+
+*/
+
+
+#ifndef _ODE_LCP_H_
+#define _ODE_LCP_H_
+
+class dxWorldProcessMemArena;
+
+enum dxLCPBXElement // slot layout of one interleaved (b, x) pair in a pairsbx array
+{
+    PBX__MIN, // index of the first slot
+
+    PBX_B = PBX__MIN, // right-hand side b of A*x = b+w
+    PBX_X, // the unknown x
+
+    PBX__MAX, // number of slots per pair, i.e. the distance between consecutive rows
+};
+
+enum dxLCPLHElement // slot layout of one interleaved (lo, hi) pair in a pairslh array
+{
+    PLH__MIN, // index of the first slot
+
+    PLH_LO = PLH__MIN, // lower bound lo (restricted to <= 0)
+    PLH_HI, // upper bound hi (restricted to >= 0)
+
+    PLH__MAX, // number of slots per pair, i.e. the distance between consecutive rows
+};
+
+void dxSolveLCP (dxWorldProcessMemArena *memarena, 
+    unsigned n, dReal *A, dReal pairsbx[PBX__MAX], dReal *w,
+    unsigned nub, dReal pairslh[PLH__MAX], int *findex); // w may be NULL (arena scratch is used instead); findex may be NULL (no friction indexes)
+
+sizeint dxEstimateSolveLCPMemoryReq(unsigned n, bool outer_w_avail); // arena bytes to reserve for an n-row solve; outer_w_avail: the caller supplies the w buffer
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/mass.cpp b/libs/ode-0.16.1/ode/src/mass.cpp
new file mode 100644
index 0000000..961b2da
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/mass.cpp
@@ -0,0 +1,554 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/mass.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+
+// Local dependencies
+#include "collision_kernel.h"
+
+#if dTRIMESH_ENABLED
+#include "collision_trimesh_internal.h"
+#endif // dTRIMESH_ENABLED
+
+#define	SQR(x)			((x)*(x))						//!< Returns x square
+#define	CUBE(x)			((x)*(x)*(x))					//!< Returns x cube
+
+#define _I(i,j) I[(i)*4+(j)] // element (i,j) of a member array named I whose rows are 4 elements apart; used as m->_I(i,j)
+
+
+// return 1 if ok, 0 if bad
+
+int dMassCheck (const dMass *m)
+{
+    // Validates *m; returns 1 when every check passes and 0 otherwise.
+    if (m->mass <= 0) {
+        dDEBUGMSG ("mass must be > 0");
+        return 0;
+    }
+    if (!dIsPositiveDefinite (m->I,3,NULL)) {
+        dDEBUGMSG ("inertia must be positive definite");
+        return 0;
+    }
+
+    // The center of mass must agree with the mass and the inertia tensor.
+    // Moving the reference point onto the center of mass turns the inertia
+    // into
+    //   I + mass*crossmat(c)^2
+    // (see the comment in dMassTranslate()), and that matrix has to be
+    // positive definite as well. Given I PD and mass > 0, this is the same
+    // as requiring the spatial inertia matrix
+    //   [ mass*eye(3,3)   M*crossmat(c)^T ]
+    //   [ M*crossmat(c)   I               ]
+    // to be positive definite (theorem on partitioned PD matrices).
+
+    dMatrix3 crossC, aboutCenter;
+    dSetZero (crossC,12);
+    dSetCrossMatrixPlus (crossC,m->c,4);
+    dMultiply0_333 (aboutCenter,crossC,crossC);
+    // rows are 4 elements apart; the fourth element of each row is left alone
+    for (int row = 0; row < 3; ++row) {
+        dReal *dst = aboutCenter + row*4;
+        const dReal *src = m->I + row*4;
+        for (int col = 0; col < 3; ++col) dst[col] = src[col] + m->mass*dst[col];
+    }
+    if (!dIsPositiveDefinite (aboutCenter,3,NULL)) {
+        dDEBUGMSG ("center of mass inconsistent with mass parameters");
+        return 0;
+    }
+    return 1;
+}
+
+
+void dMassSetZero (dMass *m)
+{
+    dAASSERT (m);
+    dSetZero (m->I, sizeof(m->I) / sizeof(dReal)); // every element of the inertia array
+    dSetZero (m->c, sizeof(m->c) / sizeof(dReal)); // every element of the center-of-mass array
+    m->mass = REAL(0.0);
+}
+
+
+void dMassSetParameters (dMass *m, dReal themass,
+                         dReal cgx, dReal cgy, dReal cgz,
+                         dReal I11, dReal I22, dReal I33,
+                         dReal I12, dReal I13, dReal I23)
+{
+    // Fills *m from an explicit mass, a center of gravity and the six
+    // independent entries of a symmetric inertia matrix.
+    dAASSERT (m);
+    dMassSetZero (m);
+    m->mass = themass;
+
+    m->c[0] = cgx;  m->c[1] = cgy;  m->c[2] = cgz;
+
+    // diagonal
+    m->_I(0,0) = I11;  m->_I(1,1) = I22;  m->_I(2,2) = I33;
+    // each off-diagonal value is stored on both sides of the diagonal
+    m->_I(0,1) = m->_I(1,0) = I12;
+    m->_I(0,2) = m->_I(2,0) = I13;
+    m->_I(1,2) = m->_I(2,1) = I23;
+
+    dMassCheck (m); // result is ignored; unlike the dMassSet*Total() setters this call is not guarded by dNODEBUG
+}
+
+
+void dMassSetSphere (dMass *m, dReal density, dReal radius)
+{
+    const dReal sphereMass = (dReal) ((REAL(4.0)/REAL(3.0)) * M_PI * radius*radius*radius * density); // (4/3)*pi*r^3 times density
+    dMassSetSphereTotal (m, sphereMass, radius);
+}
+
+
+void dMassSetSphereTotal (dMass *m, dReal total_mass, dReal radius)
+{
+    // Every diagonal inertia entry becomes 0.4 * total_mass * radius^2;
+    // everything else in *m (center of mass included) stays zero.
+    dAASSERT (m);
+    dMassSetZero (m);
+    const dReal diagonalMoment = REAL(0.4) * total_mass * radius*radius;
+    for (int axis = 0; axis < 3; ++axis) m->_I(axis,axis) = diagonalMoment;
+    m->mass = total_mass;
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+void dMassSetCapsule (dMass *m, dReal density, int direction,
+                      dReal radius, dReal length)
+{
+    // direction (1..3) is the axis that receives the `alongAxis` moment; the
+    // other two diagonal entries get `acrossAxis`. The mass is derived from
+    // density as a cylinder part plus the two caps taken together.
+    dAASSERT (m);
+    dUASSERT (direction >= 1 && direction <= 3,"bad direction number");
+    dMassSetZero (m);
+    const dReal cylinderMass = (dReal) (M_PI*radius*radius*length*density);
+    const dReal capsMass = (dReal) ((REAL(4.0)/REAL(3.0))*M_PI*radius*radius*radius*density);
+    m->mass = cylinderMass+capsMass;
+    const dReal acrossAxis = cylinderMass*(REAL(0.25)*radius*radius + (REAL(1.0)/REAL(12.0))*length*length) +
+        capsMass*(REAL(0.4)*radius*radius + REAL(0.375)*radius*length + REAL(0.25)*length*length);
+    const dReal alongAxis = (cylinderMass*REAL(0.5) + capsMass*REAL(0.4))*radius*radius;
+    for (int axis = 0; axis < 3; ++axis) m->_I(axis,axis) = acrossAxis;
+    m->_I(direction-1,direction-1) = alongAxis; // overrides one of the three entries set above
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+void dMassSetCapsuleTotal (dMass *m, dReal total_mass, int direction,
+                           dReal a, dReal b)
+{
+    dMassSetCapsule (m, REAL(1.0), direction, a, b); // unit density; a and b are forwarded as radius and length
+    dMassAdjust (m, total_mass); // presumably rescales the result to total_mass -- see dMassAdjust
+}
+
+
+void dMassSetCylinder (dMass *m, dReal density, int direction,
+                       dReal radius, dReal length)
+{
+    const dReal cylinderMass = (dReal) (M_PI*radius*radius*length*density); // pi*r^2*length times density
+    dMassSetCylinderTotal (m, cylinderMass, direction, radius, length);
+}
+
+void dMassSetCylinderTotal (dMass *m, dReal total_mass, int direction,
+                            dReal radius, dReal length)
+{
+    // direction (1..3) is the axis that receives the 0.5*total_mass*radius^2
+    // moment; the other two diagonal entries share the value `transverse`.
+    dAASSERT (m);
+    dUASSERT (direction >= 1 && direction <= 3,"bad direction number");
+    dMassSetZero (m);
+    m->mass = total_mass;
+    const dReal radiusSq = radius*radius;
+    const dReal transverse = total_mass*(REAL(0.25)*radiusSq + (REAL(1.0)/REAL(12.0))*length*length);
+    for (int axis = 0; axis < 3; ++axis) m->_I(axis,axis) = transverse;
+    // overrides one of the three entries set above
+    m->_I(direction-1,direction-1) = total_mass*REAL(0.5)*radiusSq;
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+void dMassSetBox (dMass *m, dReal density, dReal lx, dReal ly, dReal lz)
+{
+    const dReal boxMass = lx*ly*lz*density; // product of the three side lengths times density
+    dMassSetBoxTotal (m, boxMass, lx, ly, lz);
+}
+
+
+void dMassSetBoxTotal (dMass *m, dReal total_mass,
+                       dReal lx, dReal ly, dReal lz)
+{
+    // Diagonal entry k is total_mass/12 times the summed squares of the two other side lengths.
+    dAASSERT (m);
+    dMassSetZero (m);
+    m->mass = total_mass;
+    const dReal diagonal[3] = { total_mass/REAL(12.0) * (ly*ly + lz*lz),
+        total_mass/REAL(12.0) * (lx*lx + lz*lz), total_mass/REAL(12.0) * (lx*lx + ly*ly) };
+    for (int axis = 0; axis < 3; ++axis) m->_I(axis,axis) = diagonal[axis];
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+
+
+
+
+/*
+* dMassSetTrimesh, implementation by Gero Mueller.
+* Based on Brian Mirtich, "Fast and Accurate Computation of
+* Polyhedral Mass Properties," journal of graphics tools, volume 1,
+* number 2, 1996.
+*
+* Sets `m` from the triangles of trimesh geom `g` scaled by `density`:
+* per-triangle projection and face integrals are accumulated into the
+* totals T0, T1, T2 and TP, from which mass and inertia entries are
+* written; finally the mass is translated by T1/T0.
+* When dTRIMESH_ENABLED is false, `m` is only zeroed.
+* NOTE(review): the referenced method is formulated for closed polyhedra -
+* confirm that callers guarantee a closed mesh.
+* NOTE(review): T0 is used as a divisor at the end; a mesh whose
+* triangles are all skipped as degenerate leaves it at 0 - confirm
+* whether that case needs handling.
+*/
+void dMassSetTrimesh( dMass *m, dReal density, dGeomID g )
+{
+    dAASSERT (m);
+    dUASSERT(g && g->type == dTriMeshClass, "argument not a trimesh");
+
+    dMassSetZero (m);
+
+#if dTRIMESH_ENABLED
+
+    dxTriMesh *TriMesh = static_cast<dxTriMesh *>(g);
+    unsigned int triangles = TriMesh->getMeshTriangleCount();
+
+    dReal nx, ny, nz;
+    // A, B, C are a cyclic permutation of the axis indices 0..2 chosen
+    // per triangle; C is the axis with the largest |normal| component.
+    unsigned int i, A, B, C;
+    // face integrals
+    dReal Fa, Fb, Fc, Faa, Fbb, Fcc, Faaa, Fbbb, Fccc, Faab, Fbbc, Fcca;
+
+    // projection integrals
+    dReal P1, Pa, Pb, Paa, Pab, Pbb, Paaa, Paab, Pabb, Pbbb;
+
+    // accumulated totals over all triangles
+    dReal T0 = 0;
+    dReal T1[3] = {0., 0., 0.};
+    dReal T2[3] = {0., 0., 0.};
+    dReal TP[3] = {0., 0., 0.};
+
+    for( i = 0; i < triangles; i++ )	 	
+    {
+        dVector3 v[3];
+        TriMesh->fetchMeshTransformedTriangle(v, i);
+
+        // (unnormalized) triangle normal from two edge vectors
+        dVector3 n, a, b;
+        dSubtractVectors3( a, v[1], v[0] ); 
+        dSubtractVectors3( b, v[2], v[0] ); 
+        dCalcVectorCross3( n, b, a );
+        nx = fabs(n[0]);
+        ny = fabs(n[1]);
+        nz = fabs(n[2]);
+
+        // pick the dominant normal axis as C
+        if( nx > ny && nx > nz )
+            C = 0;
+        else
+            C = (ny > nz) ? 1 : 2;
+
+        // Even though all triangles might be initially valid, 
+        // a triangle may degenerate into a segment after applying 
+        // space transformation.
+        // (n[C] is used as a divisor below, so such triangles are skipped.)
+        if (n[C] != REAL(0.0))
+        {
+            A = (C + 1) % 3;
+            B = (A + 1) % 3;
+
+            // calculate face integrals
+            {
+                dReal w;
+                dReal k1, k2, k3, k4;
+
+                //compProjectionIntegrals(f);
+                {
+                    dReal a0=0, a1=0, da;
+                    dReal b0=0, b1=0, db;
+                    dReal a0_2, a0_3, a0_4, b0_2, b0_3, b0_4;
+                    dReal a1_2, a1_3, b1_2, b1_3;
+                    dReal C1, Ca, Caa, Caaa, Cb, Cbb, Cbbb;
+                    dReal Cab, Kab, Caab, Kaab, Cabb, Kabb;
+
+                    P1 = Pa = Pb = Paa = Pab = Pbb = Paaa = Paab = Pabb = Pbbb = 0.0;
+
+                    // walk the three edges v0->v1, v1->v2, v2->v0 using
+                    // only the A and B coordinates of each endpoint
+                    for( int j = 0; j < 3; j++)
+                    {
+                        switch(j)
+                        {
+                        case 0:
+                            a0 = v[0][A];
+                            b0 = v[0][B];
+                            a1 = v[1][A];
+                            b1 = v[1][B];
+                            break;
+                        case 1:
+                            a0 = v[1][A];
+                            b0 = v[1][B];
+                            a1 = v[2][A];
+                            b1 = v[2][B];
+                            break;
+                        case 2:
+                            a0 = v[2][A];
+                            b0 = v[2][B];
+                            a1 = v[0][A];
+                            b1 = v[0][B];
+                            break;
+                        }
+                        da = a1 - a0;
+                        db = b1 - b0;
+                        // powers of the endpoint coordinates
+                        a0_2 = a0 * a0; a0_3 = a0_2 * a0; a0_4 = a0_3 * a0;
+                        b0_2 = b0 * b0; b0_3 = b0_2 * b0; b0_4 = b0_3 * b0;
+                        a1_2 = a1 * a1; a1_3 = a1_2 * a1; 
+                        b1_2 = b1 * b1; b1_3 = b1_2 * b1;
+
+                        C1 = a1 + a0;
+                        Ca = a1*C1 + a0_2; Caa = a1*Ca + a0_3; Caaa = a1*Caa + a0_4;
+                        Cb = b1*(b1 + b0) + b0_2; Cbb = b1*Cb + b0_3; Cbbb = b1*Cbb + b0_4;
+                        Cab = 3*a1_2 + 2*a1*a0 + a0_2; Kab = a1_2 + 2*a1*a0 + 3*a0_2;
+                        Caab = a0*Cab + 4*a1_3; Kaab = a1*Kab + 4*a0_3;
+                        Cabb = 4*b1_3 + 3*b1_2*b0 + 2*b1*b0_2 + b0_3;
+                        Kabb = b1_3 + 2*b1_2*b0 + 3*b1*b0_2 + 4*b0_3;
+
+                        // accumulate this edge's contribution
+                        P1 += db*C1;
+                        Pa += db*Ca;
+                        Paa += db*Caa;
+                        Paaa += db*Caaa;
+                        Pb += da*Cb;
+                        Pbb += da*Cbb;
+                        Pbbb += da*Cbbb;
+                        Pab += db*(b1*Cab + b0*Kab);
+                        Paab += db*(b1*Caab + b0*Kaab);
+                        Pabb += da*(a1*Cabb + a0*Kabb);
+                    }
+
+                    // constant normalization factors of the edge sums
+                    P1 /= 2.0;
+                    Pa /= 6.0;
+                    Paa /= 12.0;
+                    Paaa /= 20.0;
+                    Pb /= -6.0;
+                    Pbb /= -12.0;
+                    Pbbb /= -20.0;
+                    Pab /= 24.0;
+                    Paab /= 60.0;
+                    Pabb /= -60.0;
+                }
+
+                // plane offset: n . x + w = 0 holds at vertex v[0]
+                w = - dCalcVectorDot3(n, v[0]);
+
+                // powers of 1/n[C] (n[C] was checked to be nonzero above)
+                k1 = 1 / n[C]; k2 = k1 * k1; k3 = k2 * k1; k4 = k3 * k1;
+
+                Fa = k1 * Pa;
+                Fb = k1 * Pb;
+                Fc = -k2 * (n[A]*Pa + n[B]*Pb + w*P1);
+
+                Faa = k1 * Paa;
+                Fbb = k1 * Pbb;
+                Fcc = k3 * (SQR(n[A])*Paa + 2*n[A]*n[B]*Pab + SQR(n[B])*Pbb +
+                    w*(2*(n[A]*Pa + n[B]*Pb) + w*P1));
+
+                Faaa = k1 * Paaa;
+                Fbbb = k1 * Pbbb;
+                Fccc = -k4 * (CUBE(n[A])*Paaa + 3*SQR(n[A])*n[B]*Paab 
+                    + 3*n[A]*SQR(n[B])*Pabb + CUBE(n[B])*Pbbb
+                    + 3*w*(SQR(n[A])*Paa + 2*n[A]*n[B]*Pab + SQR(n[B])*Pbb)
+                    + w*w*(3*(n[A]*Pa + n[B]*Pb) + w*P1));
+
+                Faab = k1 * Paab;
+                Fbbc = -k2 * (n[A]*Pabb + n[B]*Pbbb + w*Pbb);
+                Fcca = k3 * (SQR(n[A])*Paaa + 2*n[A]*n[B]*Paab + SQR(n[B])*Pabb
+                    + w*(2*(n[A]*Paa + n[B]*Pab) + w*Pa));
+            }
+
+
+            // T0 uses the face integral that corresponds to axis 0,
+            // whichever of A, B, C that axis is for this triangle
+            T0 += n[0] * ((A == 0) ? Fa : ((B == 0) ? Fb : Fc));
+
+            T1[A] += n[A] * Faa;
+            T1[B] += n[B] * Fbb;
+            T1[C] += n[C] * Fcc;
+            T2[A] += n[A] * Faaa;
+            T2[B] += n[B] * Fbbb;
+            T2[C] += n[C] * Fccc;
+            TP[A] += n[A] * Faab;
+            TP[B] += n[B] * Fbbc;
+            TP[C] += n[C] * Fcca;
+        }
+    }
+
+    T1[0] /= 2; T1[1] /= 2; T1[2] /= 2;
+    T2[0] /= 3; T2[1] /= 3; T2[2] /= 3;
+    TP[0] /= 2; TP[1] /= 2; TP[2] /= 2;
+
+    // mass and inertia entries from the accumulated totals; each
+    // off-diagonal pair is written symmetrically
+    m->mass = density * T0;
+    m->_I(0,0) = density * (T2[1] + T2[2]);
+    m->_I(1,1) = density * (T2[2] + T2[0]);
+    m->_I(2,2) = density * (T2[0] + T2[1]);
+    m->_I(0,1) = - density * TP[0];
+    m->_I(1,0) = - density * TP[0];
+    m->_I(2,1) = - density * TP[1];
+    m->_I(1,2) = - density * TP[1];
+    m->_I(2,0) = - density * TP[2];
+    m->_I(0,2) = - density * TP[2];
+
+    // Added to address SF bug 1729095
+    // NOTE(review): divides by T0 without checking that it is nonzero.
+    dMassTranslate( m, T1[0] / T0,  T1[1] / T0,  T1[2] / T0 );
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+
+#endif // dTRIMESH_ENABLED
+}
+
+
+// Fill `m` from trimesh geom `g` so that the resulting mass equals
+// `total_mass`: compute the parameters at unit density, then rescale.
+void dMassSetTrimeshTotal( dMass *m, dReal total_mass, dGeomID g)
+{
+    dAASSERT( m );
+    dUASSERT( g && g->type == dTriMeshClass, "argument not a trimesh" );
+
+    const dReal unit_density = 1.0;
+    dMassSetTrimesh( m, unit_density, g );
+    dMassAdjust( m, total_mass );
+}
+
+
+
+
+// Rescale `m` so that its mass becomes `newmass`; every entry of the 3x3
+// inertia block is multiplied by newmass / (old mass).
+void dMassAdjust (dMass *m, dReal newmass)
+{
+    dAASSERT (m);
+
+    // ratio must be taken before the stored mass is overwritten
+    const dReal ratio = newmass / m->mass;
+    for (int row=0; row<3; row++) {
+        for (int col=0; col<3; col++) {
+            m->_I(row,col) *= ratio;
+        }
+    }
+    m->mass = newmass;
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+// Translate the mass parameters in `m` by (x,y,z) relative to the point of
+// reference. With c the current center of mass and a = (x,y,z), the inertia
+// about the point of reference becomes
+//
+//   I + mass*(crossmat(c)^2 - crossmat(c+a)^2)
+//
+// and the center of mass moves to c+a.
+void dMassTranslate (dMass *m, dReal x, dReal y, dReal z)
+{
+    dAASSERT (m);
+
+    const dReal shift[3] = { x, y, z };
+
+    // center of mass after the move (m->c itself is updated last)
+    dReal moved_c[3];
+    for (int k=0; k<3; k++) moved_c[k] = shift[k] + m->c[k];
+
+    // cross-product matrices of the old and new centers, and their squares
+    dMatrix3 old_cross,new_cross,old_sq,new_sq;
+    dSetZero (old_cross,12);
+    dSetCrossMatrixPlus (old_cross,m->c,4);
+    dSetZero (new_cross,12);
+    dSetCrossMatrixPlus (new_cross,moved_c,4);
+    dMultiply0_333 (new_sq,new_cross,new_cross);
+    dMultiply0_333 (old_sq,old_cross,old_cross);
+
+    // adjust inertia matrix
+    for (int row=0; row<3; row++) {
+        for (int col=0; col<3; col++) {
+            m->_I(row,col) += m->mass * (old_sq[row*4+col]-new_sq[row*4+col]);
+        }
+    }
+
+    // ensure perfect symmetry: mirror the upper triangle into the lower one
+    for (int row=1; row<3; row++) {
+        for (int col=0; col<row; col++) m->_I(row,col) = m->_I(col,row);
+    }
+
+    // adjust center of mass
+    for (int k=0; k<3; k++) m->c[k] += shift[k];
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+// Rotate the mass parameters in `m` by `R` relative to the point of
+// reference: the inertia becomes R * I * R' and the center of mass R * c.
+void dMassRotate (dMass *m, const dMatrix3 R)
+{
+    dAASSERT (m);
+
+    // rotate inertia matrix: first I*R', then R*(I*R')
+    dMatrix3 inertia_Rt;
+    dMultiply2_333 (inertia_Rt,m->I,R);
+    dMultiply0_333 (m->I,R,inertia_Rt);
+
+    // ensure perfect symmetry: mirror the upper triangle into the lower one
+    for (int row=1; row<3; row++) {
+        for (int col=0; col<row; col++) m->_I(row,col) = m->_I(col,row);
+    }
+
+    // rotate center of mass via a temporary, since input and output alias
+    dReal rotated_c[3];
+    dMultiply0_331 (rotated_c,R,m->c);
+    for (int k=0; k<3; k++) m->c[k] = rotated_c[k];
+
+# ifndef dNODEBUG
+    dMassCheck (m);
+# endif
+}
+
+
+// Add mass `b` to mass `a`, storing the result in `a`: the center of mass
+// becomes the mass-weighted average of both centers, the masses are summed
+// and all 12 stored inertia elements are summed element-wise.
+void dMassAdd (dMass *a, const dMass *b)
+{
+    dAASSERT (a && b);
+
+    // the weighted average needs a->mass as it was before the addition
+    const dReal inv_total = dRecip (a->mass + b->mass);
+    for (int k=0; k<3; k++) {
+        a->c[k] = (a->c[k]*a->mass + b->c[k]*b->mass)*inv_total;
+    }
+
+    a->mass += b->mass;
+
+    for (int k=0; k<12; k++) {
+        a->I[k] += b->I[k];
+    }
+}
+
+
+// Backwards compatible API
+// Old name for dMassSetCapsule(); all arguments are forwarded unchanged.
+void dMassSetCappedCylinder(dMass *a, dReal b, int c, dReal d, dReal e)
+{
+    dMassSetCapsule(a,b,c,d,e);
+}
+
+// Old name for dMassSetCapsuleTotal(); all arguments are forwarded unchanged.
+void dMassSetCappedCylinderTotal(dMass *a, dReal b, int c, dReal d, dReal e)
+{
+    dMassSetCapsuleTotal(a,b,c,d,e);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/mat.cpp b/libs/ode-0.16.1/ode/src/mat.cpp
new file mode 100644
index 0000000..67f5673
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/mat.cpp
@@ -0,0 +1,231 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/misc.h>
+#include <ode/error.h>
+#include <ode/memory.h>
+#include "config.h"
+#include "matrix.h"
+#include "mat.h"
+
+
+// Default constructor: an empty 0x0 matrix that owns no storage.
+dMatrix::dMatrix()
+    : n (0), m (0), data (0)
+{
+}
+
+
+// Construct a rows x cols matrix with every element set to zero.
+// Sizes below 1 are reported through dDebug().
+dMatrix::dMatrix (int rows, int cols)
+{
+    const bool bad_size = (rows < 1) || (cols < 1);
+    if (bad_size) dDebug (0,"bad matrix size");
+
+    n = rows;
+    m = cols;
+
+    const int count = n*m;
+    data = (dReal*) dAlloc (count*sizeof(dReal));
+    dSetZero (data,count);
+}
+
+
+// Copy constructor: make an independent deep copy of `a`.
+// An empty source (as produced by the default constructor, where data is 0)
+// yields an empty copy without allocating or calling memcpy on a null
+// pointer; this mirrors the guard used by operator=.
+dMatrix::dMatrix (const dMatrix &a)
+{
+    n = a.n;
+    m = a.m;
+    if (n > 0 && m > 0) {
+        data = (dReal*) dAlloc (n*m*sizeof(dReal));
+        memcpy (data,a.data,n*m*sizeof(dReal));
+    }
+    else data = 0;
+}
+
+
+// Construct a rows x cols matrix as a copy of strided external data:
+// element (i,j) is read from _data[i*rowskip + j*colskip] and stored
+// contiguously, one row after another.
+dMatrix::dMatrix (int rows, int cols,
+                  dReal *_data, int rowskip, int colskip)
+{
+    const bool bad_size = (rows < 1) || (cols < 1);
+    if (bad_size) dDebug (0,"bad matrix size");
+
+    n = rows;
+    m = cols;
+    data = (dReal*) dAlloc (n*m*sizeof(dReal));
+
+    // `out` runs through the destination in storage order
+    int out = 0;
+    for (int row=0; row<n; row++) {
+        for (int col=0; col<m; col++, out++) {
+            data[out] = _data[row*rowskip + col*colskip];
+        }
+    }
+}
+
+
+// Destructor: release the element storage, if any was allocated.
+dMatrix::~dMatrix()
+{
+    if (data == 0) return;
+    dFree (data,n*m*sizeof(dReal));
+}
+
+
+// Return a reference to element (i,j). Out-of-range indexes are reported
+// through dDebug().
+dReal & dMatrix::operator () (int i, int j)
+{
+    const bool row_ok = (i >= 0) && (i < n);
+    const bool col_ok = (j >= 0) && (j < m);
+    if (!(row_ok && col_ok)) dDebug (0,"bad matrix (i,j)");
+    return data [i*m+j];
+}
+
+
+// Matrix assignment: replace this matrix with a deep copy of `a`.
+// Assigning a matrix to itself is a no-op; without that guard the storage
+// would be freed and then copied from. An empty `a` leaves this matrix
+// empty with data == 0.
+void dMatrix::operator= (const dMatrix &a)
+{
+    if (this == &a) return;
+
+    if (data) dFree (data,n*m*sizeof(dReal));
+    n = a.n;
+    m = a.m;
+    if (n > 0 && m > 0) {
+        data = (dReal*) dAlloc (n*m*sizeof(dReal));
+        memcpy (data,a.data,n*m*sizeof(dReal));
+    }
+    else data = 0;
+}
+
+
+// Scalar assignment: set every element of the matrix to `a`.
+void dMatrix::operator= (dReal a)
+{
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        data[k] = a;
+    }
+}
+
+
+// Return a new m x n matrix that is the transpose of this n x m matrix.
+dMatrix dMatrix::transpose()
+{
+    dMatrix flipped (m,n);
+    // iterate in the result's row order; element (col,row) of the result
+    // is element (row,col) of this matrix
+    for (int col=0; col<m; col++) {
+        for (int row=0; row<n; row++) {
+            flipped.data[col*n+row] = data[row*m+col];
+        }
+    }
+    return flipped;
+}
+
+
+// Return the np x nq submatrix made of rows p[0..np-1] and columns
+// q[0..nq-1] of this matrix: result(i,j) = this(p[i],q[j]).
+// Empty index arrays and out-of-range indexes are reported through dDebug().
+dMatrix dMatrix::select (int np, int *p, int nq, int *q)
+{
+    if (np < 1 || nq < 1) dDebug (0,"Matrix select, bad index array sizes");
+    dMatrix r (np,nq);
+    for (int i=0; i<np; i++) {
+        for (int j=0; j<nq; j++) {
+            // the column index is q[j]; validating q[i] would check the
+            // wrong entry and could read past the end of q when np > nq
+            if (p[i] < 0 || p[i] >= n || q[j] < 0 || q[j] >= m)
+                dDebug (0,"Matrix select, bad index arrays");
+            r.data[i*nq+j] = data[p[i]*m+q[j]];
+        }
+    }
+    return r;
+}
+
+
+// Element-wise sum of this matrix and `a`; a size mismatch is reported
+// through dDebug().
+dMatrix dMatrix::operator + (const dMatrix &a)
+{
+    const bool same_shape = (n == a.n) && (m == a.m);
+    if (!same_shape) dDebug (0,"matrix +, mismatched sizes");
+
+    dMatrix sum (n,m);
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        sum.data[k] = data[k] + a.data[k];
+    }
+    return sum;
+}
+
+
+// Element-wise difference of this matrix and `a`; a size mismatch is
+// reported through dDebug().
+dMatrix dMatrix::operator - (const dMatrix &a)
+{
+    const bool same_shape = (n == a.n) && (m == a.m);
+    if (!same_shape) dDebug (0,"matrix -, mismatched sizes");
+
+    dMatrix diff (n,m);
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        diff.data[k] = data[k] - a.data[k];
+    }
+    return diff;
+}
+
+
+// Unary minus: return a matrix of the same size with every element negated.
+dMatrix dMatrix::operator - ()
+{
+    dMatrix negated (n,m);
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        negated.data[k] = -data[k];
+    }
+    return negated;
+}
+
+
+// Matrix product of this (n x m) matrix with `a` (a.n x a.m). The inner
+// dimensions must agree (m == a.n), otherwise dDebug() is called.
+dMatrix dMatrix::operator * (const dMatrix &a)
+{
+    if (m != a.n) dDebug (0,"matrix *, mismatched sizes");
+
+    const int inner = m;
+    const int out_cols = a.m;
+    dMatrix prod (n,out_cols);
+    for (int i=0; i<n; i++) {
+        const dReal *lhs_row = data + i*inner;
+        dReal *out_row = prod.data + i*out_cols;
+        for (int j=0; j<out_cols; j++) {
+            // dot product of row i of this matrix with column j of `a`
+            dReal acc = 0;
+            for (int k=0; k<inner; k++) {
+                acc += lhs_row[k] * a.data[k*out_cols+j];
+            }
+            out_row[j] = acc;
+        }
+    }
+    return prod;
+}
+
+
+// Add `a` to this matrix element by element, in place; a size mismatch is
+// reported through dDebug().
+void dMatrix::operator += (const dMatrix &a)
+{
+    const bool same_shape = (n == a.n) && (m == a.m);
+    if (!same_shape) dDebug (0,"matrix +=, mismatched sizes");
+
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        data[k] += a.data[k];
+    }
+}
+
+
+// Subtract `a` from this matrix element by element, in place; a size
+// mismatch is reported through dDebug().
+void dMatrix::operator -= (const dMatrix &a)
+{
+    const bool same_shape = (n == a.n) && (m == a.m);
+    if (!same_shape) dDebug (0,"matrix -=, mismatched sizes");
+
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        data[k] -= a.data[k];
+    }
+}
+
+
+// Zero every element strictly above the diagonal. Non-square matrices are
+// reported through dDebug().
+void dMatrix::clearUpperTriangle()
+{
+    if (n != m) dDebug (0,"clearUpperTriangle() only works on square matrices");
+    for (int row=0; row<n; row++) {
+        dReal *row_data = data + row*m;
+        for (int col=row+1; col<m; col++) {
+            row_data[col] = 0;
+        }
+    }
+}
+
+
+// Zero every element strictly below the diagonal. Non-square matrices are
+// reported through dDebug().
+void dMatrix::clearLowerTriangle()
+{
+    if (n != m) dDebug (0,"clearLowerTriangle() only works on square matrices");
+    for (int row=0; row<n; row++) {
+        dReal *row_data = data + row*m;
+        for (int col=0; col<row; col++) {
+            row_data[col] = 0;
+        }
+    }
+}
+
+
+// Overwrite every element with (dRandReal()*2 - 1) * range. Elements are
+// filled in storage order, one dRandReal() call per element.
+void dMatrix::makeRandom (dReal range)
+{
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        data[k] = (dRandReal()*REAL(2.0)-REAL(1.0))*range;
+    }
+}
+
+
+// Write the matrix to `f`, one row per line. Each element is formatted
+// with the printf-style format `fmt`.
+void dMatrix::print (const char *fmt, FILE *f)
+{
+    for (int row=0; row<n; row++) {
+        const dReal *row_data = data + row*m;
+        for (int col=0; col<m; col++) {
+            fprintf (f,fmt,row_data[col]);
+        }
+        fprintf (f,"\n");
+    }
+}
+
+
+// Return the largest absolute element-wise difference between this matrix
+// and `a` (0 if no element differs). A size mismatch is reported through
+// dDebug().
+dReal dMatrix::maxDifference (const dMatrix &a)
+{
+    const bool same_shape = (n == a.n) && (m == a.m);
+    if (!same_shape) dDebug (0,"maxDifference(), mismatched sizes");
+
+    dReal worst = 0;
+    const int count = n*m;
+    for (int k=0; k<count; k++) {
+        const dReal gap = dFabs(data[k] - a.data[k]);
+        if (gap > worst) worst = gap;
+    }
+    return worst;
+}
diff --git a/libs/ode-0.16.1/ode/src/mat.h b/libs/ode-0.16.1/ode/src/mat.h
new file mode 100644
index 0000000..920c348
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/mat.h
@@ -0,0 +1,71 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// matrix class. this is mostly for convenience in the testing code, it is
+// not optimized at all. correctness is much more important here.
+
+#ifndef _ODE_MAT_H_
+#define _ODE_MAT_H_
+
+#include <ode/common.h>
+
+
+// Heap-backed n x m matrix of dReal. Elements are stored contiguously,
+// one row after another: element (i,j) lives at data[i*m+j].
+// Each instance owns its own copy of the element storage.
+class dMatrix {
+    int n,m;		// matrix dimension (n rows, m columns), n,m >= 0
+    dReal *data;		// if nonzero, n*m elements allocated on the heap
+
+public:
+    // constructors, destructors
+    dMatrix();				// make default 0x0 matrix
+    dMatrix (int rows, int cols);		// construct zero matrix of given size
+    dMatrix (const dMatrix &);		// construct copy of given matrix
+    // create copy of given data - element (i,j) is data[i*rowskip+j*colskip]
+    dMatrix (int rows, int cols, dReal *_data, int rowskip, int colskip);
+    ~dMatrix();				// destructor
+
+    // data movement
+    dReal & operator () (int i, int j);	// reference an element
+    // note: both assignment operators return void, so assignments
+    // cannot be chained.
+    void operator= (const dMatrix &);	// matrix = matrix
+    void operator= (dReal);		// matrix = scalar
+    dMatrix transpose();			// return transposed matrix
+    // return a permuted submatrix of this matrix, made up of the rows in p
+    // and the columns in q. p has np elements, q has nq elements.
+    dMatrix select (int np, int *p, int nq, int *q);
+
+    // operators (each returns a newly constructed matrix)
+    dMatrix operator + (const dMatrix &);
+    dMatrix operator - (const dMatrix &);
+    dMatrix operator - ();
+    dMatrix operator * (const dMatrix &);
+    // in-place element-wise add / subtract
+    void operator += (const dMatrix &);
+    void operator -= (const dMatrix &);
+
+    // utility
+    void clearUpperTriangle();		// zero elements above the diagonal
+    void clearLowerTriangle();		// zero elements below the diagonal
+    void makeRandom (dReal range);	// fill with random values in +/- range
+    // print one row per line, each element formatted with fmt
+    void print (const char *fmt = "%10.4f ", FILE *f=stdout);
+    // largest absolute element-wise difference to another matrix
+    dReal maxDifference (const dMatrix &);
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/matrix.cpp b/libs/ode-0.16.1/ode/src/matrix.cpp
new file mode 100644
index 0000000..cdb77af
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/matrix.cpp
@@ -0,0 +1,593 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/common.h>
+#include "config.h"
+#include "matrix.h"
+#include "objects.h"
+#include "threaded_solver_ldlt.h"
+
+#include <ode/memory.h>
+
+
+// misc defines
+// ALLOCA is the allocation macro used in this file for temporary buffers
+// that are not obtained with dAlloc().
+#define ALLOCA dALLOCA16
+// size threshold in bytes: temporary buffers larger than this are obtained
+// with dAlloc()/dFree() instead of ALLOCA.
+#define STACK_ALLOC_MAX 8192U
+
+// A = B * C, where A is p x r, B is p x q and C is q x r.
+// Rows of A and C are dPAD(r) elements apart, rows of B dPAD(q) apart.
+/*extern */
+void dxMultiply0(dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r)
+{
+    dAASSERT (A && B && C && p>0 && q>0 && r>0);
+    const unsigned b_stride = dPAD(q);
+    const unsigned ac_stride = dPAD(r);
+    for (unsigned row = 0; row < p; ++row) {
+        const dReal *b_row = B + (sizeint)row * b_stride;
+        dReal *a_row = A + (sizeint)row * ac_stride;
+        for (unsigned col = 0; col < r; ++col) {
+            // dot product of row `row` of B with column `col` of C
+            dReal acc = REAL(0.0);
+            for (unsigned k = 0; k < q; ++k) {
+                acc += b_row[k] * C[(sizeint)k * ac_stride + col];
+            }
+            a_row[col] = acc;
+        }
+    }
+}
+
+
+// A = B' * C, where A is p x r, B is q x p and C is q x r.
+// Rows of A and C are dPAD(r) elements apart, rows of B dPAD(p) apart.
+/*extern */
+void dxMultiply1(dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r)
+{
+    dAASSERT (A && B && C && p>0 && q>0 && r>0);
+    const unsigned b_stride = dPAD(p);
+    const unsigned ac_stride = dPAD(r);
+    for (unsigned row = 0; row < p; ++row) {
+        dReal *a_row = A + (sizeint)row * ac_stride;
+        for (unsigned col = 0; col < r; ++col) {
+            // dot product of column `row` of B with column `col` of C
+            dReal acc = REAL(0.0);
+            for (unsigned k = 0; k < q; ++k) {
+                acc += B[(sizeint)k * b_stride + row] * C[(sizeint)k * ac_stride + col];
+            }
+            a_row[col] = acc;
+        }
+    }
+}
+
+
+// A = B * C', where A is p x r, B is p x q and C is r x q.
+// Rows of A are dPAD(r) elements apart, rows of B and C dPAD(q) apart.
+/*extern */
+void dxMultiply2(dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r)
+{
+    dAASSERT (A && B && C && p>0 && q>0 && r>0);
+    const unsigned a_stride = dPAD(r);
+    const unsigned bc_stride = dPAD(q);
+    for (unsigned row = 0; row < p; ++row) {
+        const dReal *b_row = B + (sizeint)row * bc_stride;
+        dReal *a_row = A + (sizeint)row * a_stride;
+        for (unsigned col = 0; col < r; ++col) {
+            // dot product of row `row` of B with row `col` of C
+            const dReal *c_row = C + (sizeint)col * bc_stride;
+            dReal acc = REAL(0.0);
+            for (unsigned k = 0; k < q; ++k) {
+                acc += b_row[k] * c_row[k];
+            }
+            a_row[col] = acc;
+        }
+    }
+}
+
+
+// In-place Cholesky factorization of the n x n matrix A (row stride
+// dPAD(n)). Only elements on and below the diagonal are read and
+// overwritten with the factor.
+// tmpBuf, if not NULL, provides room for n dReal values; otherwise a
+// temporary buffer is obtained internally.
+// Returns 1 on success, 0 if a non-positive pivot is encountered (A is
+// then left partially overwritten).
+/*extern */
+int dxFactorCholesky(dReal *A, unsigned n, void *tmpBuf/*[n]*/)
+{
+    dAASSERT (n > 0 && A);
+    bool failure = false;
+    
+    dReal *alloctedBuf = NULL;
+    sizeint allocatedSize;
+
+    const unsigned nskip = dPAD (n);
+    
+    // recip[j] caches 1/L[j][j] for the rows factored so far
+    dReal *recip = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        // no caller buffer: use dAlloc for large sizes, ALLOCA otherwise
+        allocatedSize = n * sizeof(dReal);
+        alloctedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        recip = alloctedBuf != NULL ? alloctedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+
+    // aa points at the start of row i
+    dReal *aa = A;
+    for (unsigned i = 0; i < n; aa += nskip, ++i) {
+        dReal *cc = aa;
+        {
+            // off-diagonal elements of row i:
+            // L[i][j] = (A[i][j] - sum_k L[i][k]*L[j][k]) / L[j][j], k < j
+            const dReal *bb = A;
+            for (unsigned j = 0; j < i; bb += nskip, ++cc, ++j) {
+                dReal sum = *cc;
+                const dReal *a = aa, *b = bb, *bend = bb + j;
+                for (; b != bend; ++a, ++b) {
+                    sum -= (*a) * (*b);
+                }
+                *cc = sum * recip[j];
+            }
+        }
+        {
+            // diagonal element: L[i][i] = sqrt(A[i][i] - sum_k L[i][k]^2), k < i
+            dReal sum = *cc;
+            dReal *a = aa, *aend = aa + i;
+            for (; a != aend; ++a) {
+                sum -= (*a)*(*a);
+            }
+            if (sum <= REAL(0.0)) {
+                // non-positive pivot: stop and report failure
+                failure = true;
+                break;
+            }
+            dReal sumsqrt = dSqrt(sum);
+            *cc = sumsqrt;
+            recip[i] = dRecip (sumsqrt);
+        }
+    }
+    
+    // only a dAlloc'ed buffer needs an explicit release
+    if (alloctedBuf != NULL) {
+        dFree(alloctedBuf, allocatedSize);
+    }
+
+    return failure ? 0 : 1;
+}
+
+
+// Solve (L*L') x = b for x, where L is an n x n lower triangular factor
+// with row stride dPAD(n). The solution overwrites b.
+// tmpBuf, if not NULL, provides room for n dReal values; otherwise a
+// temporary buffer is obtained internally.
+/*extern */
+void dxSolveCholesky(const dReal *L, dReal *b, unsigned n, void *tmpBuf/*[n]*/)
+{
+    dAASSERT (n > 0 && L && b);
+
+    dReal *alloctedBuf = NULL;
+    sizeint allocatedSize;
+
+    const unsigned nskip = dPAD (n);
+
+    // y holds the intermediate solution of L*y = b
+    dReal *y = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        // no caller buffer: use dAlloc for large sizes, ALLOCA otherwise
+        allocatedSize = n * sizeof(dReal);
+        alloctedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        y = alloctedBuf != NULL ? alloctedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+
+    {
+        // forward substitution: L*y = b, walking the rows of L top-down
+        const dReal *ll = L;
+        for (unsigned i = 0; i < n; ll += nskip, ++i) {
+            dReal sum = REAL(0.0);
+            for (unsigned k = 0; k < i; ++k) {
+                sum += ll[k] * y[k];
+            }
+            dIASSERT(ll[i] != dReal(0.0));
+            y[i] = (b[i] - sum) / ll[i];
+        }
+    }
+    {
+        // back substitution: L'*x = y, with x written into b.
+        // ll starts at the last diagonal element and moves up the diagonal;
+        // l walks down column i of L, which is row i of L'.
+        const dReal *ll = L + (n - 1) * (nskip + 1);
+        for (unsigned i = n; i > 0; ll -= nskip + 1) {
+            --i;
+            dReal sum = REAL(0.0);
+            const dReal *l = ll + nskip;
+            for (unsigned k = i + 1; k < n; l += nskip, ++k) {
+                sum += (*l) * b[k];
+            }
+            dIASSERT(*ll != dReal(0.0));
+            b[i] = (y[i] - sum) / (*ll);
+        }
+    }
+
+    // only a dAlloc'ed buffer needs an explicit release
+    if (alloctedBuf != NULL) {
+        dFree(alloctedBuf, allocatedSize);
+    }
+}
+
+
+// Compute the inverse of the n x n matrix A into Ainv (both with row
+// stride dPAD(n)) via a Cholesky factorization of a copy of A; A itself
+// is not modified.
+// Returns 1 on success; returns 0 if the factorization fails, in which
+// case Ainv is not written.
+// Temporary layout (in tmpBuf or an internal buffer): Cholesky scratch
+// area, then the vector X of nskip elements, then the nskip*n copy L.
+/*extern */
+int dxInvertPDMatrix(const dReal *A, dReal *Ainv, unsigned n, void *tmpBuf/*[nskip*(n+2)]*/)
+{
+    dAASSERT (n > 0 && A && Ainv);
+    bool success = false;
+
+    dReal *alloctedBuf = NULL;
+    sizeint allocatedSize;
+
+    // the scratch area at the start of tmp is shared by factor and solve,
+    // so it must be as large as the bigger of the two requirements
+    sizeint choleskyFactorSize = dxEstimateFactorCholeskyTmpbufSize(n);
+    sizeint choleskySolveSize = dxEstimateSolveCholeskyTmpbufSize(n);
+    sizeint choleskyMaxSize = dMACRO_MAX(choleskyFactorSize, choleskySolveSize);
+    dIASSERT(choleskyMaxSize % sizeof(dReal) == 0);
+
+    const unsigned nskip = dPAD (n);
+    const sizeint nskip_mul_n = (sizeint)nskip * n;
+    
+    dReal *tmp = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        // no caller buffer: use dAlloc for large sizes, ALLOCA otherwise
+        allocatedSize = choleskyMaxSize + (nskip + nskip_mul_n) * sizeof(dReal);
+        alloctedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        tmp = alloctedBuf != NULL ? alloctedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+
+    dReal *X = (dReal *)((char *)tmp + choleskyMaxSize);
+    dReal *L = X + nskip;
+    memcpy (L, A, nskip_mul_n * sizeof(dReal));
+    if (dxFactorCholesky(L, n, tmp)) {
+        dSetZero (Ainv, nskip_mul_n);	// make sure all padding elements set to 0
+        // for each unit vector e_i, solve A*x = e_i and store x as
+        // column i of Ainv
+        dReal *aa = Ainv, *xi = X, *xiend = X + n;
+        for (; xi != xiend; ++aa, ++xi) {
+            dSetZero(X, n);
+            *xi = REAL(1.0);
+            dxSolveCholesky(L, X, n, tmp);
+            dReal *a = aa;
+            const dReal *x = X, *xend = X + n;
+            for (; x != xend; a += nskip, ++x) {
+                *a = *x;
+            }
+        }
+        success = true;
+    }
+
+    // only a dAlloc'ed buffer needs an explicit release
+    if (alloctedBuf != NULL) {
+        dFree(alloctedBuf, allocatedSize);
+    }
+
+    return success ? 1 : 0;
+}
+
+
+// Test whether the n x n matrix A (row stride dPAD(n)) can be Cholesky
+// factored: A is copied into temporary storage, the copy is factored and
+// the result of dxFactorCholesky() (1 = success, 0 = failure) is returned.
+// A itself is not modified.
+// Temporary layout (in tmpBuf or an internal buffer): Cholesky scratch
+// area followed by the nskip*n copy of A.
+/*extern */
+int dxIsPositiveDefinite(const dReal *A, unsigned n, void *tmpBuf/*[nskip*(n+1)]*/)
+{
+    dAASSERT (n > 0 && A);
+
+    dReal *alloctedBuf = NULL;
+    sizeint allocatedSize;
+
+    sizeint choleskyFactorSize = dxEstimateFactorCholeskyTmpbufSize(n);
+    dIASSERT(choleskyFactorSize % sizeof(dReal) == 0);
+
+    const unsigned nskip = dPAD (n);
+    const sizeint nskip_mul_n = (sizeint)nskip * n;
+    
+    dReal *tmp = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        // no caller buffer: use dAlloc for large sizes, ALLOCA otherwise
+        allocatedSize = choleskyFactorSize + nskip_mul_n * sizeof(dReal);
+        alloctedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        tmp = alloctedBuf != NULL ? alloctedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+
+    // factor a copy so that the caller's matrix stays untouched
+    dReal *Acopy = (dReal *)((char *)tmp + choleskyFactorSize);
+    memcpy(Acopy, A, nskip_mul_n * sizeof(dReal));
+    int factorResult = dxFactorCholesky (Acopy, n, tmp);
+
+    // only a dAlloc'ed buffer needs an explicit release
+    if (alloctedBuf != NULL) {
+        dFree(alloctedBuf, allocatedSize);
+    }
+
+    return factorResult;
+}
+
+
+// Update the factor L (row stride nskip) and the vector d in place, using
+// the n-element vector a.
+// NOTE(review): judging by the name this is presumably the L*D*L' update
+// for a change to the top-left row/column of the factored matrix - confirm
+// against the declaration in matrix.h.
+// Does nothing when n < 2. As written, d[0] is only read; elements
+// d[1..n-1] and the elements of L strictly below the diagonal in columns
+// 1..n-2 are rewritten, while column 0 of L is only read.
+// tmpBuf, if not NULL, provides room for 2*nskip dReal values; otherwise a
+// temporary buffer is obtained internally.
+/*extern */
+void dxLDLTAddTL(dReal *L, dReal *d, const dReal *a, unsigned n, unsigned nskip, void *tmpBuf/*[2*nskip]*/)
+{
+    dAASSERT(L && d && a && n > 0 && nskip >= n);
+
+    if (n < 2) return;
+
+    dReal *alloctedBuf = NULL;
+    sizeint allocatedSize;
+
+    dReal *W1 = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        // no caller buffer: use dAlloc for large sizes, ALLOCA otherwise
+        allocatedSize = nskip * (2 * sizeof(dReal));
+        alloctedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        W1 = alloctedBuf != NULL ? alloctedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+
+    // W2 occupies the second half of the temporary buffer
+    dReal *W2 = W1 + nskip;
+
+    // both work vectors start as a/sqrt(2) with a zero first element;
+    // the first elements are kept separately in W11 and W21
+    W1[0] = REAL(0.0);
+    W2[0] = REAL(0.0);
+    for (unsigned j = 1; j < n; ++j) {
+        W1[j] = W2[j] = (dReal) (a[j] * M_SQRT1_2);
+    }
+    dReal W11 = (dReal) ((REAL(0.5)*a[0]+1)*M_SQRT1_2);
+    dReal W21 = (dReal) ((REAL(0.5)*a[0]-1)*M_SQRT1_2);
+
+    dReal alpha1 = REAL(1.0);
+    dReal alpha2 = REAL(1.0);
+
+    {
+        // first column (j = 0): only W1/W2 and alpha1/alpha2 are updated;
+        // neither d[0] nor column 0 of L is written here
+        dReal dee = d[0];
+        dReal alphanew = alpha1 + (W11*W11)*dee;
+        dIASSERT(alphanew != dReal(0.0));
+        dee /= alphanew;
+        dReal gamma1 = W11 * dee;
+        dee *= alpha1;
+        alpha1 = alphanew;
+        alphanew = alpha2 - (W21*W21)*dee;
+        dee /= alphanew;
+        //dReal gamma2 = W21 * dee;
+        alpha2 = alphanew;
+        dReal k1 = REAL(1.0) - W21*gamma1;
+        dReal k2 = W21*gamma1*W11 - W21;
+        // ll walks down column 0 of L, starting at row 1
+        dReal *ll = L + nskip;
+        for (unsigned p = 1; p < n; ll += nskip, ++p) {
+            dReal Wp = W1[p];
+            dReal ell = *ll;
+            W1[p] =    Wp - W11*ell;
+            W2[p] = k1*Wp +  k2*ell;
+        }
+    }
+
+    // remaining columns: ll walks along the diagonal starting at (1,1)
+    dReal *ll = L + (nskip + 1);
+    for (unsigned j = 1; j < n; ll += nskip + 1, ++j) {
+        dReal k1 = W1[j];
+        dReal k2 = W2[j];
+
+        // apply the W1 term, then the W2 term, to d[j]
+        dReal dee = d[j];
+        dReal alphanew = alpha1 + (k1*k1)*dee;
+        dIASSERT(alphanew != dReal(0.0));
+        dee /= alphanew;
+        dReal gamma1 = k1 * dee;
+        dee *= alpha1;
+        alpha1 = alphanew;
+        alphanew = alpha2 - (k2*k2)*dee;
+        dee /= alphanew;
+        dReal gamma2 = k2 * dee;
+        dee *= alpha2;
+        d[j] = dee;
+        alpha2 = alphanew;
+
+        // update the elements of column j below the diagonal together
+        // with the matching entries of W1 and W2
+        dReal *l = ll + nskip;
+        for (unsigned p = j + 1; p < n; l += nskip, ++p) {
+            dReal ell = *l;
+            dReal Wp = W1[p] - k1 * ell;
+            ell += gamma1 * Wp;
+            W1[p] = Wp;
+            Wp = W2[p] - k2 * ell;
+            ell -= gamma2 * Wp;
+            W2[p] = Wp;
+            *l = ell;
+        }
+    }
+
+    // only a dAlloc'ed buffer needs an explicit release
+    if (alloctedBuf != NULL) {
+        dFree(alloctedBuf, allocatedSize);
+    }
+}
+
+
+// Macros used by dxLDLTRemove() to read A, either through row pointers
+// (active) or as a flat array (disabled alternative). Only the lower
+// triangle of the symmetric A may be referenced, but i and j come from
+// permutation vectors and can arrive in either order, so GETA swaps them
+// when needed. Arguments are parenthesized so expression arguments expand
+// safely; note that each argument is evaluated more than once.
+
+#define _GETA(i,j) (A[(i)][(j)])
+//#define _GETA(i,j) (A[(i)*nskip+(j)])
+#define GETA(i,j) (((i) > (j)) ? _GETA(i,j) : _GETA(j,i))
+
+
+/*extern */ // Drops row/column r from the factor data held in L and d, using A (indexed through p) to correct the remainder.
+void dxLDLTRemove(dReal **A, const unsigned *p, dReal *L, dReal *d,
+    unsigned n1, unsigned n2, unsigned r, unsigned nskip, void *tmpBuf/*n2 + 2*nskip*/)
+{
+    dAASSERT(A && p && L && d && n1 > 0 && n2 > 0 /*&& r >= 0 */&& r < n2 &&
+        n1 >= n2 && nskip >= n1);
+#ifndef dNODEBUG
+    for (unsigned i = 0; i < n2; ++i) dIASSERT(p[i] < n1); // p[] is unsigned: only the upper bound can fail
+#endif
+
+    if (r == n2 - 1) {
+        return;		// deleting the last row/col is easy
+    }
+
+    dReal *allocatedBuf = NULL;
+    sizeint allocatedSize = 0; // only meaningful while allocatedBuf != NULL
+
+    sizeint LDLTAddTLSize = dxEstimateLDLTAddTLTmpbufSize(nskip);
+    dIASSERT(LDLTAddTLSize % sizeof(dReal) == 0);
+    // Workspace layout: [dxLDLTAddTL scratch | n2 dReals]; the heap is used only above STACK_ALLOC_MAX.
+    dReal *tmp = (dReal *)tmpBuf;
+    if (tmpBuf == NULL) {
+        allocatedSize = LDLTAddTLSize + n2 * sizeof(dReal);
+        allocatedBuf = allocatedSize > STACK_ALLOC_MAX ? (dReal *)dAlloc(allocatedSize) : NULL;
+        tmp = allocatedBuf != NULL ? allocatedBuf : (dReal*)ALLOCA(allocatedSize);
+    }
+    
+    if (r == 0) { // first row/column: the correction vector comes from A alone
+        dReal *a = (dReal *)((char *)tmp + LDLTAddTLSize);
+        const unsigned p_0 = p[0];
+        for (unsigned i = 0; i < n2; ++i) {
+            a[i] = -GETA(p[i],p_0);
+        }
+        a[0] += REAL(1.0);
+        dxLDLTAddTL (L, d, a, n2, nskip, tmp);
+    }
+    else {
+        dReal *t = (dReal *)((char *)tmp + LDLTAddTLSize);
+        {
+            dReal *Lcurr = L + (sizeint)r * nskip; // row r of L; widened like the offset passed to dxLDLTAddTL below
+            for (unsigned i = 0; i < r; ++Lcurr, ++i) {
+                dIASSERT(d[i] != dReal(0.0));
+                t[i] = *Lcurr / d[i];
+            }
+        }
+        dReal *a = t + r;
+        {
+            dReal *Lcurr = L + (sizeint)r * nskip;
+            const unsigned *pp_r = p + r, p_r = *pp_r;
+            const unsigned n2_minus_r = n2 - r;
+            for (unsigned i = 0; i < n2_minus_r; Lcurr += nskip, ++i) {
+                a[i] = dDot(Lcurr, t, r) - GETA(pp_r[i], p_r); // (first r entries of row r+i of L) . t, minus the matching entry of A
+            }
+        }
+        a[0] += REAL(1.0);
+        dxLDLTAddTL (L + (sizeint)(nskip + 1) * r, d + r, a, n2 - r, nskip, tmp);
+    }
+
+    // snip out row/column r from L and d
+    dxRemoveRowCol (L, n2, nskip, r);
+    if (r < (n2 - 1)) memmove (d + r, d + r + 1, (n2 - r - 1) * sizeof(dReal));
+
+    if (allocatedBuf != NULL) {
+        dFree(allocatedBuf, allocatedSize);
+    }
+}
+
+
+/*extern */ // Deletes row r and column r of the n*n matrix A (row stride nskip), compacting the remaining entries in place.
+void dxRemoveRowCol(dReal *A, unsigned n, unsigned nskip, unsigned r)
+{
+    dAASSERT(A && n > 0 && nskip >= n && r < n); // r is unsigned, so no lower-bound test is needed
+    if (r >= n - 1) return; // last row/column: nothing has to move
+    if (r > 0) {
+        {   // rows above r: shift columns r+1..n-1 one place to the left
+            const sizeint move_size = (n - r - 1) * sizeof(dReal);
+            dReal *Adst = A + r;
+            for (unsigned i = 0; i < r; Adst += nskip, ++i) {
+                dReal *Asrc = Adst + 1;
+                memmove (Adst, Asrc, move_size);
+            }
+        }
+        {   // rows r+1..n-1, columns 0..r-1: move each row up by one
+            const sizeint cpy_size = r * sizeof(dReal);
+            dReal *Adst = A + (sizeint)nskip * r;
+            unsigned n1 = n - 1;
+            for (unsigned i = r; i < n1; ++i) {
+                dReal *Asrc = Adst + nskip;
+                memcpy (Adst, Asrc, cpy_size);
+                Adst = Asrc;
+            }
+        }
+    }
+    {   // rows r+1..n-1, columns r+1..n-1: move up one row and left one column
+        const sizeint cpy_size = (n - r - 1) * sizeof(dReal);
+        dReal *Adst = A + (sizeint)(nskip + 1) * r;
+        unsigned n1 = n - 1;
+        for (unsigned i = r; i < n1; ++i) {
+            dReal *Asrc = Adst + (nskip + 1);
+            memcpy (Adst, Asrc, cpy_size);
+            Adst = Asrc - 1;
+        }
+    }
+}
+
+
+#undef dSetZero
+#undef dSetValue
+//#undef dDot
+#undef dMultiply0
+#undef dMultiply1
+#undef dMultiply2
+#undef dFactorCholesky
+#undef dSolveCholesky
+#undef dInvertPDMatrix
+#undef dIsPositiveDefinite
+#undef dLDLTAddTL
+#undef dLDLTRemove
+#undef dRemoveRowCol
+
+
+/*extern ODE_API */ // Public entry point: forwards to dxSetZero(); the int count converts to its sizeint parameter.
+void dSetZero(dReal *a, int n)
+{
+    dxSetZero(a, n);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxSetValue(); the int count converts to its sizeint parameter.
+void dSetValue(dReal *a, int n, dReal value)
+{
+    dxSetValue(a, n, value);
+}
+
+// dReal dDot (const dReal *a, const dReal *b, int n);
+
+/*extern ODE_API */ // Public entry point: forwards to dxMultiply0(); the int sizes p,q,r convert to its unsigned parameters.
+void dMultiply0(dReal *A, const dReal *B, const dReal *C, int p,int q,int r)
+{
+    dxMultiply0(A, B, C, p, q, r);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxMultiply1(); the int sizes p,q,r convert to its unsigned parameters.
+void dMultiply1(dReal *A, const dReal *B, const dReal *C, int p,int q,int r)
+{
+    dxMultiply1(A, B, C, p, q, r);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxMultiply2(); the int sizes p,q,r convert to its unsigned parameters.
+void dMultiply2(dReal *A, const dReal *B, const dReal *C, int p,int q,int r)
+{
+    dxMultiply2(A, B, C, p, q, r);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxFactorCholesky() with NULL as the temporary buffer and returns its result.
+int dFactorCholesky(dReal *A, int n)
+{
+    return dxFactorCholesky(A, n, NULL);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxSolveCholesky() with NULL as the temporary buffer.
+void dSolveCholesky(const dReal *L, dReal *b, int n)
+{
+    dxSolveCholesky(L, b, n, NULL);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxInvertPDMatrix() with NULL as the temporary buffer and returns its result.
+int dInvertPDMatrix (const dReal *A, dReal *Ainv, int n)
+{
+    return dxInvertPDMatrix(A, Ainv, n, NULL);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxIsPositiveDefinite() with NULL as the temporary buffer and returns its result.
+int dIsPositiveDefinite(const dReal *A, int n)
+{
+    return dxIsPositiveDefinite(A, n, NULL);
+}
+
+
+/*extern ODE_API */ // Public entry point: forwards to dxLDLTAddTL() with NULL tmpBuf, so that routine obtains its own workspace.
+void dLDLTAddTL(dReal *L, dReal *d, const dReal *a, int n, int nskip)
+{
+    dxLDLTAddTL(L, d, a, n, nskip, NULL);
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxLDLTRemove() with NULL tmpBuf, so that routine obtains its own workspace.
+void dLDLTRemove(dReal **A, const int *p, dReal *L, dReal *d, int n1, int n2, int r, int nskip)
+{
+    dxLDLTRemove(A, (const unsigned *)p, L, d, n1, n2, r, nskip, NULL); // the int permutation array is reinterpreted as unsigned
+    dSASSERT(sizeof(unsigned) == sizeof(*p)); // guards the pointer cast above: element sizes must match
+}
+
+/*extern ODE_API */ // Public entry point: forwards to dxRemoveRowCol(); the int arguments convert to its unsigned parameters.
+void dRemoveRowCol(dReal *A, int n, int nskip, int r)
+{
+    dxRemoveRowCol(A, n, nskip, r);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/matrix.h b/libs/ode-0.16.1/ode/src/matrix.h
new file mode 100644
index 0000000..b723722
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/matrix.h
@@ -0,0 +1,160 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* 
+ * optimized and unoptimized vector and matrix functions 
+ * (inlined private versions)
+ */
+
+#ifndef _ODE__PRIVATE_MATRIX_H_
+#define _ODE__PRIVATE_MATRIX_H_
+
+
+#include <ode/matrix.h>
+
+
+#ifdef __cplusplus
+
+template <unsigned a_stride, typename element_type>
+ODE_INLINE
+void dxtSetZero (element_type *a, sizeint n)
+{
+    // Clears n elements spaced a_stride apart; stop is one stride past the last of them.
+    for (element_type *cursor = a, *const stop = a + n * a_stride; cursor != stop; cursor += a_stride) {
+        *cursor = (element_type)0;
+    }
+}
+
+template <typename element_type>
+ODE_INLINE
+void dxSetZero (element_type *a, sizeint n)
+{
+    dxtSetZero<1>(a, n); // contiguous case: a stride of one element
+}
+
+template <typename element_type>
+ODE_INLINE
+void dxSetValue (element_type *a, sizeint n, element_type value)
+{
+    // Stores value into each of the n contiguous elements starting at a.
+    for (element_type *cursor = a, *const stop = a + n; cursor != stop; ++cursor) {
+        *cursor = value;
+    }
+}
+
+
+#else // #ifndef __cplusplus
+
+ODE_PURE_INLINE
+void dxSetZero (dReal *a, sizeint n)
+{
+    /* C variant: clears the n contiguous dReal values starting at a. */
+    dReal *const stop = a + n;
+    while (a != stop) {
+        *a++ = 0;
+    }
+}
+
+ODE_PURE_INLINE
+void dxSetValue (dReal *a, sizeint n, dReal value)
+{
+    /* C variant: stores value into the n contiguous dReal slots starting at a. */
+    dReal *const stop = a + n;
+    while (a != stop) {
+        *a++ = value;
+    }
+}
+
+
+#endif // #ifdef __cplusplus
+
+
+dReal dxDot (const dReal *a, const dReal *b, unsigned n);
+void dxMultiply0 (dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r);
+void dxMultiply1 (dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r);
+void dxMultiply2 (dReal *A, const dReal *B, const dReal *C, unsigned p, unsigned q, unsigned r);
+int dxFactorCholesky (dReal *A, unsigned n, void *tmpbuf); /* tmpbuf: presumably sized by dxEstimateFactorCholeskyTmpbufSize(n) */
+void dxSolveCholesky (const dReal *L, dReal *b, unsigned n, void *tmpbuf); /* tmpbuf: presumably sized by dxEstimateSolveCholeskyTmpbufSize(n) */
+int dxInvertPDMatrix (const dReal *A, dReal *Ainv, unsigned n, void *tmpbuf); /* tmpbuf: presumably sized by dxEstimateInvertPDMatrixTmpbufSize(n) */
+int dxIsPositiveDefinite (const dReal *A, unsigned n, void *tmpbuf); /* tmpbuf: presumably sized by dxEstimateIsPositiveDefiniteTmpbufSize(n) */
+void dxLDLTAddTL (dReal *L, dReal *d, const dReal *a, unsigned n, unsigned nskip, void *tmpbuf); /* tmpbuf: see dxEstimateLDLTAddTLTmpbufSize() */
+void dxLDLTRemove (dReal **A, const unsigned *p, dReal *L, dReal *d, unsigned n1, unsigned n2, unsigned r, unsigned nskip, void *tmpbuf); /* tmpbuf: see dxEstimateLDLTRemoveTmpbufSize() */
+void dxRemoveRowCol (dReal *A, unsigned n, unsigned nskip, unsigned r);
+
+ODE_PURE_INLINE sizeint dxEstimateFactorCholeskyTmpbufSize(unsigned n)
+{
+    return dPAD(n) * sizeof(dReal); /* byte size of dPAD(n) dReal values */
+}
+
+ODE_PURE_INLINE sizeint dxEstimateSolveCholeskyTmpbufSize(unsigned n)
+{
+    return dPAD(n) * sizeof(dReal); /* byte size of dPAD(n) dReal values */
+}
+
+ODE_PURE_INLINE sizeint dxEstimateInvertPDMatrixTmpbufSize(unsigned n)
+{
+    sizeint FactorCholesky_size = dxEstimateFactorCholeskyTmpbufSize(n);
+    sizeint SolveCholesky_size = dxEstimateSolveCholeskyTmpbufSize(n);
+    sizeint MaxCholesky_size = FactorCholesky_size > SolveCholesky_size ? FactorCholesky_size : SolveCholesky_size; /* larger of the two scratch sizes */
+    return (sizeint)dPAD(n) * (n + 1) * sizeof(dReal) + MaxCholesky_size; /* dPAD(n)*(n+1) dReals plus that scratch */
+}
+
+ODE_PURE_INLINE sizeint dxEstimateIsPositiveDefiniteTmpbufSize(unsigned n)
+{
+    return (sizeint)dPAD(n) * n * sizeof(dReal) + dxEstimateFactorCholeskyTmpbufSize(n); /* dPAD(n)*n dReals plus the factorization scratch */
+}
+
+ODE_PURE_INLINE sizeint dxEstimateLDLTAddTLTmpbufSize(unsigned nskip)
+{
+    return nskip * (2 * sizeof(dReal)); /* byte size of 2*nskip dReal values */
+}
+
+ODE_PURE_INLINE sizeint dxEstimateLDLTRemoveTmpbufSize(unsigned n2, unsigned nskip)
+{
+    return n2 * sizeof(dReal) + dxEstimateLDLTAddTLTmpbufSize(nskip); /* n2 dReals plus the dxLDLTAddTL scratch */
+}
+
+/* For internal use: code that includes this header has the public names mapped onto the dx* routines (note the extra tmpbuf argument on some) */
+#define dSetZero(a, n) dxSetZero(a, n)
+#define dSetValue(a, n, value) dxSetValue(a, n, value)
+#define dDot(a, b, n) dxDot(a, b, n)
+#define dMultiply0(A, B, C, p, q, r) dxMultiply0(A, B, C, p, q, r)
+#define dMultiply1(A, B, C, p, q, r) dxMultiply1(A, B, C, p, q, r)
+#define dMultiply2(A, B, C, p, q, r) dxMultiply2(A, B, C, p, q, r)
+#define dFactorCholesky(A, n, tmpbuf) dxFactorCholesky(A, n, tmpbuf)
+#define dSolveCholesky(L, b, n, tmpbuf) dxSolveCholesky(L, b, n, tmpbuf)
+#define dInvertPDMatrix(A, Ainv, n, tmpbuf) dxInvertPDMatrix(A, Ainv, n, tmpbuf)
+#define dIsPositiveDefinite(A, n, tmpbuf) dxIsPositiveDefinite(A, n, tmpbuf)
+#define dLDLTAddTL(L, d, a, n, nskip, tmpbuf) dxLDLTAddTL(L, d, a, n, nskip, tmpbuf)
+#define dLDLTRemove(A, p, L, d, n1, n2, r, nskip, tmpbuf) dxLDLTRemove(A, p, L, d, n1, n2, r, nskip, tmpbuf)
+#define dRemoveRowCol(A, n, nskip, r) dxRemoveRowCol(A, n, nskip, r)
+
+/* Un-prefixed aliases for the scratch-size helpers defined above */
+#define dEstimateFactorCholeskyTmpbufSize(n) dxEstimateFactorCholeskyTmpbufSize(n)
+#define dEstimateSolveCholeskyTmpbufSize(n) dxEstimateSolveCholeskyTmpbufSize(n)
+#define dEstimateInvertPDMatrixTmpbufSize(n) dxEstimateInvertPDMatrixTmpbufSize(n)
+#define dEstimateIsPositiveDefiniteTmpbufSize(n) dxEstimateIsPositiveDefiniteTmpbufSize(n)
+#define dEstimateLDLTAddTLTmpbufSize(nskip) dxEstimateLDLTAddTLTmpbufSize(nskip)
+#define dEstimateLDLTRemoveTmpbufSize(n2, nskip) dxEstimateLDLTRemoveTmpbufSize(n2, nskip)
+
+
+#endif // #ifndef _ODE__PRIVATE_MATRIX_H_
diff --git a/libs/ode-0.16.1/ode/src/memory.cpp b/libs/ode-0.16.1/ode/src/memory.cpp
new file mode 100644
index 0000000..5a67448
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/memory.cpp
@@ -0,0 +1,95 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/memory.h>
+#include <ode/error.h>
+#include "config.h"
+
+
+static dAllocFunction *allocfn = 0;     // custom handlers; while 0, dAlloc/dRealloc/dFree use malloc/realloc/free
+static dReallocFunction *reallocfn = 0;
+static dFreeFunction *freefn = 0;
+
+#ifdef __MINGW32__
+/* 
+this is a guard against AC_FUNC_MALLOC and AC_FUNC_REALLOC
+which break cross compilation, no issues in native MSYS.
+*/
+#undef malloc
+#undef realloc
+#endif
+
+void dSetAllocHandler (dAllocFunction *fn)
+{
+    allocfn = fn; // passing 0 makes dAlloc() fall back to malloc()
+}
+
+
+void dSetReallocHandler (dReallocFunction *fn)
+{
+    reallocfn = fn; // passing 0 makes dRealloc() fall back to realloc()
+}
+
+
+void dSetFreeHandler (dFreeFunction *fn)
+{
+    freefn = fn; // passing 0 makes dFree() fall back to free()
+}
+
+
+dAllocFunction *dGetAllocHandler()
+{
+    return allocfn; // 0 when no custom handler is installed
+}
+
+
+dReallocFunction *dGetReallocHandler()
+{
+    return reallocfn; // 0 when no custom handler is installed
+}
+
+
+dFreeFunction *dGetFreeHandler()
+{
+    return freefn; // 0 when no custom handler is installed
+}
+
+
+void * dAlloc (sizeint size)
+{
+    return allocfn ? allocfn (size) : malloc (size); // custom handler when installed, otherwise the C library
+}
+
+
+void * dRealloc (void *ptr, sizeint oldsize, sizeint newsize)
+{
+    // oldsize is only handed to a custom handler; the realloc() fallback does not receive it.
+    return reallocfn ? reallocfn (ptr,oldsize,newsize) : realloc (ptr,newsize);
+}
+
+
+void dFree (void *ptr, sizeint size)
+{
+    if (ptr && freefn) freefn (ptr,size); // a custom handler also receives the block size
+    else if (ptr) free (ptr);             // null pointers are ignored on both paths
+}
diff --git a/libs/ode-0.16.1/ode/src/misc.cpp b/libs/ode-0.16.1/ode/src/misc.cpp
new file mode 100644
index 0000000..e63a029
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/misc.cpp
@@ -0,0 +1,217 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/misc.h>
+#include "config.h"
+#include "matrix.h"
+#include "error.h"
+#include "odeou.h"
+
+//****************************************************************************
+// random numbers
+
+static volatile duint32 seed = 0;
+
+unsigned long dRand()
+{
+    duint32 origSeed, newSeed;
+#if !dTHREADING_INTF_DISABLED
+    do { // repeats while AtomicCompareExchange() below returns false (presumably: seed was changed concurrently)
+#endif
+        origSeed = seed;
+        newSeed = ((duint32)1664525 * origSeed + (duint32)1013904223) & (duint32)0xffffffff; // next value: multiply, add, keep 32 bits
+#if dTHREADING_INTF_DISABLED
+        seed = newSeed;
+#else
+    } while (!AtomicCompareExchange((volatile atomicord32 *)&seed, origSeed, newSeed));
+#endif
+    return newSeed; // the new seed doubles as the generated number
+}
+
+
+unsigned long  dRandGetSeed()
+{
+    return seed; // current generator state, i.e. the value most recently produced by dRand() or set by dRandSetSeed()
+}
+
+
+void dRandSetSeed (unsigned long s)
+{
+    seed = s; // stored into the duint32 seed: a narrowing conversion where unsigned long is wider
+}
+
+
+int dTestRand()
+{
+    // The first five values generated from seed 0 must match this reference sequence; the seed is restored afterwards.
+    const unsigned long expected[5] = { 0x3c6ef35f, 0x47502932, 0xd1ccf6e9, 0xaaf95334, 0x6252e503 };
+    const unsigned long savedSeed = seed;
+    int passed = 1;
+    seed = 0;
+    for (int k = 0; passed && k < 5; ++k) passed = (dRand() == expected[k]);
+    seed = savedSeed;
+    return passed;
+}
+
+
+// adam's all-int straightforward(?) dRandInt (0..n-1)
+int dRandInt (int n)
+{
+    int result;
+    // Since there is no memory barrier macro in ODE assign via volatile variable 
+    // to prevent compiler reusing seed as value of `r'
+    volatile unsigned long raw_r = dRand();
+    duint32 r = (duint32)raw_r;
+    
+    duint32 un = n; // a negative n converts to a large unsigned value here
+    dIASSERT(sizeof(n) == sizeof(un));
+
+    // note: probably more aggressive than it needs to be -- might be
+    //       able to get away without one or two of the innermost branches.
+    // if (un <= 0x00010000UL) {
+    //     r ^= (r >> 16);
+    //     if (un <= 0x00000100UL) {
+    //         r ^= (r >> 8);
+    //         if (un <= 0x00000010UL) {
+    //             r ^= (r >> 4);
+    //             if (un <= 0x00000004UL) {
+    //                 r ^= (r >> 2);
+    //                 if (un <= 0x00000002UL) {
+    //                     r ^= (r >> 1);
+    //                 }
+    //             }
+    //         }
+    //     }
+    // }
+    // Optimized version of above
+    if (un <= (duint32)0x00000010) {
+        r ^= (r >> 16); // fold the high bits down so the few low bits used below depend on the whole word
+        r ^= (r >> 8);
+        r ^= (r >> 4);
+        if (un <= (duint32)0x00000002) {
+            r ^= (r >> 2);
+            r ^= (r >> 1);
+            result = (r/* & (duint32)0x01*/) & (un >> 1); // mask is 0 for un < 2 (result 0) and 1 for un == 2
+        } else {
+            if (un <= (duint32)0x00000004) {
+                r ^= (r >> 2);
+                result = ((r & (duint32)0x03) * un) >> 2; // 2-bit value scaled into 0..un-1
+            } else {
+                result = ((r & (duint32)0x0F) * un) >> 4; // 4-bit value scaled into 0..un-1
+            }
+        }
+    } else {
+        if (un <= (duint32)0x00000100) {
+            r ^= (r >> 16);
+            r ^= (r >> 8);
+            result = ((r & (duint32)0xFF) * un) >> 8; // 8-bit value scaled into 0..un-1
+        } else {
+            if (un <= (duint32)0x00010000) {
+                r ^= (r >> 16);
+                result = ((r & (duint32)0xFFFF) * un) >> 16; // 16-bit value scaled into 0..un-1
+            } else {
+                result = (int)(((duint64)r * un) >> 32); // full word scaled through a 64-bit product
+            }
+        }
+    }
+
+    return result;
+}
+
+
+dReal dRandReal()
+{
+    return (dReal)(((double) dRand()) / ((double) 0xffffffff)); // dRand() is masked to 32 bits, so the quotient lies in [0, 1]
+}
+
+//****************************************************************************
+// matrix utility stuff
+
+void dPrintMatrix (const dReal *A, int n, int m, const char *fmt, FILE *f)
+{
+    const int stride = dPAD(m); // rows are stored dPAD(m) elements apart
+    const dReal *row = A;
+    for (int rowsLeft = n; rowsLeft > 0; --rowsLeft, row += stride) {
+        for (int col = 0; col < m; ++col) fprintf (f,fmt,row[col]);
+        fprintf (f,"\n");
+    }
+}
+
+
+void dMakeRandomVector (dReal *A, int n, dReal range)
+{
+    // Each element is dRandReal() stretched from [0,1] to [-1,1] and then scaled by range.
+    for (int left = n; left > 0; --left) *A++ = (dRandReal()*REAL(2.0)-REAL(1.0))*range;
+}
+
+
+void dMakeRandomMatrix (dReal *A, int n, int m, dReal range)
+{
+    // Fills the n x m matrix row by row; padding elements past column m-1 are left untouched.
+    const int stride = dPAD(m);
+    dReal *row = A;
+    for (int rowsLeft = n; rowsLeft > 0; --rowsLeft, row += stride) {
+        for (int col = 0; col < m; ++col) row[col] = (dRandReal()*REAL(2.0)-REAL(1.0))*range;
+    }
+}
+
+
+void dClearUpperTriangle (dReal *A, int n)
+{
+    const int stride = dPAD(n); // rows are stored dPAD(n) elements apart
+    dReal *row = A;
+    for (int diag = 0; diag < n; ++diag, row += stride) {
+        for (int col = n - 1; col > diag; --col) row[col] = 0; // entries strictly right of the diagonal
+    }
+}
+
+
+dReal dMaxDifference (const dReal *A, const dReal *B, int n, int m)
+{
+    // Largest |A(i,j) - B(i,j)| over two n x m matrices stored with a row stride of dPAD(m).
+    const int stride = dPAD(m);
+    dReal largest = REAL(0.0);
+    for (int rowsLeft = n; rowsLeft > 0; --rowsLeft, A += stride, B += stride) {
+        for (int col = 0; col < m; ++col) {
+            const dReal gap = dFabs(A[col] - B[col]);
+            largest = (gap > largest) ? gap : largest;
+        }
+    }
+    return largest;
+}
+
+
+dReal dMaxDifferenceLowerTriangle (const dReal *A, const dReal *B, int n)
+{
+    // Largest |A(i,j) - B(i,j)| with j <= i, for two n x n matrices stored with a row stride of dPAD(n).
+    const int stride = dPAD(n);
+    dReal largest = REAL(0.0);
+    for (int row = 0; row < n; ++row, A += stride, B += stride) {
+        for (int col = 0; col <= row; ++col) {
+            const dReal gap = dFabs(A[col] - B[col]);
+            largest = (gap > largest) ? gap : largest;
+        }
+    }
+    return largest;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/nextafterf.c b/libs/ode-0.16.1/ode/src/nextafterf.c
new file mode 100644
index 0000000..78fbe31
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/nextafterf.c
@@ -0,0 +1,115 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/* _nextafterf() implementation for MSVC */
+
+#include <ode/common.h>
+#include "config.h"
+
+
+#if defined(_ODE__NEXTAFTERF_REQUIRED)
+
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice 
+ * is preserved.
+ * ====================================================
+ */
+
+/* A union which permits us to convert between a float and a 32 bit int: both members share the same storage. */
+
+typedef union
+{
+    float value;
+    uint32 word;
+} ieee_float_shape_type;
+
+/* Get a 32 bit int from a float: i receives the bit pattern of d, not its numeric value.  */
+
+#define GET_FLOAT_WORD(i,d)					\
+    do {								\
+        volatile ieee_float_shape_type gf_u;					\
+        gf_u.value = (d);						\
+        (i) = gf_u.word;						\
+    } while (0)
+
+/* Set a float from a 32 bit int: d receives the float whose bit pattern is i.  */
+
+#define SET_FLOAT_WORD(d,i)					\
+    do {								\
+        volatile ieee_float_shape_type sf_u;					\
+        sf_u.word = (i);						\
+        (d) = sf_u.value;						\
+    } while (0)
+
+
+#undef nextafterf
+float _nextafterf(float x, float y)
+{
+    int32 hx,hy,ix,iy;	/* hx,hy: bit patterns of x,y; ix,iy: the same with the sign bit masked off */
+
+    GET_FLOAT_WORD(hx,x);
+    GET_FLOAT_WORD(hy,y);
+    ix = hx&0x7fffffff;		/* |x| */
+    iy = hy&0x7fffffff;		/* |y| */
+
+    if((ix>0x7f800000) ||   /* x is nan */ 
+        (iy>0x7f800000))     /* y is nan */ 
+        return x+y;				/* NaN operand: the sum hands a NaN back */
+    if(x==y) return x;		/* x=y, return x */
+    if(ix==0) {				/* x == 0 */
+        SET_FLOAT_WORD(x,(hy&0x80000000)|1);/* return +-minsubnormal */
+        y = x*x;
+        if(y==x) return y; else return x;	/* raise underflow flag */
+    } 
+    if(hx>=0) {				/* x > 0 */
+        if(hx>hy) {				/* x > y, x -= ulp */
+            hx -= 1;
+        } else {				/* x < y, x += ulp */
+            hx += 1;
+        }
+    } else {				/* x < 0 */
+        if(hy>=0||hx>hy){			/* x < y, x -= ulp */
+            hx -= 1;
+        } else {				/* x > y, x += ulp */
+            hx += 1;
+        }
+    }
+    hy = hx&0x7f800000;		/* hy is reused for the exponent bits of the stepped pattern */
+    if(hy>=0x7f800000) return x+x;	/* overflow  */
+    if(hy<0x00800000) {		/* underflow */
+        y = x*x;
+        if(y!=x) {		/* raise underflow flag */
+            SET_FLOAT_WORD(y,hx);
+            return y;
+        }
+    }
+    SET_FLOAT_WORD(x,hx);	/* rebuild the float from the stepped bit pattern */
+    return x;
+}
+
+
+#endif /* #if defined(_ODE__NEXTAFTERF_REQUIRED) */
diff --git a/libs/ode-0.16.1/ode/src/objects.cpp b/libs/ode-0.16.1/ode/src/objects.cpp
new file mode 100644
index 0000000..e024aca
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/objects.cpp
@@ -0,0 +1,138 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// Object, body, and world methods.
+
+
+#include <ode/common.h>
+#include <ode/threading_impl.h>
+#include <ode/objects.h>
+#include "config.h"
+#include "objects.h"
+#include "default_threading.h"
+#include "threading_impl.h"
+#include "matrix.h"
+#include "util.h"
+
+
+#define dWORLD_DEFAULT_GLOBAL_ERP REAL(0.2) // initial value of dxWorld::global_erp
+// The initial dxWorld::global_cfm depends on the precision the library is built with.
+#if defined(dSINGLE)
+#define dWORLD_DEFAULT_GLOBAL_CFM REAL(1e-5)
+#elif defined(dDOUBLE)
+#define dWORLD_DEFAULT_GLOBAL_CFM REAL(1e-10)
+#else
+#error dSINGLE or dDOUBLE must be defined
+#endif
+
+
+dObject::~dObject()
+{
+    // Intentionally empty; this is only the out-of-line definition of the virtual destructor
+}
+
+
+dxAutoDisable::dxAutoDisable(void *): // the unnamed void* argument is not used; this overload just installs the defaults below
+    idle_time(REAL(0.0)),
+    idle_steps(10),
+    average_samples(1), // Default is 1 sample => Instantaneous velocity
+    linear_average_threshold(REAL(0.01)*REAL(0.01)), // (magnitude squared)
+    angular_average_threshold(REAL(0.01)*REAL(0.01)) // (magnitude squared)
+{
+}
+
+dxDampingParameters::dxDampingParameters(void *): // the unnamed void* argument is not used; this overload just installs the defaults below
+    linear_scale(REAL(0.0)),
+    angular_scale(REAL(0.0)),
+    linear_threshold(REAL(0.01) * REAL(0.01)), // stored as 0.01 squared
+    angular_threshold(REAL(0.01) * REAL(0.01)) // stored as 0.01 squared
+{
+}
+
+dxQuickStepParameters::dxQuickStepParameters(void *): // the unnamed void* argument is not used; this overload just installs the defaults below
+    num_iterations(20),
+    w(REAL(1.3))
+{
+}
+
+dxContactParameters::dxContactParameters(void *): // the unnamed void* argument is not used; this overload just installs the defaults below
+    max_vel(dInfinity), // infinite by default
+    min_depth(REAL(0.0))
+{
+}
+
+dxWorld::dxWorld():
+    dBase(),
+    dxThreadingBase(),
+    firstbody(NULL),
+    firstjoint(NULL),
+    nb(0),
+    nj(0),
+    global_erp(dWORLD_DEFAULT_GLOBAL_ERP),
+    global_cfm(dWORLD_DEFAULT_GLOBAL_CFM),
+    adis(NULL), // presumably selects the (void *) constructor that installs default parameters - confirm against the member types
+    body_flags(0),
+    islands_max_threads(dWORLDSTEP_THREADCOUNT_UNLIMITED),
+    wmem(NULL), // no working memory attached yet; the destructor and assignThreadingImpl() check for this
+    qs(NULL),
+    contactp(NULL),
+    dampingp(NULL),
+    max_angular_speed(dInfinity),
+    userdata(0)
+{
+    dxThreadingBase::setThreadingDefaultImplProvider(this); // the world itself is registered as the default-implementation provider
+
+    dSetZero (gravity, 4); // the four gravity components start at zero
+}
+
+dxWorld::~dxWorld()
+{
+    // Nothing to release unless working memory (wmem) was attached to this world.
+    if (!wmem) return;
+
+    wmem->CleanupWorldReferences(this);
+    wmem->Release();
+}
+
+
+void dxWorld::assignThreadingImpl(const dxThreadingFunctionsInfo *functions_info, dThreadingImplementationID threading_impl)
+{
+    if (wmem != NULL)
+    {
+        // Free objects allocated with old threading
+        wmem->CleanupWorldReferences(this);
+    }
+    // The cleanup above runs first; only then is the new implementation handed to the base class.
+    dxThreadingBase::assignThreadingImpl(functions_info, threading_impl);
+}
+
+dxWorldProcessContext *dxWorld::unsafeGetWorldProcessingContext() const
+{
+    return wmem->GetWorldProcessingContext(); // no NULL check on wmem here: the caller must know it is set
+}
+
+const dxThreadingFunctionsInfo *dxWorld::retrieveThreadingDefaultImpl(dThreadingImplementationID &out_defaultImpl)
+{
+    out_defaultImpl = DefaultThreadingHolder::getDefaultThreadingImpl(); // implementation ID goes out through the reference parameter
+    return DefaultThreadingHolder::getDefaultThreadingFunctions(); // the matching function table is the return value
+}
+
diff --git a/libs/ode-0.16.1/ode/src/objects.h b/libs/ode-0.16.1/ode/src/objects.h
new file mode 100644
index 0000000..0e7d34f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/objects.h
@@ -0,0 +1,206 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+// object, body, and world structs.
+
+
+#ifndef _ODE__PRIVATE_OBJECTS_H_
+#define _ODE__PRIVATE_OBJECTS_H_
+
+
+#include <ode/common.h>
+#include <ode/memory.h>
+#include <ode/mass.h>
+#include "error.h"
+#include "array.h"
+#include "common.h"
+#include "threading_base.h"
+
+
+struct dxJointNode;
+class dxStepWorkingMemory;
+class dxWorldProcessContext;
+
+
+// Flags kept in dxBody::flags. Every value is a distinct power of two, so
+// several of them can be combined in one integer with bitwise OR.
+enum {
+    dxBodyFlagFiniteRotation =        1,  // use finite rotations
+    dxBodyFlagFiniteRotationAxis =    2,  // use finite rotations only along axis
+    dxBodyDisabled =                  4,  // body is disabled
+    dxBodyNoGravity =                 8,  // body is not influenced by gravity
+    dxBodyAutoDisable =               16, // enable auto-disable on body
+    dxBodyLinearDamping =             32, // use linear damping
+    dxBodyAngularDamping =            64, // use angular damping
+    dxBodyMaxAngularSpeed =           128,// use maximum angular speed
+    dxBodyGyroscopic =                256 // use gyroscopic term
+};
+
+
+// Base class that routes allocation and deallocation of derived objects
+// (single objects and arrays) through dAlloc() and dFree().
+struct dBase {
+    void *operator new (size_t size) { return dAlloc (size); }
+    void *operator new (size_t, void *p) { return p; } // placement form: no allocation, the given address is used
+    void operator delete (void *ptr, size_t size) { dFree (ptr,size); }
+    void *operator new[] (size_t size) { return dAlloc (size); }
+    void operator delete[] (void *ptr, size_t size) { dFree (ptr,size); }
+};
+
+
+// Base class for bodies and joints: holds the owning world, the links of
+// the per-world object list, a scratch tag and the user data pointer.
+struct dObject : public dBase {
+    dxWorld *world;		// world this object is in
+    dObject *next;		// next object of this type in list
+    dObject **tome;		// pointer to previous object's next ptr (for the first object: to the list head pointer)
+    int tag;			// used by dynamics algorithms
+    void *userdata;		// user settable data
+
+    explicit dObject(dxWorld *w): world(w), next(NULL), tome(NULL), tag(0), userdata(NULL) {} // object starts out unlinked
+    virtual ~dObject();
+};
+
+
+// auto disable parameters (kept per world and per body)
+struct dxAutoDisable {
+    dReal idle_time;		// time the body needs to be idle to auto-disable it
+    int idle_steps;		// steps the body needs to be idle to auto-disable it
+    unsigned int average_samples;     // size of the average_lvel and average_avel buffers
+    dReal linear_average_threshold;   // linear (squared) average velocity threshold
+    dReal angular_average_threshold;  // angular (squared) average velocity threshold
+
+    dxAutoDisable() {} // leaves all fields uninitialized
+    explicit dxAutoDisable(void *); // separate overload selected by a pointer argument; defined out of line
+};
+
+
+// damping parameters (kept per world and per body)
+struct dxDampingParameters {
+    dReal linear_scale;  // multiply the linear velocity by (1 - scale)
+    dReal angular_scale; // multiply the angular velocity by (1 - scale)
+    dReal linear_threshold;   // linear (squared) average speed threshold
+    dReal angular_threshold;  // angular (squared) average speed threshold
+
+    dxDampingParameters() {} // leaves all fields uninitialized
+    explicit dxDampingParameters(void *); // fills in default values; the pointer argument only selects this overload
+};
+
+
+// quick-step parameters (kept per world)
+struct dxQuickStepParameters {
+    int num_iterations;		// number of SOR iterations to perform
+    dReal w;			// the SOR over-relaxation parameter
+
+    dxQuickStepParameters() {} // leaves all fields uninitialized
+    explicit dxQuickStepParameters(void *); // fills in default values; the pointer argument only selects this overload
+};
+
+
+// contact generation parameters (kept per world)
+struct dxContactParameters {
+    dReal max_vel;		// maximum correcting velocity
+    dReal min_depth;		// thickness of 'surface layer'
+
+    dxContactParameters() {} // leaves all fields uninitialized
+    explicit dxContactParameters(void *); // fills in default values; the pointer argument only selects this overload
+};
+
+// position vector and rotation matrix for geometry objects that are not
+// connected to bodies. dxBody also embeds one as its `posr' member.
+struct dxPosR {
+    dVector3 pos; // position vector
+    dMatrix3 R; // rotation matrix
+};
+
+struct dxBody : public dObject { // state of one body; list links, world and user data come from dObject
+    dxJointNode *firstjoint;	// list of attached joints
+    unsigned flags;			// some dxBodyFlagXXX flags
+    dGeomID geom;			// first collision geom associated with body
+    dMass mass;			// mass parameters about POR
+    dMatrix3 invI;		// inverse of mass.I
+    dReal invMass;		// 1 / mass.mass
+    dxPosR posr;			// position and orientation of point of reference
+    dQuaternion q;		// orientation quaternion
+    dVector3 lvel,avel;		// linear and angular velocity of POR
+    dVector3 facc,tacc;		// force and torque accumulators
+    dVector3 finite_rot_axis;	// finite rotation axis, unit length or 0=none
+
+    // auto-disable information
+    dxAutoDisable adis;		// auto-disable parameters
+    dReal adis_timeleft;		// time left to be idle
+    int adis_stepsleft;		// steps left to be idle
+    dVector3* average_lvel_buffer;      // buffer for the linear average velocity calculation
+    dVector3* average_avel_buffer;      // buffer for the angular average velocity calculation
+    unsigned int average_counter;      // counter/index to fill the average-buffers
+    int average_ready;            // indicates ( with = 1 ), if the Body's buffers are ready for average-calculations
+
+    void (*moved_callback)(dxBody*); // let the user know the body moved
+    dxDampingParameters dampingp; // damping parameters, depends on flags
+    dReal max_angular_speed;      // limit the angular velocity to this magnitude
+
+    dxBody(dxWorld *w); // only passes w on to dObject; the fields above are assigned in dBodyCreate()
+};
+
+
+struct dxWorld : public dBase, public dxThreadingBase, private dxIThreadingDefaultImplProvider {
+    dxBody *firstbody;		// body linked list
+    dxJoint *firstjoint;		// joint linked list
+    int nb,nj;			// number of bodies and joints in lists
+    dVector3 gravity;		// gravity vector (m/s/s)
+    dReal global_erp;		// global error reduction parameter
+    dReal global_cfm;		// global constraint force mixing parameter
+    dxAutoDisable adis;		// auto-disable parameters
+    int body_flags;               // flags for new bodies
+    unsigned islands_max_threads; // maximum threads to allocate for island processing
+    dxStepWorkingMemory *wmem; // Working memory object for dWorldStep/dWorldQuickStep
+
+    dxQuickStepParameters qs; // quick-step parameters
+    dxContactParameters contactp; // contact generation parameters
+    dxDampingParameters dampingp; // damping parameters
+    dReal max_angular_speed;      // limit the angular velocity to this magnitude
+
+    void* userdata; // opaque pointer, initialized to 0 by the constructor
+
+    dxWorld();
+    virtual ~dxWorld(); // Compilers issue warnings if a class with virtual methods does not have a virtual destructor :(
+
+    void assignThreadingImpl(const dxThreadingFunctionsInfo *functions_info, dThreadingImplementationID threading_impl);
+    // Thread count from calculateThreadingLimitedThreadCount() for islands_max_threads, never less than 1; the optional out value is clamped to >= 1 as well.
+    unsigned calculateIslandProcessingMaxThreadCount(unsigned *ptrOut_activeThreadCount=NULL) const 
+    {
+        unsigned activeThreadCount, *ptrActiveThreadCountToUse = ptrOut_activeThreadCount != NULL ? &activeThreadCount : NULL; // local is only filled and read when the caller asked for it
+        unsigned limitedCount = calculateThreadingLimitedThreadCount(islands_max_threads, false, ptrActiveThreadCountToUse);
+        if (ptrOut_activeThreadCount != NULL) {
+            *ptrOut_activeThreadCount = dMACRO_MAX(activeThreadCount, 1U);
+        }
+        return dMACRO_MAX(limitedCount, 1U); 
+    }
+    
+    dxWorldProcessContext *unsafeGetWorldProcessingContext() const; // dereferences wmem without checking it
+
+private: // dxIThreadingDefaultImplProvider
+    virtual const dxThreadingFunctionsInfo *retrieveThreadingDefaultImpl(dThreadingImplementationID &out_defaultImpl);
+};
+
+
+#endif // #ifndef _ODE__PRIVATE_OBJECTS_H_
diff --git a/libs/ode-0.16.1/ode/src/obstack.cpp b/libs/ode-0.16.1/ode/src/obstack.cpp
new file mode 100644
index 0000000..541f0e3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/obstack.cpp
@@ -0,0 +1,157 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/common.h>
+#include <ode/error.h>
+#include <ode/memory.h>
+#include "config.h"
+#include "obstack.h"
+
+
+//****************************************************************************
+// macros and constants
+// Re-align `ofs' (an offset inside `arena'): dEFFICIENT_SIZE is applied to the absolute address arena+ofs and the result is assigned back to `ofs'.
+#define ROUND_UP_OFFSET_TO_EFFICIENT_SIZE(arena,ofs) \
+    ofs = (sizeint) (dEFFICIENT_SIZE( ((sizeint)(arena)) + ofs ) - ((sizeint)(arena)) )
+// Largest request alloc() accepts: arena size minus the Arena header and EFFICIENT_ALIGNMENT - 1 bytes (presumably the worst-case alignment padding).
+#define MAX_ALLOC_SIZE \
+    ((sizeint)(dOBSTACK_ARENA_SIZE - sizeof (Arena) - EFFICIENT_ALIGNMENT + 1))
+
+//****************************************************************************
+// dObStack
+
+dObStack::dObStack(): // starts with no arenas and with the iterator not positioned
+    m_first(NULL), m_last(NULL),
+    m_current_arena(NULL), m_current_ofs(0)
+{
+}
+
+
+dObStack::~dObStack()
+{
+    // Release every arena of the chain through dFree(). The link to the
+    // following arena is read first, because the arena that stores it is
+    // gone once dFree() has been called on it.
+    for (Arena *arena = m_first; arena != NULL; ) {
+        Arena *const following = arena->m_next;
+        dFree (arena,dOBSTACK_ARENA_SIZE);
+        arena = following;
+    }
+}
+
+
+void *dObStack::alloc (sizeint num_bytes)
+{
+    // A request larger than MAX_ALLOC_SIZE is reported through dDebug.
+    if (num_bytes > MAX_ALLOC_SIZE) dDebug (0,"num_bytes too large");
+
+    // Decide where the block goes. Three situations need a fresh arena:
+    //  - there is no arena at all yet,
+    //  - the current arena is too full and an already allocated successor
+    //    exists in the chain, which is then reused,
+    //  - the current arena is too full and it is the tail of the chain,
+    //    so a new arena is allocated and linked in behind it.
+    Arena *const previous = m_last;
+    const bool out_of_room = previous == NULL ||
+        (previous->m_used + num_bytes) > dOBSTACK_ARENA_SIZE;
+    if (out_of_room) {
+        Arena *const successor = (previous != NULL) ? previous->m_next : NULL;
+        if (successor != NULL) {
+            // reuse the next arena of the chain
+            m_last = successor;
+        }
+        else {
+            // grow the chain by one arena
+            Arena *const created = (Arena *) dAlloc (dOBSTACK_ARENA_SIZE);
+            created->m_next = 0;
+            if (previous != NULL) {
+                previous->m_next = created;
+            }
+            if (m_first == NULL) {
+                m_first = created;
+            }
+            m_last = created;
+        }
+
+        // the arena being started contains only its (aligned) header
+        m_last->m_used = sizeof (Arena);
+        ROUND_UP_OFFSET_TO_EFFICIENT_SIZE (m_last,m_last->m_used);
+    }
+
+    // allocate an area in the arena
+    char *block = ((char*) m_last) + m_last->m_used;
+    m_last->m_used += num_bytes;
+    ROUND_UP_OFFSET_TO_EFFICIENT_SIZE (m_last,m_last->m_used);
+    return block;
+}
+
+
+void dObStack::freeAll()
+{
+    // Allocation restarts at the head of the chain; no arena is released.
+    m_last = m_first;
+    // The used size is reset in every arena of the chain, not only in the
+    // first one; otherwise enumeration may proceed to remains of old
+    // deleted joints in unused arenas.
+    for (Arena *arena = m_first; arena != NULL; arena = arena->m_next) {
+        arena->m_used = sizeof(Arena);
+        ROUND_UP_OFFSET_TO_EFFICIENT_SIZE (arena,arena->m_used);
+    }
+}
+
+
+void *dObStack::rewind()
+{
+    return switch_to_arena(m_first); // restart the iterator at the first arena; 0 if there is none or it holds no blocks
+}
+
+void *dObStack::next (sizeint num_bytes)
+{
+    // Works like alloc(), except that no new storage is ever allocated.
+    // Without a current arena there is nothing to iterate over.
+    if (m_current_arena == NULL) return 0;
+
+    // Step over the block of `num_bytes' bytes and re-align the offset
+    // with the same macro alloc() uses.
+    m_current_ofs += num_bytes;
+    ROUND_UP_OFFSET_TO_EFFICIENT_SIZE (m_current_arena,m_current_ofs);
+
+    // Past the used part of this arena: continue in the following arena.
+    if (m_current_ofs >= m_current_arena->m_used)
+        return switch_to_arena(m_current_arena->m_next);
+    return ((char*) m_current_arena) + m_current_ofs;
+}
+
+void *dObStack::switch_to_arena(Arena *next_arena)
+{
+    // The iterator always records the arena it was pointed at, even NULL.
+    m_current_arena = next_arena;
+    if (next_arena == NULL) return 0;
+
+    // The first block sits right after the aligned arena header.
+    m_current_ofs = sizeof (Arena);
+    ROUND_UP_OFFSET_TO_EFFICIENT_SIZE (next_arena, m_current_ofs);
+
+    // An arena with nothing used past its header ends the iteration.
+    const bool has_blocks = m_current_ofs < next_arena->m_used;
+    return has_blocks ? ((char*) next_arena) + m_current_ofs : 0;
+}
diff --git a/libs/ode-0.16.1/ode/src/obstack.h b/libs/ode-0.16.1/ode/src/obstack.h
new file mode 100644
index 0000000..8d4f067
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/obstack.h
@@ -0,0 +1,73 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_OBSTACK_H_
+#define _ODE_OBSTACK_H_
+
+#include "objects.h" 
+
+// each obstack Arena pointer points to a block of this many bytes
+#define dOBSTACK_ARENA_SIZE 16384
+
+
+struct dObStack : public dBase {
+    dObStack();
+    ~dObStack();
+
+    void *alloc (sizeint num_bytes);
+    // allocate a block in the last arena, moving on to the next arena of the
+    // chain or allocating a new one if necessary. requests above MAX_ALLOC_SIZE are reported via dDebug.
+
+    void freeAll();
+    // free all blocks in all arenas. this does not deallocate the arenas
+    // themselves, so future alloc()s will reuse them.
+
+    void *rewind();
+    // rewind the obstack iterator, and return the address of the first
+    // allocated block. return 0 if there are no allocated blocks.
+
+    void *next (sizeint num_bytes);
+    // return the address of the next allocated block. 'num_bytes' is the size
+    // of the previous block. this returns null if there are no more blocks.
+    // the sequence of 'num_bytes' parameters passed to next() during a
+    // traversal of the list must exactly match the parameters passed to alloc().
+
+private:
+    struct Arena {
+        Arena *m_next;	// next arena in linked list
+        sizeint m_used;		// total number of bytes used in this arena,
+    };			// counting this header
+
+private:
+    void *switch_to_arena(Arena *next_arena); // point the iterator at next_arena; returns its first block or 0
+
+private:
+    Arena *m_first;		// head of the arena linked list. 0 if no arenas yet
+    Arena *m_last;		// arena where blocks are currently being allocated
+
+    // used for iterator
+    Arena *m_current_arena; // arena the iterator is in, 0 when iteration is over or was never started
+    sizeint m_current_ofs; // byte offset of the iterator inside m_current_arena
+};
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/ode.cpp b/libs/ode-0.16.1/ode/src/ode.cpp
new file mode 100644
index 0000000..40bfd6a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/ode.cpp
@@ -0,0 +1,2325 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+// this source file is mostly concerned with the data structures, not the
+// numerics.
+
+#include <ode/ode.h>
+#include <ode/memory.h>
+#include <ode/error.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "objects.h"
+#include "joints/joints.h"
+#include "step.h"
+#include "quickstep.h"
+#include "util.h"
+#include "odetls.h"
+
+// misc defines
+#define ALLOCA dALLOCA16
+
+//****************************************************************************
+// utility
+
+
+// Push `obj' onto the front of the list whose head pointer is `*first'.
+// A node's `tome' points at whichever pointer currently refers to that node.
+void addObjectToList (dObject *obj, dObject **first)
+{
+    if (*first != NULL) (*first)->tome = &obj->next;
+    obj->tome = first;
+    obj->next = *first;
+    *first = obj;
+}
+
+
+// Unlink `obj' from the list it is on: the pointer that referred to `obj'
+// (reached through `tome') is redirected to the successor of `obj'.
+static inline void removeObjectFromList (dObject *obj)
+{
+    dObject *const successor = obj->next;
+    if (successor != NULL) successor->tome = obj->tome;
+    *(obj->tome) = successor;
+    obj->next = NULL; // safeguard: the removed object keeps no links
+    obj->tome = NULL;
+}
+
+
+// Detach joint `j' from both of its bodies: the node referring to `j' is
+// taken out of each body's joint list, then both nodes of `j' are cleared.
+static void removeJointReferencesFromAttachedBodies (dxJoint *j)
+{
+    for (int side = 0; side < 2; ++side) {
+        dxBody *attached = j->node[side].body;
+        if (attached == NULL) continue;
+
+        // `link' is the pointer that refers to the node under inspection:
+        // first the body's list head, then the `next' field of each node.
+        dxJointNode **link = &attached->firstjoint;
+        while (*link != NULL && (*link)->joint != j) {
+            link = &(*link)->next;
+        }
+        // splice the node of `j' out if it was found in this list
+        if (*link != NULL) {
+            *link = (*link)->next;
+        }
+    }
+
+    j->node[0].body = NULL;
+    j->node[0].next = NULL;
+    j->node[1].body = NULL;
+    j->node[1].next = NULL;
+}
+
+//****************************************************************************
+// debugging
+
+// Return 1 if following `next' from `first' runs into a cycle (if so, it's
+// bad), 0 if the list ends normally.
+static int listHasLoops (dObject *first)
+{
+    if (!first || !first->next) return 0;
+    // `fast' steps on every pass, `slow' on every second one; in a cycle they meet
+    dObject *slow = first;
+    bool advance_slow = false;
+    for (dObject *fast = first->next; fast; fast = fast->next) {
+        if (slow == fast) return 1;
+        if (advance_slow) slow = slow->next;
+        advance_slow = !advance_slow;
+    }
+    return 0;
+}
+
+
+// check the validity of the world data structures
+
+static int g_world_check_tag_generator = 0; // most recently issued check tag
+
+static inline int generateWorldCheckTag()
+{
+    g_world_check_tag_generator += 1; // Atomicity is not necessary here
+    return g_world_check_tag_generator;
+}
+
+static void checkWorld (dxWorld *w)
+{
+    dxBody *b;
+    dxJoint *j;
+    // Consistency checks of the body and joint lists of `w'; every problem found is reported through dDebug().
+    // check there are no loops
+    if (listHasLoops (w->firstbody)) dDebug (0,"body list has loops");
+    if (listHasLoops (w->firstjoint)) dDebug (0,"joint list has loops");
+
+    // check lists are well formed (check `tome' pointers):
+    // the `tome' of each successor must point at its predecessor's `next'
+    for (b=w->firstbody; b; b=(dxBody*)b->next) {
+        if (b->next && b->next->tome != &b->next)
+            dDebug (0,"bad tome pointer in body list");
+    }
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) {
+        if (j->next && j->next->tome != &j->next)
+            dDebug (0,"bad tome pointer in joint list");
+    }
+
+    // check counts (list lengths against the cached nb / nj)
+    int n = 0;
+    for (b=w->firstbody; b; b=(dxBody*)b->next) n++;
+    if (w->nb != n) dDebug (0,"body count incorrect");
+    n = 0;
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) n++;
+    if (w->nj != n) dDebug (0,"joint count incorrect");
+
+    // set all tag values to a known value (a fresh one on every call), so that
+    // objects reached later can be recognized as members of this world's lists
+    int count = generateWorldCheckTag();
+    for (b=w->firstbody; b; b=(dxBody*)b->next) b->tag = count;
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) j->tag = count;
+
+    // check all body/joint world pointers are ok
+    for (b=w->firstbody; b; b=(dxBody*)b->next) if (b->world != w)
+        dDebug (0,"bad world pointer in body list");
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) if (j->world != w)
+        dDebug (0,"bad world pointer in joint list");
+
+    /*
+    // check for half-connected joints - actually now these are valid
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) {
+    if (j->node[0].body || j->node[1].body) {
+    if (!(j->node[0].body && j->node[1].body))
+    dDebug (0,"half connected joint found");
+    }
+    }
+    */
+
+    // check that every joint node appears in the joint lists of both bodies it
+    // attaches
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) {
+        for (int i=0; i<2; i++) {
+            if (j->node[i].body) {
+                int ok = 0;
+                for (dxJointNode *n=j->node[i].body->firstjoint; n; n=n->next) {
+                    if (n->joint == j) ok = 1;
+                }
+                if (ok==0) dDebug (0,"joint not in joint list of attached body");
+            }
+        }
+    }
+
+    // check all body joint lists (correct body ptrs): a node found in the list
+    // of body b must have b stored in the *other* node of the same joint
+    for (b=w->firstbody; b; b=(dxBody*)b->next) {
+        for (dxJointNode *n=b->firstjoint; n; n=n->next) {
+            if (&n->joint->node[0] == n) {
+                if (n->joint->node[1].body != b)
+                    dDebug (0,"bad body pointer in joint node of body list (1)");
+            }
+            else {
+                if (n->joint->node[0].body != b)
+                    dDebug (0,"bad body pointer in joint node of body list (2)");
+            }
+            if (n->joint->tag != count) dDebug (0,"bad joint node pointer in body"); // joint was not tagged above
+        }
+    }
+
+    // check all body pointers in joints, check they are distinct
+    for (j=w->firstjoint; j; j=(dxJoint*)j->next) {
+        if (j->node[0].body && (j->node[0].body == j->node[1].body))
+            dDebug (0,"non-distinct body pointers in joint");
+        if ((j->node[0].body && j->node[0].body->tag != count) ||
+            (j->node[1].body && j->node[1].body->tag != count))
+            dDebug (0,"bad body pointer in joint"); // body was not tagged above
+    }
+}
+
+
+void dWorldCheck (dxWorld *w)
+{
+    checkWorld (w); // non-static entry point; all checks are done by the file-local checkWorld()
+}
+
+//****************************************************************************
+// body
+
+dxBody::dxBody(dxWorld *w) :
+dObject(w)
+{
+    // body-specific fields are not set here; dBodyCreate() assigns them
+}
+
+
+dxWorld* dBodyGetWorld (dxBody * b)
+{
+    dAASSERT (b); // argument assertion on b
+    return b->world; // the world pointer stored by the dObject constructor
+}
+
+dxBody *dBodyCreate (dxWorld *w)
+{
+    dAASSERT (w);
+    dxBody *b = new dxBody(w); // the constructor only sets the dObject part; every other field is assigned below
+    b->firstjoint = NULL;
+    b->flags = 0;
+    b->geom = NULL;
+    b->average_lvel_buffer = NULL;
+    b->average_avel_buffer = NULL;
+    dMassSetParameters (&b->mass,1,0,0,0,1,1,1,0,0,0);
+    dSetZero (b->invI,4*3);
+    b->invI[0] = 1; // entries 0, 5 and 10 are set to 1 after all 12 were zeroed
+    b->invI[5] = 1;
+    b->invI[10] = 1;
+    b->invMass = 1;
+    dSetZero (b->posr.pos,4);
+    dSetZero (b->q,4);
+    b->q[0] = 1; // quaternion becomes (1,0,0,0)
+    dRSetIdentity (b->posr.R);
+    dSetZero (b->lvel,4);
+    dSetZero (b->avel,4);
+    dSetZero (b->facc,4);
+    dSetZero (b->tacc,4);
+    dSetZero (b->finite_rot_axis,4);
+    addObjectToList (b,(dObject **) &w->firstbody); // the new body becomes the head of the world's body list
+    w->nb++;
+
+    // set auto-disable parameters
+    b->average_avel_buffer = b->average_lvel_buffer = NULL; // no buffer at beginning
+    dBodySetAutoDisableDefaults (b);	// must do this after adding to world
+    b->adis_stepsleft = b->adis.idle_steps;
+    b->adis_timeleft = b->adis.idle_time;
+    b->average_counter = 0;
+    b->average_ready = 0; // average buffer not filled on the beginning
+    dBodySetAutoDisableAverageSamplesCount(b, b->adis.average_samples);
+
+    b->moved_callback = NULL;
+
+    dBodySetDampingDefaults(b);	// must do this after adding to world
+
+    b->flags |= w->body_flags & dxBodyMaxAngularSpeed; // only the max-angular-speed bit is taken over from the world here
+    b->max_angular_speed = w->max_angular_speed;
+
+    b->flags |= dxBodyGyroscopic; // the gyroscopic flag is set on every new body
+
+    return b;
+}
+
+
+void dBodyDestroy (dxBody *b)
+{
+    dAASSERT (b);
+    // Destroy body b: detach its geoms and joints, unlink it from its world, free its buffers and the body itself.
+    // all geoms that link to this body must be notified that the body is about
+    // to disappear. note that the call to dGeomSetBody(geom,0) will result in
+    // dGeomGetBodyNext() returning 0 for the geom, so we must get the next geom
+    // before setting the body to 0.
+    dxGeom *next_geom = NULL;
+    for (dxGeom *geom = b->geom; geom; geom = next_geom) {
+        next_geom = dGeomGetBodyNext (geom);
+        dGeomSetBody (geom,0);
+    }
+
+    // detach all neighbouring joints, then delete this body.
+    dxJointNode *n = b->firstjoint;
+    while (n) {
+        // sneaky trick to speed up removal of joint references (black magic): clear the joint's pointer to this body first,
+        n->joint->node[(n == n->joint->node)].body = NULL;
+        // so that removeJointReferencesFromAttachedBodies() below skips this body's list, which is being walked here.
+        dxJointNode *next = n->next;
+        n->next = NULL;
+        removeJointReferencesFromAttachedBodies (n->joint);
+        n = next;
+    }
+    removeObjectFromList (b);
+    b->world->nb--;
+
+    // delete the average buffers
+    if(b->average_lvel_buffer)
+    {
+        delete[] (b->average_lvel_buffer);
+        b->average_lvel_buffer = NULL;
+    }
+    if(b->average_avel_buffer)
+    {
+        delete[] (b->average_avel_buffer);
+        b->average_avel_buffer = NULL;
+    }
+
+    delete b;
+}
+
+
+void dBodySetData (dBodyID b, void *data)
+{
+    dAASSERT (b);
+    b->userdata = data; // stored as-is in the userdata field inherited from dObject
+}
+
+
+void *dBodyGetData (dBodyID b)
+{
+    dAASSERT (b);
+    return b->userdata; // the pointer last stored by dBodySetData(), NULL if none was stored
+}
+
+
+void dBodySetPosition (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    dReal *position = b->posr.pos;
+    position[0] = x;
+    position[1] = y;
+    position[2] = z;
+    // every geom attached to the body is told that the body has moved
+    for (dxGeom *attached = b->geom; attached != NULL; attached = dGeomGetBodyNext (attached))
+        dGeomMoved (attached);
+}
+
+
+void dBodySetRotation (dBodyID b, const dMatrix3 R)
+{
+    dAASSERT (b && R);
+    // Set the body orientation from R: store an orthogonalized copy of R together with the matching quaternion.
+    memcpy(b->posr.R, R, sizeof(dMatrix3));
+    // orthogonalize the stored copy; the caller's matrix is const and stays as passed in
+    bool bOrthogonalizeResult = dxOrthogonalizeR(b->posr.R);
+    dAVERIFY(bOrthogonalizeResult);
+    // convert the stored (orthogonalized) matrix rather than the raw input, so that q and posr.R describe the same rotation
+    dRtoQ (b->posr.R, b->q);
+    dNormalize4 (b->q);
+
+    // notify all attached geoms that this body has moved
+    for (dxGeom *geom = b->geom; geom; geom = dGeomGetBodyNext (geom)) {
+        dGeomMoved (geom);
+    }
+}
+
+
+void dBodySetQuaternion (dBodyID b, const dQuaternion q)
+{
+    dAASSERT (b && q);
+    // copy the four components, then normalize the stored copy (the
+    // argument itself is const and is not modified)
+    for (int i = 0; i < 4; ++i) b->q[i] = q[i];
+    dNormalize4 (b->q);
+    // rebuild the rotation matrix from the normalized quaternion
+    dQtoR (b->q,b->posr.R);
+
+    // every geom attached to the body is told that the body has moved
+    for (dxGeom *attached = b->geom; attached != NULL; attached = dGeomGetBodyNext (attached))
+        dGeomMoved (attached);
+}
+
+
+void dBodySetLinearVel  (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    // overwrite the first three components of the linear velocity
+    const dReal value[3] = { x, y, z };
+    for (int i = 0; i < 3; ++i) b->lvel[i] = value[i];
+}
+
+
+void dBodySetAngularVel (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    // overwrite the first three components of the angular velocity
+    const dReal value[3] = { x, y, z };
+    for (int i = 0; i < 3; ++i) b->avel[i] = value[i];
+}
+
+
+const dReal * dBodyGetPosition (dBodyID b)
+{
+    dAASSERT (b);
+    return b->posr.pos; // pointer into the body itself, not a copy
+}
+
+
+void dBodyCopyPosition (dBodyID b, dVector3 pos)
+{
+    dAASSERT (b);
+    // copy the first three components of the stored position into the
+    // caller's vector; pos[3] is not written
+    const dReal *stored = b->posr.pos;
+    for (int i = 0; i < 3; ++i) pos[i] = stored[i];
+}
+
+
+const dReal * dBodyGetRotation (dBodyID b)
+{
+    dAASSERT (b);
+    return b->posr.R; // pointer into the body itself, not a copy
+}
+
+
+void dBodyCopyRotation (dBodyID b, dMatrix3 R)
+{
+    dAASSERT (b);
+    const dReal* src = b->posr.R; // entries 0..11 of the stored matrix are copied one by one into the caller's R
+    R[0] = src[0];
+    R[1] = src[1];
+    R[2] = src[2];
+    R[3] = src[3];
+    R[4] = src[4];
+    R[5] = src[5];
+    R[6] = src[6];
+    R[7] = src[7];
+    R[8] = src[8];
+    R[9] = src[9];
+    R[10] = src[10];
+    R[11] = src[11];
+}
+
+
+const dReal * dBodyGetQuaternion (dBodyID b)
+{
+    dAASSERT (b);
+    return b->q; // pointer into the body itself, not a copy
+}
+
+
+void dBodyCopyQuaternion (dBodyID b, dQuaternion quat)
+{
+    dAASSERT (b);
+    // write a copy of the four quaternion components into the caller's
+    // array, so the caller does not hold a pointer into the body
+    // (dBodyGetQuaternion returns the internal array instead)
+    const dReal *stored = b->q;
+    for (int i = 0; i < 4; ++i) quat[i] = stored[i];
+}
+
+
+const dReal * dBodyGetLinearVel (dBodyID b)
+{
+    dAASSERT (b);
+    return b->lvel; // pointer into the body itself, not a copy
+}
+
+
+const dReal * dBodyGetAngularVel (dBodyID b)
+{
+    dAASSERT (b);
+    return b->avel; // pointer into the body itself, not a copy
+}
+
+
+void dBodySetMass (dBodyID b, const dMass *mass)
+{
+    dAASSERT (b && mass );
+    dIASSERT(dMassCheck(mass));
+    // Store a copy of *mass in the body and refresh the cached inverse inertia (invI) and inverse mass (invMass).
+    // The centre of mass must be at the origin.
+    // Use dMassTranslate( mass, -mass->c[0], -mass->c[1], -mass->c[2] ) to correct it.
+    dUASSERT( fabs( mass->c[0] ) <= dEpsilon &&
+        fabs( mass->c[1] ) <= dEpsilon &&
+        fabs( mass->c[2] ) <= dEpsilon, "The centre of mass must be at the origin." );
+
+    b->mass = *mass;
+    if (dInvertPDMatrix (b->mass.I,b->invI,3,NULL)==0) {
+        dDEBUGMSG ("inertia must be positive definite!");
+        dRSetIdentity (b->invI); // inversion failed: invI falls back to the identity
+    }
+    b->invMass = dRecip(b->mass.mass);
+}
+
+
+void dBodyGetMass (dBodyID b, dMass *mass)
+{
+    dAASSERT (b && mass);
+    *mass = b->mass; // the caller receives a copy of the body's mass parameters
+}
+
+
+// Adds (fx, fy, fz) to the body's force accumulator (facc).
+void dBodyAddForce (dBodyID b, dReal fx, dReal fy, dReal fz)
+{
+    dAASSERT (b);
+    const dReal delta[3] = { fx, fy, fz };
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += delta[i];
+    }
+}
+
+
+// Adds (fx, fy, fz) to the body's torque accumulator (tacc).
+void dBodyAddTorque (dBodyID b, dReal fx, dReal fy, dReal fz)
+{
+    dAASSERT (b);
+    const dReal delta[3] = { fx, fy, fz };
+    for (int i = 0; i < 3; ++i) {
+        b->tacc[i] += delta[i];
+    }
+}
+
+
+// Transforms the vector (fx, fy, fz) by the body's rotation matrix posr.R
+// (via dMultiply0_331) and adds the result to the force accumulator (facc).
+void dBodyAddRelForce (dBodyID b, dReal fx, dReal fy, dReal fz)
+{
+    dAASSERT (b);
+    dVector3 local_force, rotated_force;
+    local_force[0] = fx;
+    local_force[1] = fy;
+    local_force[2] = fz;
+    local_force[3] = 0;
+    dMultiply0_331 (rotated_force,b->posr.R,local_force);
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += rotated_force[i];
+    }
+}
+
+
+// Transforms the vector (fx, fy, fz) by the body's rotation matrix posr.R
+// (via dMultiply0_331) and adds the result to the torque accumulator (tacc).
+void dBodyAddRelTorque (dBodyID b, dReal fx, dReal fy, dReal fz)
+{
+    dAASSERT (b);
+    dVector3 local_torque, rotated_torque;
+    local_torque[0] = fx;
+    local_torque[1] = fy;
+    local_torque[2] = fz;
+    local_torque[3] = 0;
+    dMultiply0_331 (rotated_torque,b->posr.R,local_torque);
+    for (int i = 0; i < 3; ++i) {
+        b->tacc[i] += rotated_torque[i];
+    }
+}
+
+
+// Adds the force (fx, fy, fz) to facc and accumulates the matching torque
+// into tacc. The lever arm is the point (px, py, pz) minus the body
+// position posr.pos; the torque is accumulated with dAddVectorCross3
+// (presumably arm x force -- confirm against that helper).
+void dBodyAddForceAtPos (dBodyID b, dReal fx, dReal fy, dReal fz,
+                         dReal px, dReal py, dReal pz)
+{
+    dAASSERT (b);
+
+    dVector3 force, arm;
+    force[0] = fx;
+    force[1] = fy;
+    force[2] = fz;
+    arm[0] = px - b->posr.pos[0];
+    arm[1] = py - b->posr.pos[1];
+    arm[2] = pz - b->posr.pos[2];
+
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += force[i];
+    }
+    dAddVectorCross3(b->tacc,arm,force);
+}
+
+
+// Adds the force (fx, fy, fz) unchanged to facc. The application point
+// (px, py, pz) is first transformed by posr.R (dMultiply0_331) and the
+// result is used as the lever arm passed to dAddVectorCross3 to accumulate
+// torque into tacc.
+void dBodyAddForceAtRelPos (dBodyID b, dReal fx, dReal fy, dReal fz,
+                            dReal px, dReal py, dReal pz)
+{
+    dAASSERT (b);
+
+    dVector3 local_point, force, arm;
+    local_point[0] = px;
+    local_point[1] = py;
+    local_point[2] = pz;
+    local_point[3] = 0;
+    force[0] = fx;
+    force[1] = fy;
+    force[2] = fz;
+    force[3] = 0;
+
+    dMultiply0_331 (arm,b->posr.R,local_point);
+
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += force[i];
+    }
+    dAddVectorCross3(b->tacc,arm,force);
+}
+
+
+// Transforms the force (fx, fy, fz) by posr.R (dMultiply0_331), adds the
+// result to facc, and accumulates torque into tacc with dAddVectorCross3
+// using the lever arm (px, py, pz) - posr.pos.
+void dBodyAddRelForceAtPos (dBodyID b, dReal fx, dReal fy, dReal fz,
+                            dReal px, dReal py, dReal pz)
+{
+    dAASSERT (b);
+
+    dVector3 local_force, force, arm;
+    local_force[0] = fx;
+    local_force[1] = fy;
+    local_force[2] = fz;
+    local_force[3] = 0;
+    dMultiply0_331 (force,b->posr.R,local_force);
+
+    arm[0] = px - b->posr.pos[0];
+    arm[1] = py - b->posr.pos[1];
+    arm[2] = pz - b->posr.pos[2];
+
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += force[i];
+    }
+    dAddVectorCross3(b->tacc,arm,force);
+}
+
+
+// Transforms both the force (fx, fy, fz) and the point (px, py, pz) by
+// posr.R (dMultiply0_331). The transformed force is added to facc and the
+// transformed point is used as the lever arm for dAddVectorCross3, which
+// accumulates torque into tacc.
+void dBodyAddRelForceAtRelPos (dBodyID b, dReal fx, dReal fy, dReal fz,
+                               dReal px, dReal py, dReal pz)
+{
+    dAASSERT (b);
+
+    dVector3 local_force, local_point, force, arm;
+    local_force[0] = fx;
+    local_force[1] = fy;
+    local_force[2] = fz;
+    local_force[3] = 0;
+    local_point[0] = px;
+    local_point[1] = py;
+    local_point[2] = pz;
+    local_point[3] = 0;
+
+    dMultiply0_331 (force,b->posr.R,local_force);
+    dMultiply0_331 (arm,b->posr.R,local_point);
+
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] += force[i];
+    }
+    dAddVectorCross3(b->tacc,arm,force);
+}
+
+
+// Returns a pointer to the body's force accumulator (facc); no copy is made.
+const dReal * dBodyGetForce (dBodyID b)
+{
+    dAASSERT (b);
+    const dReal *force = b->facc;
+    return force;
+}
+
+
+// Returns a pointer to the body's torque accumulator (tacc); no copy is made.
+const dReal * dBodyGetTorque (dBodyID b)
+{
+    dAASSERT (b);
+    const dReal *torque = b->tacc;
+    return torque;
+}
+
+
+// Overwrites the first three components of the force accumulator (facc).
+void dBodySetForce (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    const dReal value[3] = { x, y, z };
+    for (int i = 0; i < 3; ++i) {
+        b->facc[i] = value[i];
+    }
+}
+
+
+// Overwrites the first three components of the torque accumulator (tacc).
+void dBodySetTorque (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    const dReal value[3] = { x, y, z };
+    for (int i = 0; i < 3; ++i) {
+        b->tacc[i] = value[i];
+    }
+}
+
+
+// Transforms the point (px, py, pz) by posr.R (dMultiply0_331), adds the
+// body position posr.pos, and writes the three components to result.
+void dBodyGetRelPointPos (dBodyID b, dReal px, dReal py, dReal pz,
+                          dVector3 result)
+{
+    dAASSERT (b);
+    dVector3 local_point, rotated;
+    local_point[0] = px;
+    local_point[1] = py;
+    local_point[2] = pz;
+    local_point[3] = 0;
+    dMultiply0_331 (rotated,b->posr.R,local_point);
+    for (int i = 0; i < 3; ++i) {
+        result[i] = rotated[i] + b->posr.pos[i];
+    }
+}
+
+
+// Writes into result the body's linear velocity plus the contribution added
+// by dAddVectorCross3(result, avel, p), where p is the point (px, py, pz)
+// transformed by posr.R.
+void dBodyGetRelPointVel (dBodyID b, dReal px, dReal py, dReal pz,
+                          dVector3 result)
+{
+    dAASSERT (b);
+    dVector3 local_point, offset;
+    local_point[0] = px;
+    local_point[1] = py;
+    local_point[2] = pz;
+    local_point[3] = 0;
+    dMultiply0_331 (offset,b->posr.R,local_point);
+    for (int i = 0; i < 3; ++i) {
+        result[i] = b->lvel[i];
+    }
+    dAddVectorCross3(result,b->avel,offset);
+}
+
+
+// Writes into result the body's linear velocity plus the contribution added
+// by dAddVectorCross3(result, avel, p), where p is (px, py, pz) - posr.pos.
+void dBodyGetPointVel (dBodyID b, dReal px, dReal py, dReal pz,
+                       dVector3 result)
+{
+    dAASSERT (b);
+    const dReal point[3] = { px, py, pz };
+    dVector3 offset;
+    for (int i = 0; i < 3; ++i) {
+        offset[i] = point[i] - b->posr.pos[i];
+    }
+    offset[3] = 0;
+    for (int i = 0; i < 3; ++i) {
+        result[i] = b->lvel[i];
+    }
+    dAddVectorCross3(result,b->avel,offset);
+}
+
+
+// Subtracts the body position posr.pos from (px, py, pz) and passes the
+// difference through dMultiply1_331 with posr.R, writing to result.
+void dBodyGetPosRelPoint (dBodyID b, dReal px, dReal py, dReal pz,
+                          dVector3 result)
+{
+    dAASSERT (b);
+    const dReal point[3] = { px, py, pz };
+    dVector3 offset;
+    for (int i = 0; i < 3; ++i) {
+        offset[i] = point[i] - b->posr.pos[i];
+    }
+    offset[3] = 0;
+    dMultiply1_331 (result,b->posr.R,offset);
+}
+
+
+// Transforms the vector (px, py, pz) by posr.R using dMultiply0_331 and
+// writes the outcome to result.
+void dBodyVectorToWorld (dBodyID b, dReal px, dReal py, dReal pz,
+                         dVector3 result)
+{
+    dAASSERT (b);
+    dVector3 input;
+    input[0] = px;
+    input[1] = py;
+    input[2] = pz;
+    input[3] = 0;
+    dMultiply0_331 (result,b->posr.R,input);
+}
+
+
+// Transforms the vector (px, py, pz) by posr.R using dMultiply1_331 and
+// writes the outcome to result.
+void dBodyVectorFromWorld (dBodyID b, dReal px, dReal py, dReal pz,
+                           dVector3 result)
+{
+    dAASSERT (b);
+    dVector3 input;
+    input[0] = px;
+    input[1] = py;
+    input[2] = pz;
+    input[3] = 0;
+    dMultiply1_331 (result,b->posr.R,input);
+}
+
+
+// Clears both finite-rotation flags, then, for a non-zero mode, sets
+// dxBodyFlagFiniteRotation and additionally dxBodyFlagFiniteRotationAxis
+// when the stored finite_rot_axis has any non-zero component.
+void dBodySetFiniteRotationMode (dBodyID b, int mode)
+{
+    dAASSERT (b);
+    b->flags &= ~(dxBodyFlagFiniteRotation | dxBodyFlagFiniteRotationAxis);
+    if (!mode) {
+        return;
+    }
+
+    b->flags |= dxBodyFlagFiniteRotation;
+
+    const bool has_axis = b->finite_rot_axis[0] != 0 ||
+                          b->finite_rot_axis[1] != 0 ||
+                          b->finite_rot_axis[2] != 0;
+    if (has_axis) {
+        b->flags |= dxBodyFlagFiniteRotationAxis;
+    }
+}
+
+
+// Stores (x, y, z) as the finite rotation axis. An all-zero axis clears
+// dxBodyFlagFiniteRotationAxis; otherwise the stored axis is passed to
+// dNormalize3 and the flag is set.
+void dBodySetFiniteRotationAxis (dBodyID b, dReal x, dReal y, dReal z)
+{
+    dAASSERT (b);
+    b->finite_rot_axis[0] = x;
+    b->finite_rot_axis[1] = y;
+    b->finite_rot_axis[2] = z;
+
+    if (x == 0 && y == 0 && z == 0) {
+        b->flags &= ~dxBodyFlagFiniteRotationAxis;
+        return;
+    }
+
+    dNormalize3 (b->finite_rot_axis);
+    b->flags |= dxBodyFlagFiniteRotationAxis;
+}
+
+
+// Returns 1 when dxBodyFlagFiniteRotation is set on the body, 0 otherwise.
+int dBodyGetFiniteRotationMode (dBodyID b)
+{
+    dAASSERT (b);
+    const bool finite = (b->flags & dxBodyFlagFiniteRotation) != 0;
+    return finite;
+}
+
+
+// Copies the three components of the stored finite rotation axis to result.
+void dBodyGetFiniteRotationAxis (dBodyID b, dVector3 result)
+{
+    dAASSERT (b);
+    for (int i = 0; i < 3; ++i) {
+        result[i] = b->finite_rot_axis[i];
+    }
+}
+
+
+// Counts the nodes in the body's joint list by walking firstjoint -> next.
+int dBodyGetNumJoints (dBodyID b)
+{
+    dAASSERT (b);
+    int total = 0;
+    dxJointNode *node = b->firstjoint;
+    while (node) {
+        ++total;
+        node = node->next;
+    }
+    return total;
+}
+
+
+// Returns the joint stored in the index-th node (0-based) of the body's
+// joint list, or 0 when no node has that position.
+dJointID dBodyGetJoint (dBodyID b, int index)
+{
+    dAASSERT (b);
+    int position = 0;
+    dxJointNode *node = b->firstjoint;
+    while (node) {
+        if (position == index) {
+            return node->joint;
+        }
+        node = node->next;
+        ++position;
+    }
+    return 0;
+}
+
+// Re-applies the body's own stored mass through dBodySetMass.
+void dBodySetDynamic (dBodyID b)
+{
+    dAASSERT (b);
+
+    const dMass *stored_mass = &b->mass;
+    dBodySetMass(b,stored_mass);
+}
+
+// Zeroes the inverse mass and the 4*3 elements of the inverse inertia
+// matrix (invI); dBodyIsKinematic tests invMass == 0.
+void dBodySetKinematic (dBodyID b)
+{
+    dAASSERT (b);
+    b->invMass = 0;
+    dSetZero (b->invI,4*3);
+}
+
+// Returns 1 when the body's inverse mass is exactly zero, 0 otherwise.
+int dBodyIsKinematic (dBodyID b)
+{
+    dAASSERT (b);
+    const bool kinematic = (b->invMass == 0);
+    return kinematic;
+}
+
+// Clears dxBodyDisabled and reloads the auto-disable countdowns
+// (adis_stepsleft / adis_timeleft) from the body's adis settings.
+void dBodyEnable (dBodyID b)
+{
+    dAASSERT (b);
+    b->flags = b->flags & ~dxBodyDisabled;
+    b->adis_timeleft = b->adis.idle_time;
+    b->adis_stepsleft = b->adis.idle_steps;
+    // no code for average-processing needed here
+}
+
+
+// Sets the dxBodyDisabled flag on the body.
+void dBodyDisable (dBodyID b)
+{
+    dAASSERT (b);
+    b->flags = b->flags | dxBodyDisabled;
+}
+
+
+// Returns 1 when dxBodyDisabled is not set on the body, 0 otherwise.
+int dBodyIsEnabled (dBodyID b)
+{
+    dAASSERT (b);
+    const bool enabled = (b->flags & dxBodyDisabled) == 0;
+    return enabled;
+}
+
+
+// A non-zero mode clears dxBodyNoGravity; a zero mode sets it.
+void dBodySetGravityMode (dBodyID b, int mode)
+{
+    dAASSERT (b);
+    if (!mode) {
+        b->flags |= dxBodyNoGravity;
+        return;
+    }
+    b->flags &= ~dxBodyNoGravity;
+}
+
+
+// Returns 1 when dxBodyNoGravity is not set on the body, 0 otherwise.
+int dBodyGetGravityMode (dBodyID b)
+{
+    dAASSERT (b);
+    const bool gravity_on = (b->flags & dxBodyNoGravity) == 0;
+    return gravity_on;
+}
+
+
+// body auto-disable functions
+
+// Returns dSqrt of the stored adis.linear_average_threshold (the setter
+// stores the square of the value it is given).
+dReal dBodyGetAutoDisableLinearThreshold (dBodyID b)
+{
+    dAASSERT(b);
+    const dReal squared = b->adis.linear_average_threshold;
+    return dSqrt (squared);
+}
+
+
+// Stores the square of linear_average_threshold in the body's adis settings.
+void dBodySetAutoDisableLinearThreshold (dBodyID b, dReal linear_average_threshold)
+{
+    dAASSERT(b);
+    const dReal squared = linear_average_threshold * linear_average_threshold;
+    b->adis.linear_average_threshold = squared;
+}
+
+
+// Returns dSqrt of the stored adis.angular_average_threshold (the setter
+// stores the square of the value it is given).
+dReal dBodyGetAutoDisableAngularThreshold (dBodyID b)
+{
+    dAASSERT(b);
+    const dReal squared = b->adis.angular_average_threshold;
+    return dSqrt (squared);
+}
+
+
+// Stores the square of angular_average_threshold in the body's adis settings.
+void dBodySetAutoDisableAngularThreshold (dBodyID b, dReal angular_average_threshold)
+{
+    dAASSERT(b);
+    const dReal squared = angular_average_threshold * angular_average_threshold;
+    b->adis.angular_average_threshold = squared;
+}
+
+
+// Returns the body's configured auto-disable sample count
+// (adis.average_samples).
+int dBodyGetAutoDisableAverageSamplesCount (dBodyID b)
+{
+    dAASSERT(b);
+    const int samples = b->adis.average_samples;
+    return samples;
+}
+
+
+// Stores the new sample count, releases any existing averaging buffers and,
+// for a count greater than zero, allocates fresh linear/angular velocity
+// buffers of that length. The averaging state (average_counter,
+// average_ready) is reset either way.
+void dBodySetAutoDisableAverageSamplesCount (dBodyID b, unsigned int average_samples_count)
+{
+    dAASSERT(b);
+    b->adis.average_samples = average_samples_count;
+
+    // discard the previous buffers; both pointers end up NULL
+    if (b->average_lvel_buffer) {
+        delete[] b->average_lvel_buffer;
+    }
+    b->average_lvel_buffer = NULL;
+
+    if (b->average_avel_buffer) {
+        delete[] b->average_avel_buffer;
+    }
+    b->average_avel_buffer = NULL;
+
+    // allocate replacements only when samples are actually requested
+    if (b->adis.average_samples > 0) {
+        b->average_lvel_buffer = new dVector3[b->adis.average_samples];
+        b->average_avel_buffer = new dVector3[b->adis.average_samples];
+    }
+
+    // new buffer is empty
+    b->average_counter = 0;
+    b->average_ready = 0;
+}
+
+
+// Returns the body's adis.idle_steps setting.
+int dBodyGetAutoDisableSteps (dBodyID b)
+{
+    dAASSERT(b);
+    const int steps = b->adis.idle_steps;
+    return steps;
+}
+
+
+// Stores steps as the body's adis.idle_steps setting.
+void dBodySetAutoDisableSteps (dBodyID b, int steps)
+{
+    dAASSERT(b);
+    dxBody &body = *b;
+    body.adis.idle_steps = steps;
+}
+
+
+// Returns the body's adis.idle_time setting.
+dReal dBodyGetAutoDisableTime (dBodyID b)
+{
+    dAASSERT(b);
+    const dReal idle_time = b->adis.idle_time;
+    return idle_time;
+}
+
+
+// Stores time as the body's adis.idle_time setting.
+void dBodySetAutoDisableTime (dBodyID b, dReal time)
+{
+    dAASSERT(b);
+    dxBody &body = *b;
+    body.adis.idle_time = time;
+}
+
+
+// Returns 1 when dxBodyAutoDisable is set on the body, 0 otherwise.
+int dBodyGetAutoDisableFlag (dBodyID b)
+{
+    dAASSERT(b);
+    const bool auto_disable = (b->flags & dxBodyAutoDisable) != 0;
+    return auto_disable;
+}
+
+
+// A non-zero do_auto_disable just sets dxBodyAutoDisable. A zero value
+// clears dxBodyAutoDisable and dxBodyDisabled, reloads idle_steps/idle_time
+// from the world, and resets the averaging buffers to the world's sample
+// count.
+void dBodySetAutoDisableFlag (dBodyID b, int do_auto_disable)
+{
+    dAASSERT(b);
+    if (do_auto_disable)
+    {
+        b->flags |= dxBodyAutoDisable;
+        return;
+    }
+
+    // (mg) we should also reset the IsDisabled state to correspond to the DoDisabling flag
+    b->flags &= ~(dxBodyAutoDisable | dxBodyDisabled);
+
+    b->adis.idle_steps = dWorldGetAutoDisableSteps(b->world);
+    b->adis.idle_time = dWorldGetAutoDisableTime(b->world);
+
+    // resetting the average calculations too
+    dBodySetAutoDisableAverageSamplesCount(b, dWorldGetAutoDisableAverageSamplesCount(b->world) );
+}
+
+
+// Copies the world's adis settings into the body, then applies the world's
+// dxBodyAutoDisable bit through dBodySetAutoDisableFlag.
+void dBodySetAutoDisableDefaults (dBodyID b)
+{
+    dAASSERT(b);
+    dWorldID world = b->world;
+    dAASSERT(world);
+    b->adis = world->adis;
+    const int world_flag = world->body_flags & dxBodyAutoDisable;
+    dBodySetAutoDisableFlag (b, world_flag);
+}
+
+
+// body damping functions
+
+// Returns the body's dampingp.linear_scale.
+dReal dBodyGetLinearDamping(dBodyID b)
+{
+    dAASSERT(b);
+    const dReal scale = b->dampingp.linear_scale;
+    return scale;
+}
+
+// Stores scale as dampingp.linear_scale and sets dxBodyLinearDamping when
+// scale is non-zero, clearing it otherwise.
+void dBodySetLinearDamping(dBodyID b, dReal scale)
+{
+    dAASSERT(b);
+    b->dampingp.linear_scale = scale;
+    if (scale != 0) {
+        b->flags |= dxBodyLinearDamping;
+    } else {
+        b->flags &= ~dxBodyLinearDamping;
+    }
+}
+
+// Returns the body's dampingp.angular_scale.
+dReal dBodyGetAngularDamping(dBodyID b)
+{
+    dAASSERT(b);
+    const dReal scale = b->dampingp.angular_scale;
+    return scale;
+}
+
+// Stores scale as dampingp.angular_scale and sets dxBodyAngularDamping when
+// scale is non-zero, clearing it otherwise.
+void dBodySetAngularDamping(dBodyID b, dReal scale)
+{
+    dAASSERT(b);
+    b->dampingp.angular_scale = scale;
+    if (scale != 0) {
+        b->flags |= dxBodyAngularDamping;
+    } else {
+        b->flags &= ~dxBodyAngularDamping;
+    }
+}
+
+// Convenience wrapper: applies linear_scale via dBodySetLinearDamping and
+// then angular_scale via dBodySetAngularDamping.
+void dBodySetDamping(dBodyID b, dReal linear_scale, dReal angular_scale)
+{
+    dAASSERT(b);
+
+    // linear part first, then angular
+    dBodySetLinearDamping(b, linear_scale);
+    dBodySetAngularDamping(b, angular_scale);
+}
+
+// Returns dSqrt of the stored dampingp.linear_threshold (the setter stores
+// the square of the value it is given).
+dReal dBodyGetLinearDampingThreshold(dBodyID b)
+{
+    dAASSERT(b);
+    const dReal squared = b->dampingp.linear_threshold;
+    return dSqrt(squared);
+}
+
+// Stores the square of threshold as dampingp.linear_threshold.
+void dBodySetLinearDampingThreshold(dBodyID b, dReal threshold)
+{
+    dAASSERT(b);
+    const dReal squared = threshold*threshold;
+    b->dampingp.linear_threshold = squared;
+}
+
+
+// Returns dSqrt of the stored dampingp.angular_threshold (the setter stores
+// the square of the value it is given).
+dReal dBodyGetAngularDampingThreshold(dBodyID b)
+{
+    dAASSERT(b);
+    const dReal squared = b->dampingp.angular_threshold;
+    return dSqrt(squared);
+}
+
+// Stores the square of threshold as dampingp.angular_threshold.
+void dBodySetAngularDampingThreshold(dBodyID b, dReal threshold)
+{
+    dAASSERT(b);
+    const dReal squared = threshold*threshold;
+    b->dampingp.angular_threshold = squared;
+}
+
+// Copies the world's damping parameters into the body and replaces the
+// body's linear/angular damping flag bits with the world's body_flags bits.
+void dBodySetDampingDefaults(dBodyID b)
+{
+    dAASSERT(b);
+    dWorldID world = b->world;
+    dAASSERT(world);
+
+    b->dampingp = world->dampingp;
+
+    const unsigned damping_bits = dxBodyLinearDamping | dxBodyAngularDamping;
+    // keep every non-damping bit, take the damping bits from the world
+    b->flags = (b->flags & ~damping_bits) | (world->body_flags & damping_bits);
+}
+
+// Returns the body's stored max_angular_speed.
+dReal dBodyGetMaxAngularSpeed(dBodyID b)
+{
+    dAASSERT(b);
+    const dReal limit = b->max_angular_speed;
+    return limit;
+}
+
+// Stores max_speed and sets dxBodyMaxAngularSpeed when max_speed compares
+// less than dInfinity; otherwise the flag is cleared.
+void dBodySetMaxAngularSpeed(dBodyID b, dReal max_speed)
+{
+    dAASSERT(b);
+    const bool limited = max_speed < dInfinity;
+    if (limited) {
+        b->flags |= dxBodyMaxAngularSpeed;
+    } else {
+        b->flags &= ~dxBodyMaxAngularSpeed;
+    }
+    b->max_angular_speed = max_speed;
+}
+
+// Stores callback in the body's moved_callback field.
+void dBodySetMovedCallback(dBodyID b, void (*callback)(dBodyID))
+{
+    dAASSERT(b);
+    dxBody &body = *b;
+    body.moved_callback = callback;
+}
+
+
+// Returns the geom stored in the body's geom field.
+dGeomID dBodyGetFirstGeom(dBodyID b)
+{
+    dAASSERT(b);
+    dGeomID first = b->geom;
+    return first;
+}
+
+
+// Forwards to dGeomGetBodyNext to obtain the geom that follows geom.
+dGeomID dBodyGetNextGeom(dGeomID geom)
+{
+    dAASSERT(geom);
+    dGeomID following = dGeomGetBodyNext(geom);
+    return following;
+}
+
+
+// Returns 1 when dxBodyGyroscopic is set on the body, 0 otherwise.
+// The result is normalized to 0/1 (rather than returning the raw flag bit)
+// so it matches the other boolean getters in this file, such as
+// dBodyGetGravityMode and dBodyGetAutoDisableFlag.
+int dBodyGetGyroscopicMode(dBodyID b)
+{
+    dAASSERT(b);
+    return ((b->flags & dxBodyGyroscopic) != 0);
+}
+
+// A non-zero enabled sets dxBodyGyroscopic; zero clears it.
+void dBodySetGyroscopicMode(dBodyID b, int enabled)
+{
+    dAASSERT(b);
+    if (!enabled) {
+        b->flags &= ~dxBodyGyroscopic;
+        return;
+    }
+    b->flags |= dxBodyGyroscopic;
+}
+
+
+
+//****************************************************************************
+// joints
+
+
+
+// Constructs a joint of type T for world w. With a group the joint is
+// obtained from group->alloc<T>(w); without one it is created with new.
+template<class T>
+dxJoint* createJoint(dWorldID w, dJointGroupID group)
+{
+    if (!group) {
+        return new T(w);
+    }
+    return group->alloc<T>(w);
+}
+
+
+// Creates a dxJointBall in world w through createJoint, passing group along.
+dxJoint * dJointCreateBall (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointBall>(w,group);
+    return joint;
+}
+
+
+// Creates a dxJointHinge in world w through createJoint, passing group along.
+dxJoint * dJointCreateHinge (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointHinge>(w,group);
+    return joint;
+}
+
+
+// Creates a dxJointSlider in world w through createJoint, passing group along.
+dxJoint * dJointCreateSlider (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointSlider>(w,group);
+    return joint;
+}
+
+
+// Creates a dxJointContact in world w through createJoint and copies *c
+// into the new joint's contact member.
+dxJoint * dJointCreateContact (dWorldID w, dJointGroupID group,
+                               const dContact *c)
+{
+    dAASSERT (w && c);
+    dxJoint *created = createJoint<dxJointContact> (w,group);
+    dxJointContact *contact_joint = static_cast<dxJointContact *>(created);
+    contact_joint->contact = *c;
+    return contact_joint;
+}
+
+
+// Creates a dxJointHinge2 in world w through createJoint, passing group along.
+dxJoint * dJointCreateHinge2 (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointHinge2> (w,group);
+    return joint;
+}
+
+
+// Creates a dxJointUniversal in world w through createJoint, passing group
+// along.
+dxJoint * dJointCreateUniversal (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointUniversal> (w,group);
+    return joint;
+}
+
+// Creates a dxJointPR in world w through createJoint, passing group along.
+dxJoint * dJointCreatePR (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointPR> (w,group);
+    return joint;
+}
+
+// Creates a dxJointPU in world w through createJoint, passing group along.
+dxJoint * dJointCreatePU (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointPU> (w,group);
+    return joint;
+}
+
+// Creates a dxJointPiston in world w through createJoint, passing group along.
+dxJoint * dJointCreatePiston (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointPiston> (w,group);
+    return joint;
+}
+
+// Creates a dxJointFixed in world w through createJoint, passing group along.
+dxJoint * dJointCreateFixed (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointFixed> (w,group);
+    return joint;
+}
+
+
+// Creates a dxJointNull in world w through createJoint, passing group along.
+dxJoint * dJointCreateNull (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointNull> (w,group);
+    return joint;
+}
+
+
+// Creates a dxJointAMotor in world w through createJoint, passing group along.
+dxJoint * dJointCreateAMotor (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointAMotor> (w,group);
+    return joint;
+}
+
+// Creates a dxJointLMotor in world w through createJoint, passing group along.
+dxJoint * dJointCreateLMotor (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointLMotor> (w,group);
+    return joint;
+}
+
+// Creates a dxJointPlane2D in world w through createJoint, passing group
+// along.
+dxJoint * dJointCreatePlane2D (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointPlane2D> (w,group);
+    return joint;
+}
+
+// Creates a dxJointDBall in world w through createJoint, passing group along.
+dxJoint * dJointCreateDBall (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointDBall> (w,group);
+    return joint;
+}
+
+// Creates a dxJointDHinge in world w through createJoint, passing group along.
+dxJoint * dJointCreateDHinge (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointDHinge> (w,group);
+    return joint;
+}
+
+
+// Creates a dxJointTransmission in world w through createJoint, passing
+// group along.
+dxJoint * dJointCreateTransmission (dWorldID w, dJointGroupID group)
+{
+    dAASSERT (w);
+    dxJoint *joint = createJoint<dxJointTransmission> (w,group);
+    return joint;
+}
+
+// Detaches joint j from its world (when it still has one) and then destroys
+// it: with delete_it the joint is deleted, otherwise only its destructor is
+// run and the storage is left for the caller to release.
+static void FinalizeAndDestroyJointInstance(dxJoint *j, bool delete_it)
+{
+    // if any group joints have their world pointer set to 0, their world was
+    // previously destroyed. no special handling is required for these joints.
+    const bool still_in_world = (j->world != NULL);
+    if (still_in_world) {
+        removeJointReferencesFromAttachedBodies (j);
+        removeObjectFromList (j);
+        j->world->nj--;
+    }
+
+    if (!delete_it) {
+        // destructor only; memory is not freed here
+        j->~dxJoint();
+        return;
+    }
+    delete j;
+}
+
+// Destroys a joint that is not part of a joint group. Joints flagged
+// dJOINT_INGROUP are left untouched.
+void dJointDestroy (dxJoint *j)
+{
+    dAASSERT (j);
+    if (j->flags & dJOINT_INGROUP) {
+        return;
+    }
+    FinalizeAndDestroyJointInstance(j, true);
+}
+
+
+// Allocates and returns a new, empty dxJointGroup. The size argument is
+// ignored.
+dJointGroupID dJointGroupCreate (int /*max_size*/)
+{
+    // not any more ... dUASSERT (max_size > 0,"max size must be > 0");
+    return new dxJointGroup();
+}
+
+
+// Empties the group with dJointGroupEmpty and then deletes the group object.
+void dJointGroupDestroy (dJointGroupID group)
+{
+    dAASSERT (group);
+
+    // the joints must be released before the group itself goes away
+    dJointGroupEmpty (group);
+
+    delete group;
+}
+
+// Finalizes every joint held by the group and then releases the group's
+// storage with freeAll(). Joints are destroyed in place (destructor only,
+// no delete) through FinalizeAndDestroyJointInstance(j, false).
+//
+// The joints are first exported into a pointer list so they can be walked
+// in reverse; the list lives on the stack for up to 1024 joints and is
+// allocated with dAlloc beyond that. If that allocation fails, the joints
+// are finalized in enumeration order instead.
+void dJointGroupEmpty (dJointGroupID group)
+{
+    dAASSERT (group);
+
+    const sizeint num_joints = group->getJointCount();
+    if (num_joints != 0) {
+        // Local array is used since ALLOCA leads to mysterious NULL values in first array element and crashes under VS2005 :)
+        const sizeint max_stack_jlist_size = 1024;
+        dxJoint *stack_jlist[max_stack_jlist_size];
+
+        const sizeint jlist_size = num_joints * sizeof(dxJoint*);
+        // use the stack buffer when it is large enough, otherwise the heap
+        dxJoint **jlist = num_joints <= max_stack_jlist_size ? stack_jlist : (dxJoint **)dAlloc(jlist_size);
+
+        if (jlist != NULL) {
+            // the joints in this group are detached starting from the most recently
+            // added (at the top of the stack). this helps ensure that the various
+            // linked lists are not traversed too much, as the joints will hopefully
+            // be at the start of those lists.
+            sizeint num_exported = group->exportJoints(jlist);
+            dIVERIFY(num_exported == num_joints);
+
+            // walk the exported list from the last entry down to the first
+            for (sizeint i = num_joints; i != 0; ) {
+                --i;
+                dxJoint *j = jlist[i];
+                FinalizeAndDestroyJointInstance(j, false);
+            }
+        } else {
+            // ...else if there is no memory, go on detaching the way it is possible
+            sizeint joint_bytes;
+            for (dxJoint *j = (dxJoint *)group->beginEnum(); j != NULL; j = (dxJoint *)group->continueEnum(joint_bytes)) {
+                joint_bytes = j->size(); // Get size before object is destroyed!
+                FinalizeAndDestroyJointInstance(j, false);
+            }
+        }
+
+        // all joints are finalized; release the group's storage in one go
+        group->freeAll();
+
+        // only a heap-allocated list needs to be freed
+        if (jlist != stack_jlist && jlist != NULL) {
+            dFree(jlist, jlist_size);
+        }
+    }
+}
+
+
+// Returns 0 when node[0] has no body, 1 when only node[1] has none, and 2
+// when both nodes have a body.
+int dJointGetNumBodies(dxJoint *joint)
+{
+    // check arguments
+    dUASSERT (joint,"bad joint argument");
+
+    if ( !joint->node[0].body ) {
+        return 0;
+    }
+    return joint->node[1].body ? 2 : 1;
+}
+
+
+// Attaches joint to body1 and body2, replacing any existing attachment.
+// Either body may be NULL; a NULL body1 is swapped into the second slot and
+// recorded with the dJOINT_REVERSE flag. Both bodies must belong to the
+// joint's world, must differ from each other, and joints flagged
+// dJOINT_TWOBODIES must receive either two bodies or none (all enforced
+// with dUASSERT).
+void dJointAttach (dxJoint *joint, dxBody *body1, dxBody *body2)
+{
+    // check arguments
+    dUASSERT (joint,"bad joint argument");
+    dUASSERT (body1 == NULL || body1 != body2, "can't have body1==body2");
+    dxWorld *world = joint->world;
+    dUASSERT ( (body1 == NULL || body1->world == world) &&
+        (body2 == NULL || body2->world == world),
+        "joint and bodies must be in same world");
+
+    // check if the joint can not be attached to just one body
+    dUASSERT (!((joint->flags & dJOINT_TWOBODIES) &&
+        ((body1 != NULL) != (body2 != NULL))),
+        "joint can not be attached to just one body");
+
+    // remove any existing body attachments
+    if (joint->node[0].body != NULL || joint->node[1].body != NULL) {
+        removeJointReferencesFromAttachedBodies (joint);
+    }
+
+    // if a body is zero, make sure that it is body2, so 0 --> node[1].body
+    if (body1 == NULL) {
+        body1 = body2;
+        body2 = NULL;
+        joint->flags |= dJOINT_REVERSE;
+    }
+    else {
+        joint->flags &= (~dJOINT_REVERSE);
+    }
+
+    // attach to new bodies
+    joint->node[0].body = body1;
+    joint->node[1].body = body2;
+    
+    // node[1] (which refers to body2) is pushed onto the head of body1's
+    // joint list
+    if (body1 != NULL) {
+        joint->node[1].next = body1->firstjoint;
+        body1->firstjoint = &joint->node[1];
+    }
+    else {
+        joint->node[1].next = NULL;
+    }
+    
+    // node[0] (which refers to body1) is pushed onto the head of body2's
+    // joint list
+    if (body2 != NULL) {
+        joint->node[0].next = body2->firstjoint;
+        body2->firstjoint = &joint->node[0];
+    }
+    else {
+        joint->node[0].next = NULL;
+    }
+
+    // Since the bodies are now set.
+    // Calculate the values depending on the bodies.
+    // Only need to calculate relative value if a body exist
+    if (body1 != NULL || body2 != NULL) {
+        joint->setRelativeValues();
+    }
+}
+
+// Clears the dJOINT_DISABLED flag on the joint.
+void dJointEnable (dxJoint *joint)
+{
+    dAASSERT (joint);
+    joint->flags = joint->flags & ~dJOINT_DISABLED;
+}
+
+// Sets the dJOINT_DISABLED flag on the joint.
+void dJointDisable (dxJoint *joint)
+{
+    dAASSERT (joint);
+    joint->flags = joint->flags | dJOINT_DISABLED;
+}
+
+// Returns 1 when dJOINT_DISABLED is not set on the joint, 0 otherwise.
+int dJointIsEnabled (dxJoint *joint)
+{
+    dAASSERT (joint);
+    const bool enabled = (joint->flags & dJOINT_DISABLED) == 0;
+    return enabled;
+}
+
+// Stores data in the joint's userdata field.
+void dJointSetData (dxJoint *joint, void *data)
+{
+    dAASSERT (joint);
+    dxJoint &target = *joint;
+    target.userdata = data;
+}
+
+
+// Returns the pointer stored in the joint's userdata field.
+void *dJointGetData (dxJoint *joint)
+{
+    dAASSERT (joint);
+    void *data = joint->userdata;
+    return data;
+}
+
+
+// Returns the joint's type as reported by its type() member.
+dJointType dJointGetType (dxJoint *joint)
+{
+    dAASSERT (joint);
+    const dJointType kind = joint->type();
+    return kind;
+}
+
+
+// Returns the body attached at index (0 or 1), or 0 for any other index.
+// When dJOINT_REVERSE is set the two node slots are read swapped.
+dBodyID dJointGetBody (dxJoint *joint, int index)
+{
+    dAASSERT (joint);
+    if (index != 0 && index != 1) {
+        return 0;
+    }
+    const int slot = (joint->flags & dJOINT_REVERSE) ? 1-index : index;
+    return joint->node[slot].body;
+}
+
+
+// Stores f in the joint's feedback field.
+void dJointSetFeedback (dxJoint *joint, dJointFeedback *f)
+{
+    dAASSERT (joint);
+    dxJoint &target = *joint;
+    target.feedback = f;
+}
+
+
+// Returns the pointer stored in the joint's feedback field.
+dJointFeedback *dJointGetFeedback (dxJoint *joint)
+{
+    dAASSERT (joint);
+    dJointFeedback *feedback = joint->feedback;
+    return feedback;
+}
+
+
+
+// Returns the first joint in one body's joint list whose node refers to the
+// other body, or 0 when there is none. At least one argument must be
+// non-null; when in_b1 is null the search starts from in_b2 and looks for
+// nodes whose body is null.
+dJointID dConnectingJoint (dBodyID in_b1, dBodyID in_b2)
+{
+    dAASSERT (in_b1 || in_b2);
+
+    // search from the non-null body
+    dBodyID searched = in_b1;
+    dBodyID wanted = in_b2;
+    if (in_b1 == 0) {
+        searched = in_b2;
+        wanted = in_b1;
+    }
+
+    // look through the searched body's neighbour list for the wanted body
+    dxJointNode *node = searched->firstjoint;
+    while (node) {
+        if (node->body == wanted) {
+            return node->joint;
+        }
+        node = node->next;
+    }
+
+    return 0;
+}
+
+
+
+// Writes to out_list every joint in one body's joint list whose node refers
+// to the other body and returns how many were written. At least one body
+// argument must be non-null; when in_b1 is null the search starts from
+// in_b2. The caller must supply an out_list large enough for all matches.
+int dConnectingJointList (dBodyID in_b1, dBodyID in_b2, dJointID* out_list)
+{
+    dAASSERT (in_b1 || in_b2);
+
+    // search from the non-null body
+    dBodyID searched = in_b1;
+    dBodyID wanted = in_b2;
+    if (in_b1 == 0) {
+        searched = in_b2;
+        wanted = in_b1;
+    }
+
+    // look through the searched body's neighbour list for the wanted body
+    int found = 0;
+    dxJointNode *node = searched->firstjoint;
+    while (node) {
+        if (node->body == wanted) {
+            out_list[found] = node->joint;
+            ++found;
+        }
+        node = node->next;
+    }
+
+    return found;
+}
+
+
+// Returns 1 when any node in b1's joint list refers to b2, 0 otherwise.
+int dAreConnected (dBodyID b1, dBodyID b2)
+{
+    dAASSERT (b1/* && b2*/); // b2 can be NULL to test for connection to environment
+    // look through b1's neighbour list for b2
+    dxJointNode *node = b1->firstjoint;
+    while (node) {
+        if (node->body == b2) {
+            return 1;
+        }
+        node = node->next;
+    }
+    return 0;
+}
+
+
+// Returns 1 when b1's joint list contains a node that refers to b2 and
+// whose joint type differs from joint_type; returns 0 otherwise.
+int dAreConnectedExcluding (dBodyID b1, dBodyID b2, int joint_type)
+{
+    dAASSERT (b1/* && b2*/); // b2 can be NULL to test for connection to environment
+    // look through b1's neighbour list for b2
+    dxJointNode *node = b1->firstjoint;
+    while (node) {
+        // the type check comes first, then the body comparison
+        if (dJointGetType (node->joint) != joint_type) {
+            if (node->body == b2) {
+                return 1;
+            }
+        }
+        node = node->next;
+    }
+    return 0;
+}
+
+//****************************************************************************
+// world
+
+// Allocates and returns a new dxWorld.
+dxWorld * dWorldCreate()
+{
+    return new dxWorld();
+}
+
+
+// Destroys world w together with its bodies and joints. Bodies are released
+// through dBodyDestroy. Joints that belong to a joint group are not freed
+// here: their world and body links are cleared so the owning group can
+// dispose of them later, and a warning is emitted. All other joints are
+// destructed and their memory is returned with dFree.
+void dWorldDestroy (dxWorld *w)
+{
+    // delete all bodies and joints
+    dAASSERT (w);
+    dxBody *nextb, *b = w->firstbody;
+    while (b) {
+        // fetch the successor before the body is destroyed
+        nextb = (dxBody*) b->next;
+        dBodyDestroy(b); // calling here dBodyDestroy for correct destroying! (i.e. the average buffers)
+        b = nextb;
+    }
+
+    dxJoint *nextj, *j = w->firstjoint;
+    while (j) {
+        // fetch the successor before the joint is destroyed or detached
+        nextj = (dxJoint*)j->next;
+        if (j->flags & dJOINT_INGROUP) {
+            // the joint is part of a group, so "deactivate" it instead
+            j->world = NULL;
+            j->node[0].body = NULL;
+            j->node[0].next = NULL;
+            j->node[1].body = NULL;
+            j->node[1].next = NULL;
+            dMessage (0,"warning: destroying world containing grouped joints");
+        }
+        else {
+            // TODO: shouldn't we call dJointDestroy()?
+            // the size is read first because it is needed after destruction
+            sizeint sz = j->size();
+            j->~dxJoint();
+            dFree (j,sz);
+        }
+        j = nextj;
+    }
+
+    delete w;
+}
+
+
+// Stores data in the world's userdata field.
+void dWorldSetData (dWorldID w, void *data)
+{
+    dAASSERT (w);
+    dxWorld &world = *w;
+    world.userdata = data;
+}
+
+
+// Returns the pointer stored in the world's userdata field.
+void* dWorldGetData (dWorldID w)
+{
+    dAASSERT (w);
+    void *data = w->userdata;
+    return data;
+}
+
+
+// Stores (x, y, z) in the first three components of the world's gravity
+// vector.
+void dWorldSetGravity (dWorldID w, dReal x, dReal y, dReal z)
+{
+    dAASSERT (w);
+    const dReal value[3] = { x, y, z };
+    for (int i = 0; i < 3; ++i) {
+        w->gravity[i] = value[i];
+    }
+}
+
+
+// Copies the first three components of the world's gravity vector into g.
+void dWorldGetGravity (dWorldID w, dVector3 g)
+{
+    dAASSERT (w);
+    for (int i = 0; i < 3; ++i) {
+        g[i] = w->gravity[i];
+    }
+}
+
+
+// Stores erp as the world's global_erp.
+void dWorldSetERP (dWorldID w, dReal erp)
+{
+    dAASSERT (w);
+    dxWorld &world = *w;
+    world.global_erp = erp;
+}
+
+
+// Returns the world's global_erp.
+dReal dWorldGetERP (dWorldID w)
+{
+    dAASSERT (w);
+    const dReal erp = w->global_erp;
+    return erp;
+}
+
+
+// Stores cfm as the world's global_cfm.
+void dWorldSetCFM (dWorldID w, dReal cfm)
+{
+    dAASSERT (w);
+    dxWorld &world = *w;
+    world.global_cfm = cfm;
+}
+
+
+// Returns the world's global_cfm.
+dReal dWorldGetCFM (dWorldID w)
+{
+    dAASSERT (w);
+    const dReal cfm = w->global_cfm;
+    return cfm;
+}
+
+
+// Stores count as the world's islands_max_threads setting.
+void dWorldSetStepIslandsProcessingMaxThreadCount(dWorldID w, unsigned count)
+{
+    dAASSERT (w);
+    dxWorld &world = *w;
+    world.islands_max_threads = count;
+}
+
+// Returns the world's islands_max_threads setting.
+unsigned dWorldGetStepIslandsProcessingMaxThreadCount(dWorldID w)
+{
+    dAASSERT (w);
+    const unsigned count = w->islands_max_threads;
+    return count;
+}
+
+// With a non-null from_world, makes w share from_world's working memory
+// object (obtained through AllocateOnDemand) and returns 1, or returns 0
+// when no such object could be obtained. With a null from_world, releases
+// w's own working memory (if any) and returns 1.
+int dWorldUseSharedWorkingMemory(dWorldID w, dWorldID from_world)
+{
+    dUASSERT (w,"bad world argument");
+
+    if (!from_world)
+    {
+        // detach: drop whatever working memory this world holds
+        if (w->wmem)
+        {
+            w->wmem->Release();
+            w->wmem = NULL;
+        }
+        return true;
+    }
+
+    dUASSERT (!w->wmem, "world does already have working memory allocated"); // Prevent replacement of one memory object with another to avoid cases when smaller buffer replaces a larger one or memory manager changes.
+
+    dxStepWorkingMemory *shared = AllocateOnDemand(from_world->wmem);
+    if (!shared)
+    {
+        return false;
+    }
+
+    // Even though there is an assertion check on entry still release existing
+    // memory object for extra safety.
+    if (w->wmem)
+    {
+        w->wmem->Release();
+        w->wmem = NULL;
+    }
+
+    shared->Addref();
+    w->wmem = shared;
+
+    return true;
+}
+
+// Calls CleanupMemory() on the world's working memory object when one exists.
+void dWorldCleanupWorkingMemory(dWorldID w)
+{
+    dUASSERT (w,"bad world argument");
+
+    if (w->wmem)
+    {
+        w->wmem->CleanupMemory();
+    }
+}
+
+// Applies a memory reservation policy to the world's working memory.
+// A null policyinfo resets an existing working memory object to its default
+// reserve info and always returns 1. Otherwise the working memory object is
+// obtained through AllocateOnDemand, the policy is applied, and the result
+// is 1 only when the object then reports non-null reserve info (0 when the
+// object could not be obtained).
+int dWorldSetStepMemoryReservationPolicy(dWorldID w, const dWorldStepReserveInfo *policyinfo)
+{
+    dUASSERT (w,"bad world argument");
+    dUASSERT (!policyinfo || (policyinfo->struct_size >= sizeof(*policyinfo) && policyinfo->reserve_factor >= 1.0f), "Bad policy info");
+
+    if (!policyinfo)
+    {
+        // nothing to reset when no working memory has been created yet
+        if (w->wmem)
+        {
+            w->wmem->ResetMemoryReserveInfoToDefault();
+        }
+        return true;
+    }
+
+    dxStepWorkingMemory *wmem = AllocateOnDemand(w->wmem);
+    if (!wmem)
+    {
+        return false;
+    }
+
+    wmem->SetMemoryReserveInfo(policyinfo->reserve_factor, policyinfo->reserve_minimum);
+    return wmem->GetMemoryReserveInfo() != NULL;
+}
+
+// Installs custom memory functions on the world's working memory.
+// A null memfuncs resets an existing working memory object to its default
+// memory manager and always returns 1. Otherwise the working memory object
+// is obtained through AllocateOnDemand, the functions are installed, and
+// the result is 1 only when the object then reports a non-null memory
+// manager (0 when the object could not be obtained).
+int dWorldSetStepMemoryManager(dWorldID w, const dWorldStepMemoryFunctionsInfo *memfuncs)
+{
+    dUASSERT (w,"bad world argument");
+    dUASSERT (!memfuncs || memfuncs->struct_size >= sizeof(*memfuncs), "Bad memory functions info");
+
+    if (!memfuncs)
+    {
+        // nothing to reset when no working memory has been created yet
+        if (w->wmem)
+        {
+            w->wmem->ResetMemoryManagerToDefault();
+        }
+        return true;
+    }
+
+    dxStepWorkingMemory *wmem = AllocateOnDemand(w->wmem);
+    if (!wmem)
+    {
+        return false;
+    }
+
+    wmem->SetMemoryManager(memfuncs->alloc_block, memfuncs->shrink_block, memfuncs->free_block);
+    return wmem->GetMemoryManager() != NULL;
+}
+
+// Assigns the threading implementation used for stepping the world. When
+// the build has dTHREADING_INTF_DISABLED set, nothing is assigned and both
+// arguments are asserted to be NULL.
+void dWorldSetStepThreadingImplementation(dWorldID w, 
+    const dxThreadingFunctionsInfo *functions_info, dThreadingImplementationID threading_impl)
+{
+    dUASSERT (w,"bad world argument");
+    dUASSERT (!functions_info || functions_info->struct_size >= sizeof(*functions_info), "Bad threading functions info");
+
+#if !dTHREADING_INTF_DISABLED
+    w->assignThreadingImpl(functions_info, threading_impl);
+#else
+    dUASSERT(functions_info == NULL && threading_impl == NULL, "Threading interface is not available");
+#endif
+}
+
+
+// Advances the world by stepsize using dxStepIsland. Returns 1 when both
+// dxReallocateWorldProcessContext and dxProcessIslands succeed, 0 otherwise.
+int dWorldStep (dWorldID w, dReal stepsize)
+{
+    dUASSERT (w,"bad world argument");
+    dUASSERT (stepsize > 0,"stepsize must be > 0");
+
+    dxWorldProcessIslandsInfo islandsinfo;
+    if (!dxReallocateWorldProcessContext (w, islandsinfo, stepsize, &dxEstimateStepMemoryRequirements))
+    {
+        return false;
+    }
+
+    if (!dxProcessIslands (w, islandsinfo, stepsize, &dxStepIsland, &dxEstimateStepMaxCallCount))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+// Advances the world by stepsize using dxQuickStepIsland. Returns 1 when
+// both dxReallocateWorldProcessContext and dxProcessIslands succeed, 0
+// otherwise.
+int dWorldQuickStep (dWorldID w, dReal stepsize)
+{
+    dUASSERT (w,"bad world argument");
+    dUASSERT (stepsize > 0,"stepsize must be > 0");
+
+    dxWorldProcessIslandsInfo islandsinfo;
+    if (!dxReallocateWorldProcessContext (w, islandsinfo, stepsize, &dxEstimateQuickStepMemoryRequirements))
+    {
+        return false;
+    }
+
+    if (!dxProcessIslands (w, islandsinfo, stepsize, &dxQuickStepIsland, &dxEstimateQuickStepMaxCallCount))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+
+// Scales the impulse (ix, iy, iz) by dRecip(stepsize) and writes the three
+// components to force. force[3] is left untouched.
+void dWorldImpulseToForce (dWorldID w, dReal stepsize,
+                           dReal ix, dReal iy, dReal iz,
+                           dVector3 force)
+{
+    dAASSERT (w);
+    const dReal inverse_step = dRecip(stepsize);
+    force[0] = inverse_step * ix;
+    force[1] = inverse_step * iy;
+    force[2] = inverse_step * iz;
+    // @@@ force[3] = 0;
+}
+
+
+// world auto-disable functions
+
+// Returns dSqrt of the stored adis.linear_average_threshold (the setter
+// stores the square of the value it is given).
+dReal dWorldGetAutoDisableLinearThreshold (dWorldID w)
+{
+    dAASSERT(w);
+    const dReal squared = w->adis.linear_average_threshold;
+    return dSqrt (squared);
+}
+
+
+// Stores the square of linear_average_threshold in the world's adis settings.
+void dWorldSetAutoDisableLinearThreshold (dWorldID w, dReal linear_average_threshold)
+{
+    dAASSERT(w);
+    const dReal squared = linear_average_threshold * linear_average_threshold;
+    w->adis.linear_average_threshold = squared;
+}
+
+
+// Returns dSqrt of the stored adis.angular_average_threshold (the setter
+// stores the square of the value it is given).
+dReal dWorldGetAutoDisableAngularThreshold (dWorldID w)
+{
+    dAASSERT(w);
+    const dReal squared = w->adis.angular_average_threshold;
+    return dSqrt (squared);
+}
+
+// Stores the square of `angular_average_threshold` as the angular auto-disable threshold.
+void dWorldSetAutoDisableAngularThreshold (dWorldID w, dReal angular_average_threshold)
+{
+    dAASSERT(w);
+    w->adis.angular_average_threshold = angular_average_threshold * angular_average_threshold;
+}
+
+// Returns the world's auto-disable average sample count (adis.average_samples) as an int.
+int dWorldGetAutoDisableAverageSamplesCount (dWorldID w)
+{
+    dAASSERT(w);
+    return w->adis.average_samples;
+}
+
+// Stores `average_samples_count` as the world's auto-disable average sample count.
+void dWorldSetAutoDisableAverageSamplesCount (dWorldID w, unsigned int average_samples_count)
+{
+    dAASSERT(w);
+    w->adis.average_samples = average_samples_count;
+}
+
+// Returns the world's auto-disable idle step count (adis.idle_steps).
+int dWorldGetAutoDisableSteps (dWorldID w)
+{
+    dAASSERT(w);
+    return w->adis.idle_steps;
+}
+
+// Stores `steps` as the world's auto-disable idle step count; the value is not range-checked.
+void dWorldSetAutoDisableSteps (dWorldID w, int steps)
+{
+    dAASSERT(w);
+    w->adis.idle_steps = steps;
+}
+
+// Returns the world's auto-disable idle time (adis.idle_time).
+dReal dWorldGetAutoDisableTime (dWorldID w)
+{
+    dAASSERT(w);
+    return w->adis.idle_time;
+}
+
+// Stores `time` as the world's auto-disable idle time; the value is not range-checked.
+void dWorldSetAutoDisableTime (dWorldID w, dReal time)
+{
+    dAASSERT(w);
+    w->adis.idle_time = time;
+}
+
+// Returns non-zero if dxBodyAutoDisable is set in body_flags (the raw masked bit, not a normalized 1).
+int dWorldGetAutoDisableFlag (dWorldID w)
+{
+    dAASSERT(w);
+    return w->body_flags & dxBodyAutoDisable;
+}
+
+// Set (non-zero `do_auto_disable`) or clear (zero) dxBodyAutoDisable in the world's body_flags.
+void dWorldSetAutoDisableFlag (dWorldID w, int do_auto_disable)
+{
+    dAASSERT(w);
+    if (do_auto_disable == 0)
+        w->body_flags &= ~dxBodyAutoDisable;
+    else
+        w->body_flags |= dxBodyAutoDisable;
+}
+
+
+// world damping functions
+// Returns the square root of the stored (squared) linear damping threshold.
+dReal dWorldGetLinearDampingThreshold(dWorldID w)
+{
+    dAASSERT(w);
+    return dSqrt(w->dampingp.linear_threshold);
+}
+// Stores the square of `threshold` as the world's linear damping threshold.
+void dWorldSetLinearDampingThreshold(dWorldID w, dReal threshold)
+{
+    dAASSERT(w);
+    w->dampingp.linear_threshold = threshold*threshold;
+}
+// Returns the square root of the stored (squared) angular damping threshold.
+dReal dWorldGetAngularDampingThreshold(dWorldID w)
+{
+    dAASSERT(w);
+    return dSqrt(w->dampingp.angular_threshold);
+}
+// Stores the square of `threshold` as the world's angular damping threshold.
+void dWorldSetAngularDampingThreshold(dWorldID w, dReal threshold)
+{
+    dAASSERT(w);
+    w->dampingp.angular_threshold = threshold*threshold;
+}
+// Returns the world's linear damping scale exactly as stored.
+dReal dWorldGetLinearDamping(dWorldID w)
+{
+    dAASSERT(w);
+    return w->dampingp.linear_scale;
+}
+// Store the linear damping scale; dxBodyLinearDamping in body_flags mirrors whether it is non-zero.
+void dWorldSetLinearDamping(dWorldID w, dReal scale)
+{
+    dAASSERT(w);
+    w->dampingp.linear_scale = scale;
+    if (scale == 0)
+        w->body_flags &= ~dxBodyLinearDamping;
+    else
+        w->body_flags |= dxBodyLinearDamping;
+}
+// Returns the world's angular damping scale exactly as stored.
+dReal dWorldGetAngularDamping(dWorldID w)
+{
+    dAASSERT(w);
+    return w->dampingp.angular_scale;
+}
+// Store the angular damping scale; dxBodyAngularDamping in body_flags mirrors whether it is non-zero.
+void dWorldSetAngularDamping(dWorldID w, dReal scale)
+{
+    dAASSERT(w);
+    w->dampingp.angular_scale = scale;
+    if (scale == 0)
+        w->body_flags &= ~dxBodyAngularDamping;
+    else
+        w->body_flags |= dxBodyAngularDamping;
+}
+// Convenience setter: applies both damping scales via the two single-purpose setters.
+void dWorldSetDamping(dWorldID w, dReal linear_scale, dReal angular_scale)
+{
+    dAASSERT(w);
+    dWorldSetLinearDamping(w, linear_scale);
+    dWorldSetAngularDamping(w, angular_scale);
+}
+// Returns the world's angular speed limit exactly as stored.
+dReal dWorldGetMaxAngularSpeed(dWorldID w)
+{
+    dAASSERT(w);
+    return w->max_angular_speed;
+}
+// Store the angular speed limit; dxBodyMaxAngularSpeed is set only for limits below dInfinity.
+void dWorldSetMaxAngularSpeed(dWorldID w, dReal max_speed)
+{
+    dAASSERT(w);
+    w->max_angular_speed = max_speed;
+    if (!(max_speed < dInfinity))
+        w->body_flags &= ~dxBodyMaxAngularSpeed;
+    else
+        w->body_flags |= dxBodyMaxAngularSpeed;
+}
+
+// Stores `num` as the world's quick-step iteration count (qs.num_iterations); not range-checked.
+void dWorldSetQuickStepNumIterations (dWorldID w, int num)
+{
+    dAASSERT(w);
+    w->qs.num_iterations = num;
+}
+
+// Returns the world's quick-step iteration count (qs.num_iterations).
+int dWorldGetQuickStepNumIterations (dWorldID w)
+{
+    dAASSERT(w);
+    return w->qs.num_iterations;
+}
+
+// Stores `param` in the world's quick-step parameter qs.w; the value is not range-checked.
+void dWorldSetQuickStepW (dWorldID w, dReal param)
+{
+    dAASSERT(w);
+    w->qs.w = param;
+}
+
+// Returns the world's quick-step parameter qs.w.
+dReal dWorldGetQuickStepW (dWorldID w)
+{
+    dAASSERT(w);
+    return w->qs.w;
+}
+
+// Stores `vel` as the world's contact parameter contactp.max_vel; not range-checked.
+void dWorldSetContactMaxCorrectingVel (dWorldID w, dReal vel)
+{
+    dAASSERT(w);
+    w->contactp.max_vel = vel;
+}
+
+// Returns the world's contact parameter contactp.max_vel.
+dReal dWorldGetContactMaxCorrectingVel (dWorldID w)
+{
+    dAASSERT(w);
+    return w->contactp.max_vel;
+}
+
+// Stores `depth` as the world's contact parameter contactp.min_depth; not range-checked.
+void dWorldSetContactSurfaceLayer (dWorldID w, dReal depth)
+{
+    dAASSERT(w);
+    w->contactp.min_depth = depth;
+}
+
+// Returns the world's contact parameter contactp.min_depth.
+dReal dWorldGetContactSurfaceLayer (dWorldID w)
+{
+    dAASSERT(w);
+    return w->contactp.min_depth;
+}
+
+//****************************************************************************
+// testing
+
+#define NUM 100   // capacity of the body[] and joint[] tables in dTestDataStructures()
+// DO(x) expands to nothing, so the printf tracing wrapped in it is compiled out.
+#define DO(x)
+
+// Randomized test of the world's body/joint bookkeeping. NOTE: the for(;;) loop below has no exit.
+extern "C" void dTestDataStructures()
+{
+    int i;
+    DO(printf ("testDynamicsStuff()\n"));
+    // Handle tables: the first nb / nj slots hold the bodies / joints created and not yet destroyed.
+    dBodyID body [NUM];
+    int nb = 0;
+    dJointID joint [NUM];
+    int nj = 0;
+    // Start with empty tables.
+    for (i=0; i<NUM; i++) body[i] = NULL;
+    for (i=0; i<NUM; i++) joint[i] = NULL;
+    // checkWorld() is re-run after every mutation performed below.
+    DO(printf ("creating world\n"));
+    dWorldID w = dWorldCreate();
+    checkWorld (w);
+    // Each pass tries five actions in turn; each runs only if its guard holds and dRandReal() > 0.5.
+    for (;;) {
+        if (nb < NUM && dRandReal() > 0.5) { // action 1: create a body while the table has room
+            DO(printf ("creating body\n"));
+            body[nb] = dBodyCreate (w);
+            DO(printf ("\t--> %p\n",body[nb]));
+            nb++;
+            checkWorld (w);
+            DO(printf ("%d BODIES, %d JOINTS\n",nb,nj));
+        }
+        if (nj < NUM && nb > 2 && dRandReal() > 0.5) { // action 2: create a ball joint between two distinct bodies
+            dBodyID b1 = body [dRand() % nb];
+            dBodyID b2 = body [dRand() % nb];
+            if (b1 != b2) {
+                DO(printf ("creating joint, attaching to %p,%p\n",b1,b2));
+                joint[nj] = dJointCreateBall (w,0);
+                DO(printf ("\t-->%p\n",joint[nj]));
+                checkWorld (w);
+                dJointAttach (joint[nj],b1,b2);
+                nj++;
+                checkWorld (w);
+                DO(printf ("%d BODIES, %d JOINTS\n",nb,nj));
+            }
+        }
+        if (nj > 0 && nb > 2 && dRandReal() > 0.5) { // action 3: re-attach a random joint to two distinct bodies
+            dBodyID b1 = body [dRand() % nb];
+            dBodyID b2 = body [dRand() % nb];
+            if (b1 != b2) {
+                int k = dRand() % nj;
+                DO(printf ("reattaching joint %p\n",joint[k]));
+                dJointAttach (joint[k],b1,b2);
+                checkWorld (w);
+                DO(printf ("%d BODIES, %d JOINTS\n",nb,nj));
+            }
+        }
+        if (nb > 0 && dRandReal() > 0.5) { // action 4: destroy a random body
+            int k = dRand() % nb;
+            DO(printf ("destroying body %p\n",body[k]));
+            dBodyDestroy (body[k]);
+            checkWorld (w);
+            for (; k < (NUM-1); k++) body[k] = body[k+1]; // close the gap by shifting the rest of the table down
+            nb--;
+            DO(printf ("%d BODIES, %d JOINTS\n",nb,nj));
+        }
+        if (nj > 0 && dRandReal() > 0.5) { // action 5: destroy a random joint
+            int k = dRand() % nj;
+            DO(printf ("destroying joint %p\n",joint[k]));
+            dJointDestroy (joint[k]);
+            checkWorld (w);
+            for (; k < (NUM-1); k++) joint[k] = joint[k+1]; // close the gap by shifting the rest of the table down
+            nj--;
+            DO(printf ("%d BODIES, %d JOINTS\n",nb,nj));
+        }
+    }
+    // Not reached: the loop above contains no break or return. An older, linear test is kept below.
+    /*
+    printf ("creating world\n");
+    dWorldID w = dWorldCreate();
+    checkWorld (w);
+    printf ("creating body\n");
+    dBodyID b1 = dBodyCreate (w);
+    checkWorld (w);
+    printf ("creating body\n");
+    dBodyID b2 = dBodyCreate (w);
+    checkWorld (w);
+    printf ("creating joint\n");
+    dJointID j = dJointCreateBall (w);
+    checkWorld (w);
+    printf ("attaching joint\n");
+    dJointAttach (j,b1,b2);
+    checkWorld (w);
+    printf ("destroying joint\n");
+    dJointDestroy (j);
+    checkWorld (w);
+    printf ("destroying body\n");
+    dBodyDestroy (b1);
+    checkWorld (w);
+    printf ("destroying body\n");
+    dBodyDestroy (b2);
+    checkWorld (w);
+    printf ("destroying world\n");
+    dWorldDestroy (w);
+    */
+}
+
+//****************************************************************************
+// configuration
+#if 1 // active variant: stringify the token and append the separating blank
+#define REGISTER_EXTENSION( __a )  #__a " "
+#else // inactive variant: would produce the literal text "__a " for every token
+#define REGISTER_EXTENSION( __a )  "__a "
+#endif
+static const char ode_configuration[] = "ODE "
+// Adjacent string literals concatenate: each REGISTER_EXTENSION below that survives preprocessing appends "<token> ".
+// EXTENSION LIST BEGIN
+//**********************************
+
+#ifdef dNODEBUG
+REGISTER_EXTENSION( ODE_EXT_no_debug )
+#endif // dNODEBUG
+
+#if dTRIMESH_ENABLED
+REGISTER_EXTENSION( ODE_EXT_trimesh )
+
+// tri-mesh extensions
+#if dTRIMESH_OPCODE
+REGISTER_EXTENSION( ODE_EXT_opcode )
+
+// opcode extensions
+#if dTRIMESH_16BIT_INDICES
+REGISTER_EXTENSION( ODE_OPC_16bit_indices )
+#endif
+
+#if !dTRIMESH_OPCODE_USE_OLD_TRIMESH_TRIMESH_COLLIDER
+REGISTER_EXTENSION( ODE_OPC_new_collider )
+#endif
+
+#endif // dTRIMESH_OPCODE
+
+#if dTRIMESH_GIMPACT
+REGISTER_EXTENSION( ODE_EXT_gimpact )
+
+// gimpact extensions
+#endif
+
+#endif // dTRIMESH_ENABLED
+
+#if dTLS_ENABLED
+REGISTER_EXTENSION( ODE_EXT_mt_collisions )
+#endif // dTLS_ENABLED
+
+#if !dTHREADING_INTF_DISABLED
+REGISTER_EXTENSION( ODE_EXT_threading )
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+REGISTER_EXTENSION( ODE_THR_builtin_impl )
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+#endif // #if !dTHREADING_INTF_DISABLED
+
+//**********************************
+// EXTENSION LIST END
+
+// These tokens are mutually exclusive, and always present
+#ifdef dSINGLE
+"ODE_single_precision"
+#else
+"ODE_double_precision"
+#endif // dSINGLE
+// The precision token comes last and carries no trailing blank.
+; // END
+// Returns the static, space-separated configuration string assembled above ("ODE " plus feature tokens).
+const char* dGetConfiguration (void)
+{
+    return ode_configuration;
+}
+
+
+// Check whether the feature token `extension` is present in the string
+// returned by dGetConfiguration(). Matching is case sensitive and only whole,
+// space-delimited tokens count.
+// Returns 1 if the exact token is present and 0 otherwise. An empty name or a
+// name containing a space can never be a token and therefore yields 0.
+int dCheckConfiguration( const char* extension )
+{
+    /* Feature names should not have spaces. */
+    if ( *extension == '\0' || strchr(extension, ' ') != NULL )
+        return 0;
+
+    const char* const config = dGetConfiguration();
+    const sizeint ext_length = strlen(extension);
+
+    /* It takes a bit of care to be fool-proof. Don't be fooled by sub-strings, etc. */
+    for ( const char* start = config; ; )
+    {
+        const char* const where = strstr(start, extension);
+        if (!where)
+            break;
+
+        const char* const terminator = where + ext_length;
+
+        // A token starts at the very beginning of the string or right after a
+        // space -- not merely where the previous search attempt ended.
+        if ( (where == config || *(where - 1) == ' ') &&
+            (*terminator == ' ' || *terminator == '\0') )
+        {
+            return 1;
+        }
+
+        start = terminator;
+    }
+
+    return 0;
+}
+
+
+// Local Variables:
+// c-basic-offset:4
+// End:
diff --git a/libs/ode-0.16.1/ode/src/odeinit.cpp b/libs/ode-0.16.1/ode/src/odeinit.cpp
new file mode 100644
index 0000000..25cc302
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odeinit.cpp
@@ -0,0 +1,575 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+ODE initialization/finalization code
+
+*/
+
+#include <ode/common.h>
+#include <ode/odeinit.h>
+// <ode/objects.h> included for dWorldQuickStepCleanup()
+#include <ode/objects.h>
+#include "config.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_trimesh_internal.h"
+#include "odetls.h"
+#include "odeou.h"
+#include "default_threading.h"
+
+
+//****************************************************************************
+// Initialization tracking variables
+
+static unsigned int g_uiODEInitCounter = 0; // incremented by InternalInitODE() on success, decremented by InternalCloseODE()
+static unsigned int g_uiODEInitModes = 0;   // bit (1U << mode) is set while that EODEINITMODE is initialized
+
+
+#if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+// Abort hook passed to Opcode::InitOpcode(); the dICHECK condition (!"...") is always false.
+static 
+void OPCODEAbort()
+{
+    dICHECK(!"OPCODE Library Abort");
+}
+
+
+#endif // #if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+
+// Initialization modes; the values double as bit positions in g_uiODEInitModes and as table indices.
+enum EODEINITMODE
+{
+    OIM__MIN,
+    // first mode shares the value of OIM__MIN so that loops can start there
+    OIM_AUTOTLSCLEANUP = OIM__MIN,
+    OIM_MANUALTLSCLEANUP,
+    // one past the last mode: loop bound and table size
+    OIM__MAX
+};
+// Maps each EODEINITMODE to the TLS kind used for it; only compiled with dTLS_ENABLED.
+#if dTLS_ENABLED
+static const EODETLSKIND g_atkTLSKindsByInitMode[OIM__MAX] =
+{
+    OTK_AUTOCLEANUP, // OIM_AUTOTLSCLEANUP,
+    OTK_MANUALCLEANUP, // OIM_MANUALTLSCLEANUP,
+};
+#endif // #if dTLS_ENABLED
+// True if the bit for `imInitMode` is set in g_uiODEInitModes.
+static inline bool IsODEModeInitialized(EODEINITMODE imInitMode)
+{
+    return (g_uiODEInitModes & (1U << imInitMode)) != 0;
+}
+// Sets the bit for `imInitMode` in g_uiODEInitModes.
+static inline void SetODEModeInitialized(EODEINITMODE imInitMode)
+{
+    g_uiODEInitModes |= (1U << imInitMode);
+}
+// Clears the bit for `imInitMode` in g_uiODEInitModes.
+static inline void ResetODEModeInitialized(EODEINITMODE imInitMode)
+{
+    g_uiODEInitModes &= ~(1U << imInitMode);
+}
+// True if at least one mode bit is set in g_uiODEInitModes.
+static inline bool IsODEAnyModeInitialized()
+{
+    return g_uiODEInitModes != 0;
+}
+
+// Per-thread allocation flag bits kept through the COdeTls *DataAllocationFlags() calls below.
+enum
+{
+    TLD_INTERNAL_COLLISIONDATA_ALLOCATED = 0x00000001
+};
+
+// Make sure the calling thread has its basic thread-local data for `imInitMode`.
+//
+// With dTLS_ENABLED the TLS kind mapped to the mode is queried; if it reports no
+// allocation flags, zero flags are assigned explicitly so that the TLS slot
+// gets allocated. Returns false only if that assignment fails.
+// Nothing is assigned when the flags are already non-zero.
+// Without dTLS_ENABLED there is nothing to allocate and the result is true.
+static bool AllocateThreadBasicDataIfNecessary(EODEINITMODE imInitMode)
+{
+#if dTLS_ENABLED
+    const EODETLSKIND tkKindForMode = g_atkTLSKindsByInitMode[imInitMode];
+
+    const unsigned uCurrentFlags = COdeTls::GetDataAllocationFlags(tkKindForMode);
+
+    // If no flags are set it may mean that TLS slot is not allocated yet
+    if (uCurrentFlags == 0)
+    {
+        // Assign zero flags to make sure that TLS slot has been allocated
+        if (!COdeTls::AssignDataAllocationFlags(tkKindForMode, 0))
+        {
+            return false; // the only failure path
+        }
+    }
+#else
+    (void)imInitMode; // unused
+#endif // #if dTLS_ENABLED
+
+    // Reaching this point means no step above reported a failure.
+    return true;
+}
+
+// Failure-path cleanup used after a per-thread allocation attempt: in manual TLS
+// cleanup mode, COdeTls::CleanupForThread() is called if no allocation flags are set.
+// Other modes, and builds without dTLS_ENABLED, leave everything as it is.
+static void FreeThreadBasicDataOnFailureIfNecessary(EODEINITMODE imInitMode)
+{
+#if dTLS_ENABLED
+    if (imInitMode != OIM_MANUALTLSCLEANUP)
+        return;
+
+    const EODETLSKIND tkManualKind = g_atkTLSKindsByInitMode[imInitMode];
+    const unsigned uCurrentFlags = COdeTls::GetDataAllocationFlags(tkManualKind);
+    // So far, only free TLS slot, if no subsystems have data allocated
+    if (uCurrentFlags == 0)
+    {
+        COdeTls::CleanupForThread();
+    }
+#else
+    (void)imInitMode; // unused
+#endif // #if dTLS_ENABLED
+}
+
+#if dTLS_ENABLED
+// Allocate the calling thread's collision data for TLS kind `tkTlsKind`.
+// With dTRIMESH_ENABLED a TrimeshCollidersCache is created and handed to COdeTls;
+// if COdeTls rejects it, the cache is deleted again and false is returned.
+// On success TLD_INTERNAL_COLLISIONDATA_ALLOCATED is signalled for the kind.
+static bool AllocateThreadCollisionData(EODETLSKIND tkTlsKind)
+{
+    dIASSERT(!(COdeTls::GetDataAllocationFlags(tkTlsKind) & TLD_INTERNAL_COLLISIONDATA_ALLOCATED));
+
+#if dTRIMESH_ENABLED
+
+    TrimeshCollidersCache *pccNewCache = new TrimeshCollidersCache();
+
+    if (!COdeTls::AssignTrimeshCollidersCache(tkTlsKind, pccNewCache))
+    {
+        // The assignment failed, so the cache is released right here.
+        delete pccNewCache;
+        return false;
+    }
+
+#endif // dTRIMESH_ENABLED
+
+    // Record the allocation; the assertion at the top relies on this flag
+    // not being set when the function is entered.
+    COdeTls::SignalDataAllocationFlags(tkTlsKind, TLD_INTERNAL_COLLISIONDATA_ALLOCATED);
+
+    return true;
+}
+#endif // dTLS_ENABLED
+
+// Allocate the calling thread's collision data for `imInitMode` unless the
+// TLD_INTERNAL_COLLISIONDATA_ALLOCATED flag says it already exists.
+// `bOutDataAllocated` is set to true only if this call performed the allocation
+// (it is false on failure and in builds without dTLS_ENABLED).
+// Returns false if the allocation was attempted and failed.
+static bool AllocateThreadCollisionDataIfNecessary(EODEINITMODE imInitMode, bool &bOutDataAllocated)
+{
+    bOutDataAllocated = false;
+
+#if dTLS_ENABLED
+    const EODETLSKIND tkKindForMode = g_atkTLSKindsByInitMode[imInitMode];
+
+    const unsigned uCurrentFlags = COdeTls::GetDataAllocationFlags(tkKindForMode);
+    const bool bAlreadyAllocated = (uCurrentFlags & TLD_INTERNAL_COLLISIONDATA_ALLOCATED) != 0;
+
+    if (!bAlreadyAllocated)
+    {
+        if (!AllocateThreadCollisionData(tkKindForMode))
+        {
+            return false;
+        }
+
+        // Report that the data was created by this very call.
+        bOutDataAllocated = true;
+    }
+#else
+    (void)imInitMode; // unused
+#endif // #if dTLS_ENABLED
+
+    return true;
+}
+
+// Counterpart of AllocateThreadCollisionData(): destroy the trimesh colliders
+// cache of the mode's TLS kind, then drop TLD_INTERNAL_COLLISIONDATA_ALLOCATED.
+static void FreeThreadCollisionData(EODEINITMODE imInitMode)
+{
+#if dTLS_ENABLED
+    const EODETLSKIND tkKindForMode = g_atkTLSKindsByInitMode[imInitMode];
+
+    COdeTls::DestroyTrimeshCollidersCache(tkKindForMode);
+    COdeTls::DropDataAllocationFlags(tkKindForMode, TLD_INTERNAL_COLLISIONDATA_ALLOCATED);
+#else
+    (void)imInitMode; // unused
+#endif // dTLS_ENABLED
+}
+
+// Bring ODE up for one initialization mode; on failure the completed steps are undone and false is returned.
+static bool InitODEForMode(EODEINITMODE imInitMode)
+{
+    bool bResult = false;
+    // One flag per completed step, consulted by the rollback code at the end.
+#if dOU_ENABLED
+    bool bOUCustomizationsDone = false;
+#endif
+#if dATOMICS_ENABLED
+    bool bAtomicsInitialized = false;
+#endif
+#if dTLS_ENABLED
+    EODETLSKIND tkTLSKindToInit = g_atkTLSKindsByInitMode[imInitMode];
+    bool bTlsInitialized = false;
+#else
+    (void)imInitMode; // unused
+#endif
+
+    bool bWorldThreadingInitialized = false;
+    // do/while(false): `break` jumps straight to the rollback section.
+    do
+    {
+        bool bAnyModeAlreadyInitialized = IsODEAnyModeInitialized();
+        // Library-wide facilities are set up only when no mode is initialized yet.
+        if (!bAnyModeAlreadyInitialized)
+        {
+#if dOU_ENABLED
+            if (!COdeOu::DoOUCustomizations())
+            {
+                break;
+            }
+
+            bOUCustomizationsDone = true;
+#endif
+
+#if dATOMICS_ENABLED
+            if (!COdeOu::InitializeAtomics())
+            {
+                break;
+            }
+
+            bAtomicsInitialized = true;
+#endif
+        }
+        // TLS is initialized per mode, i.e. on every call.
+#if dTLS_ENABLED
+        if (!COdeTls::Initialize(tkTLSKindToInit))
+        {
+            break;
+        }
+
+        bTlsInitialized = true;
+#endif
+        // Remaining library-wide steps, again only for the first mode.
+        if (!bAnyModeAlreadyInitialized)
+        {
+            if (!DefaultThreadingHolder::initializeDefaultThreading())
+            {
+                break;
+            }
+
+            bWorldThreadingInitialized = true;
+
+#if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+            if (!Opcode::InitOpcode(&OPCODEAbort))
+            {
+                break;
+            }
+#endif
+
+#if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+            gimpact_init();
+#endif
+
+            dInitColliders();
+        }
+
+        bResult = true;
+    }
+    while (false);
+    // Rollback: undo the completed steps in reverse order of their setup.
+    if (!bResult)
+    {
+        if (bWorldThreadingInitialized)
+        {
+            DefaultThreadingHolder::finalizeDefaultThreading();
+        }
+
+#if dTLS_ENABLED
+        if (bTlsInitialized)
+        {
+            COdeTls::Finalize(tkTLSKindToInit);
+        }
+#endif
+
+#if dATOMICS_ENABLED
+        if (bAtomicsInitialized)
+        {
+            COdeOu::FinalizeAtomics();
+        }
+#endif
+
+#if dOU_ENABLED
+        if (bOUCustomizationsDone)
+        {
+            COdeOu::UndoOUCustomizations();
+        }
+#endif
+    }
+
+    return bResult;
+}
+
+
+// Allocate, for one initialization mode, the per-thread data selected by
+// `uiAllocateFlags`. Basic data is always ensured; collision data is added only
+// when dAllocateFlagCollisionData is requested.
+// On failure the collision data created by this very call (if any) is freed,
+// FreeThreadBasicDataOnFailureIfNecessary() is invoked, and false is returned.
+static bool AllocateODEDataForThreadForMode(EODEINITMODE imInitMode, unsigned int uiAllocateFlags)
+{
+    bool bSucceeded = false;
+    bool bCollisionDataIsNew = false;
+
+    if (AllocateThreadBasicDataIfNecessary(imInitMode))
+    {
+        if ((uiAllocateFlags & dAllocateFlagCollisionData) == 0)
+        {
+            // Collision data was not asked for: the basic data is all we need.
+            bSucceeded = true;
+        }
+        else
+        {
+            bSucceeded = AllocateThreadCollisionDataIfNecessary(imInitMode, bCollisionDataIsNew);
+        }
+    }
+
+    if (!bSucceeded)
+    {
+        // Roll back only what has been created during this call; collision
+        // data that existed before is left alone.
+        if (bCollisionDataIsNew)
+        {
+            FreeThreadCollisionData(imInitMode);
+        }
+
+        FreeThreadBasicDataOnFailureIfNecessary(imInitMode);
+    }
+
+    return bSucceeded;
+}
+
+// Tear down one mode. The caller clears the mode's bit first, so the check below sees the remaining modes.
+static void CloseODEForMode(EODEINITMODE imInitMode)
+{
+    bool bAnyModeStillInitialized = IsODEAnyModeInitialized();
+    // Library-wide teardown happens only when no other mode remains initialized.
+    if (!bAnyModeStillInitialized)
+    {
+        dClearPosrCache();
+        dFinitUserClasses();
+        dFinitColliders();
+
+#if dTRIMESH_ENABLED && dTRIMESH_GIMPACT
+        gimpact_terminate();
+#endif
+
+#if dTRIMESH_ENABLED && dTRIMESH_OPCODE
+        extern void opcode_collider_cleanup();
+        // Free up static allocations in opcode
+        opcode_collider_cleanup();
+
+        Opcode::CloseOpcode();
+#endif
+
+        DefaultThreadingHolder::finalizeDefaultThreading();
+    }
+    // TLS of this mode's kind is finalized on every call.
+#if dTLS_ENABLED
+    EODETLSKIND tkTLSKindToFinalize = g_atkTLSKindsByInitMode[imInitMode];
+    COdeTls::Finalize(tkTLSKindToFinalize);
+#else
+    (void)imInitMode; // unused
+#endif
+    // Atomics and OU customizations go last, mirroring their early setup in InitODEForMode().
+    if (!bAnyModeStillInitialized)
+    {
+#if dATOMICS_ENABLED
+        COdeOu::FinalizeAtomics();
+#endif
+
+#if dOU_ENABLED
+        COdeOu::UndoOUCustomizations();
+#endif
+    }
+}
+
+
+//****************************************************************************
+// internal initialization and close routine implementations
+
+// Initialize the library for the mode selected by `uiInitFlags`
+// (dInitFlagManualThreadCleanup -> OIM_MANUALTLSCLEANUP, otherwise
+// OIM_AUTOTLSCLEANUP) unless that mode is already up, and count the call.
+// Returns false, leaving the init counter untouched, if InitODEForMode() fails.
+static bool InternalInitODE(unsigned int uiInitFlags)
+{
+    const bool bManualCleanup = (uiInitFlags & dInitFlagManualThreadCleanup) != 0;
+    const EODEINITMODE imRequestedMode = bManualCleanup ? OIM_MANUALTLSCLEANUP : OIM_AUTOTLSCLEANUP;
+
+    if (!IsODEModeInitialized(imRequestedMode))
+    {
+        if (!InitODEForMode(imRequestedMode))
+        {
+            return false;
+        }
+
+        // Mark the mode only after its initialization has succeeded.
+        SetODEModeInitialized(imRequestedMode);
+    }
+
+    // Every successful call is counted, even when the mode was already initialized.
+    ++g_uiODEInitCounter;
+
+    return true;
+}
+
+// Drop one init reference; on the last one, shut down every still-initialized mode.
+static void InternalCloseODE()
+{
+    if (--g_uiODEInitCounter != 0)
+        return; // other successful init calls are still outstanding
+    for (unsigned int uiMode = OIM__MIN; uiMode != OIM__MAX; ++uiMode)
+    {
+        const EODEINITMODE imMode = (EODEINITMODE)uiMode;
+        if (IsODEModeInitialized(imMode))
+        {
+            ResetODEModeInitialized(imMode); // first: CloseODEForMode() tests for remaining modes
+            CloseODEForMode(imMode);
+        }
+    }
+}
+
+// Allocate the requested per-thread data for every initialized mode.
+// Stops at the first mode whose allocation fails and returns false; modes
+// handled before that one are not rolled back here.
+static bool InternalAllocateODEDataForThread(unsigned int uiAllocateFlags)
+{
+    for (unsigned uiMode = OIM__MIN; uiMode != OIM__MAX; ++uiMode)
+    {
+        const EODEINITMODE imMode = (EODEINITMODE)uiMode;
+
+        // Modes that are not initialized are skipped (&& short-circuits).
+        if (IsODEModeInitialized(imMode)
+            && !AllocateODEDataForThreadForMode(imMode, uiAllocateFlags))
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+// Calls COdeTls::CleanupForThread() when dTLS_ENABLED; otherwise does nothing.
+static void InternalCleanupODEAllDataForThread()
+{
+#if dTLS_ENABLED
+    COdeTls::CleanupForThread();
+#endif
+}
+
+//****************************************************************************
+// initialization and shutdown routines - allocate and initialize data,
+// cleanup before exiting
+
+void dInitODE()
+{
+    int bInitResult = InternalInitODE(0); // flags 0 select OIM_AUTOTLSCLEANUP
+    dIVERIFY(bInitResult);
+    // Unlike dInitODE2(), all per-thread data (dAllocateMaskAll) is allocated here; failures are only dIVERIFY'd.
+    int ibAllocResult = InternalAllocateODEDataForThread(dAllocateMaskAll);
+    dIVERIFY(ibAllocResult);
+}
+
+// Public entry point: initialize the library and the calling thread's basic data.
+//
+// uiInitFlags  dInitFlagManualThreadCleanup selects the manual TLS cleanup mode;
+//              without it the automatic cleanup mode is used.
+//
+// Steps:
+//   1. InternalInitODE() brings up the selected mode, or merely counts the call
+//      if that mode is already initialized.
+//   2. The calling thread's basic data (dAllocateFlagBasicData) is allocated.
+//      Nothing beyond the basic data is requested here.
+//
+// Returns non-zero on success and zero on failure. If step 2 fails, step 1 is
+// undone with InternalCloseODE() so that the init counter stays balanced.
+// If step 1 itself fails there is nothing to undo.
+int dInitODE2(unsigned int uiInitFlags/*=0*/)
+{
+    // Step 1: library-level initialization for the requested mode.
+    if (!InternalInitODE(uiInitFlags))
+    {
+        return false;
+    }
+
+    // Step 2: basic thread data for the caller.
+    if (!InternalAllocateODEDataForThread(dAllocateFlagBasicData))
+    {
+        // Balance the successful InternalInitODE() above before
+        // reporting the failure.
+        InternalCloseODE();
+
+        return false;
+    }
+
+    return true;
+}
+
+
+// Allocate the per-thread data selected by `uiAllocateFlags` for the calling thread; non-zero on success.
+int dAllocateODEDataForThread(unsigned int uiAllocateFlags)
+{
+    dUASSERT(g_uiODEInitCounter != 0, "Call dInitODE2 first");
+
+    return InternalAllocateODEDataForThread(uiAllocateFlags);
+}
+
+// Releases the calling thread's ODE data; asserts that the library is still initialized.
+void dCleanupODEAllDataForThread()
+{
+    dUASSERT(g_uiODEInitCounter != 0, "Call dInitODE2 first or delay dCloseODE until all threads exit");
+
+    InternalCleanupODEAllDataForThread();
+}
+
+// Drops one initialization reference; the library is shut down when the last reference goes.
+void dCloseODE()
+{
+    dUASSERT(g_uiODEInitCounter != 0, "dCloseODE must not be called without dInitODE2 or if dInitODE2 fails");
+
+    InternalCloseODE();
+}
+
diff --git a/libs/ode-0.16.1/ode/src/odemath.cpp b/libs/ode-0.16.1/ode/src/odemath.cpp
new file mode 100644
index 0000000..5e69b9b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odemath.cpp
@@ -0,0 +1,312 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/common.h>
+#include "config.h"
+#include "odemath.h"
+
+// Remove any macro definitions of the public names (presumably set up by the headers -- verify) before defining them as functions.
+#undef dSafeNormalize3
+#undef dSafeNormalize4
+#undef dNormalize3
+#undef dNormalize4
+
+#undef dPlaneSpace
+#undef dOrthogonalizeR
+
+// Public wrapper: forwards to dxSafeNormalize3() and returns its bool result as an int.
+int  dSafeNormalize3 (dVector3 a)
+{
+    return dxSafeNormalize3(a);
+}
+// Public wrapper: forwards to dxSafeNormalize4() and returns its bool result as an int.
+int dSafeNormalize4 (dVector4 a)
+{
+    return dxSafeNormalize4(a);
+}
+// Public wrapper: forwards to dxNormalize3().
+void dNormalize3(dVector3 a)
+{
+    dxNormalize3(a);
+}
+// Public wrapper: forwards to dxNormalize4().
+void dNormalize4(dVector4 a)
+{
+    dxNormalize4(a);
+}
+
+// Public wrapper: forwards to dxPlaneSpace(), which writes its results into p and q.
+void dPlaneSpace(const dVector3 n, dVector3 p, dVector3 q)
+{
+    dxPlaneSpace(n, p, q);
+}
+// Public wrapper: forwards to dxOrthogonalizeR() and returns its bool result as an int.
+int dOrthogonalizeR(dMatrix3 m)
+{
+    return dxOrthogonalizeR(m);
+}
+
+
+// Tell whether `a` has at least one axis component that is not equal to zero,
+// which is the precondition checked before normalizing a 3-vector.
+/*extern */
+bool dxCouldBeNormalized3(const dVector3 a)
+{
+    dAASSERT (a);
+
+    for (unsigned axis = dV3E__AXES_MIN; axis != dV3E__AXES_MAX; ++axis) {
+        if (a[axis] != REAL(0.0)) {
+            return true; // one non-zero component is enough
+        }
+    }
+
+    // every axis component compared equal to zero
+    return false;
+}
+
+// this may be called for vectors `a' with extremely small magnitude, for
+// example the result of a cross product on two nearly perpendicular vectors.
+// we must be robust to these small vectors. to prevent numerical error,
+// first find the component a[i] with the largest magnitude and then scale
+// all the components by 1/a[i]. then we can compute the length of `a' and
+// scale the components by 1/l. this has been verified to work with vectors
+// containing the smallest representable numbers.
+
+/*extern */
+bool dxSafeNormalize3 (dVector3 a)
+{
+    dAASSERT (a);
+
+    bool ret = false;
+    // Step 1 picks the component with the largest magnitude, step 2 normalizes relative to it.
+    do {
+        dReal abs_a0 = dFabs(a[dV3E_X]);
+        dReal abs_a1 = dFabs(a[dV3E_Y]);
+        dReal abs_a2 = dFabs(a[dV3E_Z]);
+
+        dVec3Element idx;
+
+        if (abs_a1 > abs_a0) {
+            if (abs_a2 > abs_a1) { // abs_a2 is the largest
+                idx = dV3E_Z;
+            }
+            else {              // abs_a1 is the largest
+                idx = dV3E_Y;
+            }
+        }
+        else if (abs_a2 > abs_a0) {// abs_a2 is the largest
+            idx = dV3E_Z;
+        }
+        else {              // abs_a0 might be the largest
+            if (!(abs_a0 > REAL(0.0))) { 
+                // if all a's are zero, this is where we'll end up.
+                // return the vector unchanged.
+                break;
+            }
+
+            // abs_a0 is the largest
+            idx = dV3E_X;
+        }
+        // Divide the other two components by the largest magnitude, then scale all by l = 1/sqrt(1 + u*u + v*v).
+        if (idx == dV3E_X) {
+            dReal aa0_recip = dRecip(abs_a0);
+            dReal a1 = a[dV3E_Y] * aa0_recip;
+            dReal a2 = a[dV3E_Z] * aa0_recip;
+            dReal l = dRecipSqrt(REAL(1.0) + a1 * a1 + a2 * a2);
+            a[dV3E_Y] = a1 * l;
+            a[dV3E_Z] = a2 * l;
+            a[dV3E_X] = dCopySign(l, a[dV3E_X]); // the largest component becomes l, keeping its sign
+        }
+        else if (idx == dV3E_Y) {
+            dReal aa1_recip = dRecip(abs_a1);
+            dReal a0 = a[dV3E_X] * aa1_recip;
+            dReal a2 = a[dV3E_Z] * aa1_recip;
+            dReal l = dRecipSqrt(REAL(1.0) + a0 * a0 + a2 * a2);
+            a[dV3E_X] = a0 * l;
+            a[dV3E_Z] = a2 * l;
+            a[dV3E_Y] = dCopySign(l, a[dV3E_Y]);
+        }
+        else {
+            dReal aa2_recip = dRecip(abs_a2);
+            dReal a0 = a[dV3E_X] * aa2_recip;
+            dReal a1 = a[dV3E_Y] * aa2_recip;
+            dReal l = dRecipSqrt(REAL(1.0) + a0 * a0 + a1 * a1);
+            a[dV3E_X] = a0 * l;
+            a[dV3E_Y] = a1 * l;
+            a[dV3E_Z] = dCopySign(l, a[dV3E_Z]);
+        }
+
+        ret = true;
+    }
+    while (false);
+    // false means the vector was left unchanged (no component magnitude above zero).
+    return ret;
+}
+
+/* OLD VERSION */
+/*
+void dNormalize3 (dVector3 a)
+{
+    dIASSERT (a);
+    dReal l = dCalcVectorDot3(a,a);
+    if (l > 0) {
+        l = dRecipSqrt(l);
+        a[0] *= l;
+        a[1] *= l;
+        a[2] *= l;
+    }
+    else {
+        a[0] = 1;
+        a[1] = 0;
+        a[2] = 0;
+    }
+}
+*/
+
+// Tell whether `a` has at least one component that is not equal to zero,
+// which is the precondition checked before normalizing a 4-vector.
+/*extern */
+bool dxCouldBeNormalized4(const dVector4 a)
+{
+    dAASSERT (a);
+
+    for (unsigned axis = dV4E__MIN; axis != dV4E__MAX; ++axis) {
+        if (a[axis] != REAL(0.0)) {
+            return true; // one non-zero component is enough
+        }
+    }
+
+    // every component compared equal to zero
+    return false;
+}
+
+// Scale the 4-vector `a` in place by the reciprocal square root of its squared length.
+// Returns false, leaving `a` untouched, when the squared length is not > 0.
+/*extern */
+bool dxSafeNormalize4 (dVector4 a)
+{
+    dAASSERT (a);
+
+    const dReal length_sq = a[dV4E_X] * a[dV4E_X] + a[dV4E_Y] * a[dV4E_Y] + a[dV4E_Z] * a[dV4E_Z] + a[dV4E_O] * a[dV4E_O];
+    if (!(length_sq > 0)) {
+        return false;
+    }
+
+    const dReal inv_length = dRecipSqrt(length_sq);
+    a[dV4E_X] *= inv_length;
+    a[dV4E_Y] *= inv_length;
+    a[dV4E_Z] *= inv_length;
+    a[dV4E_O] *= inv_length;
+
+    return true;
+}
+
+// Given n, fill p and q so that p is perpendicular to n and q = n x p
+// (presumably n is expected to be of unit length -- verify against callers).
+void dxPlaneSpace (const dVector3 n, dVector3 p, dVector3 q)
+{
+    dAASSERT (n && p && q);
+    if (dFabs(n[2]) > M_SQRT1_2) {
+        // choose p in y-z plane
+        const dReal yz_len_sq = n[1]*n[1] + n[2]*n[2];
+        const dReal yz_inv_len = dRecipSqrt (yz_len_sq);
+        p[0] = 0;
+        p[1] = -n[2]*yz_inv_len;
+        p[2] = n[1]*yz_inv_len;
+        // set q = n x p
+        q[0] = yz_len_sq*yz_inv_len;
+        q[1] = -n[0]*p[2];
+        q[2] = n[0]*p[1];
+    } else {
+        // choose p in x-y plane
+        const dReal xy_len_sq = n[0]*n[0] + n[1]*n[1];
+        const dReal xy_inv_len = dRecipSqrt (xy_len_sq);
+        p[0] = -n[1]*xy_inv_len;
+        p[1] = n[0]*xy_inv_len;
+        p[2] = 0;
+        // set q = n x p
+        q[0] = -n[2]*p[1];
+        q[1] = n[2]*p[0];
+        q[2] = xy_len_sq*xy_inv_len;
+    }
+}
+
+
+/*
+* This takes what is supposed to be a rotation matrix,
+* and make sure it is correct.
+* Note: this operates on rows, not columns, because for rotations
+* both ways give equivalent results.
+*
+* Row 0 is normalized, row 1 is made orthogonal to row 0 (Gram-Schmidt) and
+* normalized, row 2 is rebuilt as row0 x row1 and the padding is zeroed.
+* Returns false, leaving m unmodified, if row 0 or the orthogonalized row 1
+* has no non-zero component.
+*/
+bool dxOrthogonalizeR(dMatrix3 m)
+{
+    if (!dxCouldBeNormalized3(m + dM3E__X_MIN)) {
+        return false;
+    }
+
+    dReal n0 = dCalcVectorLengthSquare3(m + dM3E__X_MIN);
+
+    // Build the corrected row[1] in a scratch vector so that m is still
+    // untouched if the row turns out to be degenerate.
+    dVector3 row2_store;
+    dReal *row2 = m + dM3E__Y_MIN;
+    // project row[0] on row[1], should be zero
+    dReal proj = dCalcVectorDot3(m + dM3E__X_MIN, m + dM3E__Y_MIN);
+    if (proj != 0) {
+        // Gram-Schmidt step on row[1]
+        dReal proj_div_n0 = proj / n0;
+        row2_store[dV3E_X] = m[dM3E__Y_MIN + dV3E_X] - proj_div_n0 * m[dM3E__X_MIN + dV3E_X];
+        row2_store[dV3E_Y] = m[dM3E__Y_MIN + dV3E_Y] - proj_div_n0 * m[dM3E__X_MIN + dV3E_Y];
+        row2_store[dV3E_Z] = m[dM3E__Y_MIN + dV3E_Z] - proj_div_n0 * m[dM3E__X_MIN + dV3E_Z];
+        row2 = row2_store;
+    }
+    if (!dxCouldBeNormalized3(row2)) {
+        return false;
+    }
+    if (n0 != REAL(1.0)) {
+        bool row0_norm_fault = !dxSafeNormalize3(m + dM3E__X_MIN);
+        dIVERIFY(!row0_norm_fault);
+    }
+    dReal n1 = dCalcVectorLengthSquare3(row2);
+    if (n1 != REAL(1.0)) {
+        bool row1_norm_fault = !dxSafeNormalize3(row2);
+        dICHECK(!row1_norm_fault);
+    }
+    dIASSERT(dFabs(dCalcVectorDot3(m + dM3E__X_MIN, row2)) < 1e-6);
+
+    if (row2 == row2_store) {
+        // Store the orthonormalized row[1]; without this m keeps its old, skewed row.
+        m[dM3E__Y_MIN + dV3E_X] = row2_store[dV3E_X];
+        m[dM3E__Y_MIN + dV3E_Y] = row2_store[dV3E_Y];
+        m[dM3E__Y_MIN + dV3E_Z] = row2_store[dV3E_Z];
+    }
+
+    /* just overwrite row[2], this makes sure the matrix is not a reflection */
+    dCalcVectorCross3(m + dM3E__Z_MIN, m + dM3E__X_MIN, row2);
+    m[dM3E_XPAD] = m[dM3E_YPAD] = m[dM3E_ZPAD] = 0;
+    return true;
+}
diff --git a/libs/ode-0.16.1/ode/src/odemath.h b/libs/ode-0.16.1/ode/src/odemath.h
new file mode 100644
index 0000000..becf284
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odemath.h
@@ -0,0 +1,72 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE__PRIVATE_ODEMATH_H_
+#define _ODE__PRIVATE_ODEMATH_H_
+
+#include <ode/odemath.h>
+#include "error.h"
+
+
+// Normalization primitives for 3- and 4-element vectors (defined elsewhere).
+// dxSafeNormalize3/4 normalize `a` in place and return false on failure; the
+// inline wrappers in this header treat a false result as a normalization fault.
+// dxCouldBeNormalized3/4 take the vector as const; presumably they report
+// whether the matching dxSafeNormalize call would succeed -- confirm against
+// their definitions.
+bool dxCouldBeNormalized3(const dVector3 a);
+bool dxSafeNormalize3 (dVector3 a);
+bool dxCouldBeNormalized4(const dVector4 a);
+bool dxSafeNormalize4 (dVector4 a);
+
+// Normalizes the 3-vector `a` in place via dxSafeNormalize3(). When that call
+// reports a failure, the fault is passed to dIVERIFY and the first three
+// elements of `a` are replaced with the unit X vector (1, 0, 0).
+ODE_PURE_INLINE 
+void dxNormalize3(dVector3 a)
+{
+    const bool bSafeNormalize3Fault = !dxSafeNormalize3(a);
+
+    if (bSafeNormalize3Fault)
+    {
+        dIVERIFY(!bSafeNormalize3Fault);
+
+        // Fall back to a well-defined unit vector
+        a[0] = REAL(1.0);
+        a[1] = REAL(0.0);
+        a[2] = REAL(0.0);
+    }
+}
+
+// Normalizes the 4-vector `a` in place via dxSafeNormalize4(). When that call
+// reports a failure, the fault is passed to dIVERIFY and `a` is replaced with
+// (1, 0, 0, 0).
+ODE_PURE_INLINE 
+void dxNormalize4(dVector4 a)
+{
+    const bool bSafeNormalize4Fault = !dxSafeNormalize4(a);
+
+    if (bSafeNormalize4Fault)
+    {
+        dIVERIFY(!bSafeNormalize4Fault);
+
+        // Fall back to a well-defined unit vector
+        a[0] = REAL(1.0);
+        a[1] = REAL(0.0);
+        a[2] = REAL(0.0);
+        a[3] = REAL(0.0);
+    }
+}
+
+// Defined elsewhere in the library sources.
+// dxPlaneSpace: reads `n` and writes `p` and `q`; presumably produces two
+// vectors spanning the plane perpendicular to `n` -- confirm at the definition.
+// dxOrthogonalizeR: re-orthonormalizes the rotation matrix `m` in place and
+// returns true on success.
+void dxPlaneSpace (const dVector3 n, dVector3 p, dVector3 q);
+bool dxOrthogonalizeR(dMatrix3 m);
+
+// For internal use
+// Inside the library the d* names are mapped straight onto the dx* functions
+// declared above, so internal callers invoke them directly.
+#define dSafeNormalize3(a) dxSafeNormalize3(a)
+#define dSafeNormalize4(a) dxSafeNormalize4(a)
+#define dNormalize3(a) dxNormalize3(a)
+#define dNormalize4(a) dxNormalize4(a)
+
+#define dPlaneSpace(n, p, q) dxPlaneSpace(n, p, q)
+#define dOrthogonalizeR(m) dxOrthogonalizeR(m)
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/odeou.cpp b/libs/ode-0.16.1/ode/src/odeou.cpp
new file mode 100644
index 0000000..e784c41
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odeou.cpp
@@ -0,0 +1,107 @@
+/*************************************************************************
+ *                                                                       *
+ * OU library interface file for Open Dynamics Engine,                   *
+ * Copyright (C) 2008-2019 Oleh Derevenko. All rights reserved.          *
+ * Email: odar@eleks.com (change all "a" to "e")                         *
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+ODE interface to OU library implementation.
+
+*/
+
+
+#include <ode/common.h>
+#include <ode/memory.h>
+#include "config.h"
+#include "odeou.h"
+
+
+
+#if dOU_ENABLED
+
+
+using _OU_NAMESPACE::EASSERTIONFAILURESEVERITY;
+using _OU_NAMESPACE::AFS__MAX;
+using _OU_NAMESPACE::CMemoryManagerCustomization;
+using _OU_NAMESPACE::CAssertionCheckCustomization;
+
+
+// Printable names for the OU assertion failure severities. The entries are
+// listed in the order of the EASSERTIONFAILURESEVERITY values noted beside them.
+BEGIN_NAMESPACE_OU();
+template<>
+const char *const CEnumUnsortedElementArray<EASSERTIONFAILURESEVERITY, AFS__MAX, const char *>::m_aetElementArray[] =
+{
+    "assert", // AFS_ASSERT,
+    "check", // AFS_CHECK,
+};
+END_NAMESPACE_OU();
+
+// Lookup object used by ForwardOUAssertionFailure() to turn a severity into its name
+static const CEnumUnsortedElementArray<EASSERTIONFAILURESEVERITY, AFS__MAX, const char *> g_aszAssertionFailureSeverityNames;
+
+
+// Assertion failure handler registered with the OU library: reports the failure
+// through dDebug() as an internal assertion error, including the severity name,
+// the failed expression and its source location.
+static void _OU_CONVENTION_CALLBACK ForwardOUAssertionFailure(EASSERTIONFAILURESEVERITY fsFailureSeverity, 
+    const char *szAssertionExpression, const char *szAssertionFileName, unsigned int uiAssertionSourceLine)
+{
+    const char *szSeverityName = g_aszAssertionFailureSeverityNames.Encode(fsFailureSeverity);
+
+    dDebug(d_ERR_IASSERT, "Assertion failure in OU Library. Kind: %s, expression: \"%s\", file: \"%s\", line: %u",
+        szSeverityName, szAssertionExpression, szAssertionFileName, uiAssertionSourceLine);
+}
+
+
+// Allocation callback for the OU library: forwards the request to dAlloc()
+static void *_OU_CONVENTION_CALLBACK ForwardOUMemoryAlloc(size_t nBlockSize)
+{
+    void *pv_NewBlock = dAlloc(nBlockSize);
+    return pv_NewBlock;
+}
+
+// Reallocation callback for the OU library: forwards the request to dRealloc().
+// The callback receives only the new size, so 0 is passed as dRealloc's second
+// argument (presumably the old block size -- confirm against dRealloc).
+static void *_OU_CONVENTION_CALLBACK ForwardOUMemoryRealloc(void *pv_ExistingBlock, size_t nBlockNewSize)
+{
+    void *pv_ResizedBlock = dRealloc(pv_ExistingBlock, 0, nBlockNewSize);
+    return pv_ResizedBlock;
+}
+
+// Deallocation callback for the OU library: forwards the request to dFree().
+// The callback receives no size, so 0 is passed as dFree's second argument.
+static void _OU_CONVENTION_CALLBACK ForwardOUMemoryFree(void *pv_ExistingBlock)
+{
+    dFree(pv_ExistingBlock, 0);
+}
+
+
+// Routes the OU library's memory management and assertion reporting through
+// the forwarding callbacks defined in this file. Always returns true.
+bool COdeOu::DoOUCustomizations()
+{
+    // Memory manager first, assertion handler second
+    CMemoryManagerCustomization::CustomizeMemoryManager(ForwardOUMemoryAlloc,
+        ForwardOUMemoryRealloc, ForwardOUMemoryFree);
+    CAssertionCheckCustomization::CustomizeAssertionChecks(ForwardOUAssertionFailure);
+
+    return true;
+}
+
+// Removes the callbacks installed by DoOUCustomizations() by passing NULL
+// handlers, in the reverse order of their installation.
+void COdeOu::UndoOUCustomizations()
+{
+    // Assertion handler first...
+    CAssertionCheckCustomization::CustomizeAssertionChecks(NULL);
+    // ...then the memory manager callbacks
+    CMemoryManagerCustomization::CustomizeMemoryManager(NULL, NULL, NULL);
+}
+
+
+#endif // dOU_ENABLED
+
diff --git a/libs/ode-0.16.1/ode/src/odeou.h b/libs/ode-0.16.1/ode/src/odeou.h
new file mode 100644
index 0000000..a06de8f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odeou.h
@@ -0,0 +1,107 @@
+/*************************************************************************
+*                                                                       *
+* OU library interface file for Open Dynamics Engine,                   *
+* Copyright (C) 2008-2019 Oleh Derevenko. All rights reserved.          *
+* Email: odar@eleks.com (change all "a" to "e")                         *
+*                                                                       *
+* Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+* All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+*                                                                       *
+*                                                                       *
+* This library is free software; you can redistribute it and/or         *
+* modify it under the terms of EITHER:                                  *
+*   (1) The GNU Lesser General Public License as published by the Free  *
+*       Software Foundation; either version 2.1 of the License, or (at  *
+*       your option) any later version. The text of the GNU Lesser      *
+*       General Public License is included with this library in the     *
+*       file LICENSE.TXT.                                               *
+*   (2) The BSD-style license that is included with this library in     *
+*       the file LICENSE-BSD.TXT.                                       *
+*                                                                       *
+* This library is distributed in the hope that it will be useful,       *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+* LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+*                                                                       *
+*************************************************************************/
+
+/*
+
+ODE interface to OU library functions.
+
+*/
+
+
+#ifndef _ODE_ODEOU_H_
+#define _ODE_ODEOU_H_
+
+
+#if dOU_ENABLED
+
+#include <ou/assert.h>
+#include <ou/enumarrays.h>
+#include <ou/macros.h>
+#include <ou/templates.h>
+#include <ou/typewrapper.h>
+#include <ou/simpleflags.h>
+#include <ou/customization.h>
+
+#if dATOMICS_ENABLED
+#include <ou/atomic.h>
+#include <ou/atomicflags.h>
+#endif
+
+#if dTLS_ENABLED
+#include <ou/threadlocalstorage.h>
+#endif
+
+
+using _OU_NAMESPACE::CEnumUnsortedElementArray;
+using _OU_NAMESPACE::CEnumSortedElementArray;
+
+#if dATOMICS_ENABLED
+using _OU_NAMESPACE::atomicord32;
+using _OU_NAMESPACE::atomicptr;
+using _OU_NAMESPACE::InitializeAtomicAPI;
+using _OU_NAMESPACE::FinalizeAtomicAPI;
+using _OU_NAMESPACE::AtomicIncrement;
+using _OU_NAMESPACE::AtomicDecrement;
+using _OU_NAMESPACE::AtomicCompareExchange;
+using _OU_NAMESPACE::AtomicExchange;
+using _OU_NAMESPACE::AtomicExchangeAddNoResult;
+using _OU_NAMESPACE::AtomicExchangeAdd;
+using _OU_NAMESPACE::AtomicCompareExchangePointer;
+using _OU_NAMESPACE::AtomicExchangePointer;
+using _OU_NAMESPACE::AtomicReadReorderBarrier;
+using _OU_NAMESPACE::AtomicStore;
+using _OU_NAMESPACE::AtomicStorePointer;
+#endif
+
+
+// Static-only facade over the OU library setup used by ODE.
+class COdeOu
+{
+public:
+    // Install / remove ODE's memory and assertion callbacks in the OU library
+    static bool DoOUCustomizations();
+    static void UndoOUCustomizations();
+
+#if dATOMICS_ENABLED
+    // Thin forwarders to the OU atomic API initialization and finalization
+    static bool InitializeAtomics() { return InitializeAtomicAPI(); }
+    static void FinalizeAtomics() { FinalizeAtomicAPI(); }
+#endif
+};
+
+
+#endif 
+
+
+// Fallback definitions of the atomic types for builds where they are not
+// imported from the OU library (OU disabled or atomics disabled).
+#if !dOU_ENABLED || !dATOMICS_ENABLED
+
+typedef unsigned int atomicord32;
+typedef void *atomicptr;
+
+
+#endif // !dOU_ENABLED || !dATOMICS_ENABLED
+
+
+
+#endif // _ODE_ODEOU_H_
diff --git a/libs/ode-0.16.1/ode/src/odetls.cpp b/libs/ode-0.16.1/ode/src/odetls.cpp
new file mode 100644
index 0000000..5df2845
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odetls.cpp
@@ -0,0 +1,153 @@
+/*************************************************************************
+ *                                                                       *
+ * Thread local storage access stub for Open Dynamics Engine,            *
+ * Copyright (C) 2008-2019 Oleh Derevenko. All rights reserved.          *
+ * Email: odar@eleks.com (change all "a" to "e")                         *
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+ODE Thread Local Storage access stub implementation.
+
+*/
+
+#include <ode/common.h>
+#include "config.h"
+#include "odemath.h"
+#include "odetls.h"
+#include "collision_trimesh_internal.h"
+
+
+#if dTLS_ENABLED
+
+
+using _OU_NAMESPACE::CTLSInitialization;
+
+
+//////////////////////////////////////////////////////////////////////////
+// Class static fields
+
+// One TLS storage key per EODETLSKIND; a zero entry means that kind is not initialized
+HTLSKEY COdeTls::m_ahtkStorageKeys[OTK__MAX] = { 0 };
+
+
+//////////////////////////////////////////////////////////////////////////
+// Initialization and finalization
+
+// Initializes the TLS API for the given storage kind and records the resulting
+// key in m_ahtkStorageKeys. The manual-cleanup kind requests manual cleanup on
+// thread exit from the OU library. Returns true on success.
+bool COdeTls::Initialize(EODETLSKIND tkTLSKind)
+{
+    dIASSERT(!m_ahtkStorageKeys[tkTLSKind]);
+
+    unsigned uOUFlags = 0;
+    if (tkTLSKind == OTK_MANUALCLEANUP)
+    {
+        uOUFlags |= CTLSInitialization::SIF_MANUAL_CLEANUP_ON_THREAD_EXIT;
+    }
+
+    if (!CTLSInitialization::InitializeTLSAPI(m_ahtkStorageKeys[tkTLSKind], OTI__MAX, uOUFlags))
+    {
+        return false;
+    }
+
+    return true;
+}
+
+// Finalizes the TLS API and clears the stored key of the given storage kind.
+// NOTE(review): FinalizeTLSAPI() is called without a kind argument -- confirm
+// how it behaves when both kinds have been initialized.
+void COdeTls::Finalize(EODETLSKIND tkTLSKind)
+{
+    CTLSInitialization::FinalizeTLSAPI();
+
+    // Mark the kind as uninitialized again
+    m_ahtkStorageKeys[tkTLSKind] = 0;
+}
+
+
+// Performs the manual per-thread TLS cleanup. Only valid when the
+// manual-cleanup storage kind has been initialized; otherwise it asserts.
+void COdeTls::CleanupForThread()
+{
+    if (!m_ahtkStorageKeys[OTK_MANUALCLEANUP])
+    {
+        dIASSERT(false); // The class is not intended to be cleaned up manually
+        return;
+    }
+
+    CTLSInitialization::CleanupOnThreadExit();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Value modifiers
+
+// Stores the allocation flags in the OTI_DATA_ALLOCATION_FLAGS slot of the
+// given storage kind. Returns the result of the SetStorageValue() call.
+bool COdeTls::AssignDataAllocationFlags(EODETLSKIND tkTLSKind, unsigned uInitializationFlags)
+{
+    return CThreadLocalStorage::SetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS, (tlsvaluetype)(sizeint)uInitializationFlags);
+}
+
+
+// Stores the trimesh colliders cache in its slot for the given storage kind
+// and registers FreeTrimeshCollidersCache_Callback as the value's destructor.
+// The slot must be empty on entry. Returns the result of SetStorageValue().
+bool COdeTls::AssignTrimeshCollidersCache(EODETLSKIND tkTLSKind, TrimeshCollidersCache *pccInstance)
+{
+    dIASSERT(!CThreadLocalStorage::GetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_TRIMESH_TRIMESH_COLLIDER_CACHE));
+
+    return CThreadLocalStorage::SetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_TRIMESH_TRIMESH_COLLIDER_CACHE, (tlsvaluetype)pccInstance, &COdeTls::FreeTrimeshCollidersCache_Callback);
+}
+
+// Frees the trimesh colliders cache stored for the given storage kind, if any,
+// and resets its slot to NULL.
+void COdeTls::DestroyTrimeshCollidersCache(EODETLSKIND tkTLSKind)
+{
+    TrimeshCollidersCache *pccStoredCache = (TrimeshCollidersCache *)CThreadLocalStorage::GetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_TRIMESH_TRIMESH_COLLIDER_CACHE);
+
+    if (pccStoredCache == NULL)
+    {
+        // Nothing has been stored
+        return;
+    }
+
+    FreeTrimeshCollidersCache(pccStoredCache);
+
+    CThreadLocalStorage::UnsafeSetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_TRIMESH_TRIMESH_COLLIDER_CACHE, (tlsvaluetype)NULL);
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Value type destructors
+
+// Releases a trimesh colliders cache instance. Without trimesh support no
+// cache is expected, so the pointer is only asserted to be NULL.
+void COdeTls::FreeTrimeshCollidersCache(TrimeshCollidersCache *pccCacheInstance)
+{
+#if !dTRIMESH_ENABLED
+    dIASSERT(pccCacheInstance == NULL); // The cache is not being allocated if the library is configured without trimeshes
+#else
+    delete pccCacheInstance;
+#endif
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Value type destructor callbacks
+
+// TLS value destructor callback: converts the stored value back to a cache
+// pointer and releases it via FreeTrimeshCollidersCache().
+void COdeTls::FreeTrimeshCollidersCache_Callback(tlsvaluetype vValueData)
+{
+    FreeTrimeshCollidersCache((TrimeshCollidersCache *)vValueData);
+}
+
+
+#endif // #if dTLS_ENABLED
+
diff --git a/libs/ode-0.16.1/ode/src/odetls.h b/libs/ode-0.16.1/ode/src/odetls.h
new file mode 100644
index 0000000..db3306b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/odetls.h
@@ -0,0 +1,126 @@
+/*************************************************************************
+ *                                                                       *
+ * Thread local storage access stub for Open Dynamics Engine,            *
+ * Copyright (C) 2008-2019 Oleh Derevenko. All rights reserved.          *
+ * Email: odar@eleks.com (change all "a" to "e")                         *
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+ODE Thread Local Storage access stub interface.
+
+*/
+
+
+#ifndef _ODE_ODETLS_H_
+#define _ODE_ODETLS_H_
+
+
+#include "odeou.h"
+
+
+#if dTLS_ENABLED
+
+
+using _OU_NAMESPACE::tlsvaluetype;
+using _OU_NAMESPACE::HTLSKEY;
+using _OU_NAMESPACE::CThreadLocalStorage;
+
+
+struct TrimeshCollidersCache;
+
+
+// Kinds of TLS storage kept by COdeTls; each kind has its own storage key.
+enum EODETLSKIND
+{
+    OTK__MIN,
+
+    OTK_AUTOCLEANUP = OTK__MIN,
+    OTK_MANUALCLEANUP, // initialized with the OU manual-cleanup-on-thread-exit flag
+
+    OTK__MAX, // number of kinds; sizes COdeTls::m_ahtkStorageKeys
+
+    OTK__DEFAULT = OTK_AUTOCLEANUP,
+};
+
+// Indices of the individual values kept in each TLS storage.
+enum EODETLSITEM
+{
+    OTI_DATA_ALLOCATION_FLAGS, // unsigned flag mask
+    OTI_TRIMESH_TRIMESH_COLLIDER_CACHE, // TrimeshCollidersCache pointer
+
+    OTI__MAX, // number of items; passed to InitializeTLSAPI()
+};
+
+
+// Static-only accessor for the values ODE keeps in thread local storage.
+// Every operation takes the storage kind whose key should be used.
+class COdeTls
+{
+public:
+    // Set up / tear down the TLS storage of one kind
+    static bool Initialize(EODETLSKIND tkTLSKind);
+    static void Finalize(EODETLSKIND tkTLSKind);
+
+    // Manual per-thread cleanup (manual-cleanup kind only)
+    static void CleanupForThread();
+
+public:
+    // Returns the allocation flag mask stored for the calling thread
+    static unsigned GetDataAllocationFlags(EODETLSKIND tkTLSKind)
+    {
+        // Must be a safe call as it is used to test if TLS slot is allocated at all
+        const sizeint siStoredFlags = (sizeint)CThreadLocalStorage::GetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS);
+        return (unsigned)siStoredFlags;
+    }
+
+    // Sets the bits of uFlagsMask in the stored allocation flags
+    static void SignalDataAllocationFlags(EODETLSKIND tkTLSKind, unsigned uFlagsMask)
+    {
+        const unsigned uExistingFlags = (unsigned)(sizeint)CThreadLocalStorage::UnsafeGetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS);
+        const unsigned uRaisedFlags = uExistingFlags | uFlagsMask;
+        CThreadLocalStorage::UnsafeSetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS, (tlsvaluetype)(sizeint)uRaisedFlags);
+    }
+
+    // Clears the bits of uFlagsMask in the stored allocation flags
+    static void DropDataAllocationFlags(EODETLSKIND tkTLSKind, unsigned uFlagsMask)
+    {
+        const unsigned uExistingFlags = (unsigned)(sizeint)CThreadLocalStorage::UnsafeGetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS);
+        const unsigned uRemainingFlags = uExistingFlags & ~uFlagsMask;
+        CThreadLocalStorage::UnsafeSetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_DATA_ALLOCATION_FLAGS, (tlsvaluetype)(sizeint)uRemainingFlags);
+    }
+
+    // Returns the trimesh colliders cache pointer stored for the calling thread
+    static TrimeshCollidersCache *GetTrimeshCollidersCache(EODETLSKIND tkTLSKind)
+    { 
+        TrimeshCollidersCache *pccStoredCache = (TrimeshCollidersCache *)CThreadLocalStorage::UnsafeGetStorageValue(m_ahtkStorageKeys[tkTLSKind], OTI_TRIMESH_TRIMESH_COLLIDER_CACHE);
+        return pccStoredCache;
+    }
+
+public:
+    static bool AssignDataAllocationFlags(EODETLSKIND tkTLSKind, unsigned uInitializationFlags);
+
+    static bool AssignTrimeshCollidersCache(EODETLSKIND tkTLSKind, TrimeshCollidersCache *pccInstance);
+    static void DestroyTrimeshCollidersCache(EODETLSKIND tkTLSKind);
+
+private:
+    // Releases a cache instance
+    static void FreeTrimeshCollidersCache(TrimeshCollidersCache *pccCacheInstance);
+
+private:
+    // Destructor callback registered together with the stored cache value
+    static void _OU_CONVENTION_CALLBACK FreeTrimeshCollidersCache_Callback(tlsvaluetype vValueData);
+
+private:
+    // One storage key per EODETLSKIND
+    static HTLSKEY				m_ahtkStorageKeys[OTK__MAX];
+};
+
+
+#endif // dTLS_ENABLED
+
+
+#endif // _ODE_ODETLS_H_
diff --git a/libs/ode-0.16.1/ode/src/plane.cpp b/libs/ode-0.16.1/ode/src/plane.cpp
new file mode 100644
index 0000000..b54e894
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/plane.cpp
@@ -0,0 +1,146 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+standard ODE geometry primitives: public API and pairwise collision functions.
+
+the rule is that only the low level primitive collision functions should set
+dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// plane public API
+
+// Rescales all four plane coefficients so that the normal (p[0], p[1], p[2])
+// has unit length. If the squared length of the normal is not positive, the
+// plane is reset to normal (1, 0, 0) with offset 0.
+static void make_sure_plane_normal_has_unit_length (dxPlane *g)
+{
+    dReal *const coeffs = g->p;
+    const dReal lengthSquared = coeffs[0]*coeffs[0] + coeffs[1]*coeffs[1] + coeffs[2]*coeffs[2];
+
+    if (!(lengthSquared > 0)) {
+        // Degenerate normal: fall back to a default plane
+        coeffs[0] = 1;
+        coeffs[1] = 0;
+        coeffs[2] = 0;
+        coeffs[3] = 0;
+        return;
+    }
+
+    // The offset is scaled together with the normal
+    const dReal scale = dRecipSqrt(lengthSquared);
+    for (int i = 0; i < 4; ++i) {
+        coeffs[i] *= scale;
+    }
+}
+
+
+// Constructs a plane geom with coefficients (a, b, c, d); the coefficients are
+// rescaled so that the normal (a, b, c) has unit length.
+dxPlane::dxPlane (dSpaceID space, dReal a, dReal b, dReal c, dReal d) :
+dxGeom (space,0)
+{
+    type = dPlaneClass;
+
+    const dReal coefficients[4] = { a, b, c, d };
+    for (int i = 0; i < 4; ++i) {
+        p[i] = coefficients[i];
+    }
+
+    make_sure_plane_normal_has_unit_length (this);
+}
+
+
+// Computes the bounding box of the plane. In general the box is unbounded on
+// every axis; when the normal is aligned with a coordinate axis, the box is
+// bounded on that axis by the plane offset p[3].
+void dxPlane::computeAABB()
+{
+    // Start with a box that is unbounded in all directions
+    for (int axis = 0; axis < 3; ++axis) {
+        aabb[2 * axis] = -dInfinity;
+        aabb[2 * axis + 1] = dInfinity;
+    }
+
+    // Planes that have normal vectors aligned along an axis can use a
+    // less comprehensive (half space) bounding box.
+    int alignedAxis = -1;
+    if ( p[1] == 0.0f && p[2] == 0.0f ) {
+        alignedAxis = 0; // normal aligned with x-axis
+    }
+    else if ( p[0] == 0.0f && p[2] == 0.0f ) {
+        alignedAxis = 1; // normal aligned with y-axis
+    }
+    else if ( p[0] == 0.0f && p[1] == 0.0f ) {
+        alignedAxis = 2; // normal aligned with z-axis
+    }
+
+    if (alignedAxis < 0) {
+        return;
+    }
+
+    const int minIndex = 2 * alignedAxis;
+    const int maxIndex = minIndex + 1;
+    if (p[alignedAxis] > 0) {
+        // Normal points in the positive direction: bounded from above
+        aabb[minIndex] = -dInfinity;
+        aabb[maxIndex] = p[3];
+    }
+    else {
+        // Otherwise: bounded from below
+        aabb[minIndex] = -p[3];
+        aabb[maxIndex] = dInfinity;
+    }
+}
+
+
+// Creates a new plane geom with coefficients (a, b, c, d) in the given space
+// and returns its handle.
+dGeomID dCreatePlane (dSpaceID space,
+                      dReal a, dReal b, dReal c, dReal d)
+{
+    dxPlane *plane = new dxPlane (space, a, b, c, d);
+    return plane;
+}
+
+
+// Replaces the coefficients of a plane geom with (a, b, c, d), rescales them
+// so the normal has unit length, and notifies the geom that it has moved.
+// `g` must be a plane geom.
+void dGeomPlaneSetParams (dGeomID g, dReal a, dReal b, dReal c, dReal d)
+{
+    dUASSERT (g && g->type == dPlaneClass,"argument not a plane");
+
+    dxPlane *plane = (dxPlane*) g;
+    const dReal coefficients[4] = { a, b, c, d };
+    for (int i = 0; i < 4; ++i) {
+        plane->p[i] = coefficients[i];
+    }
+
+    make_sure_plane_normal_has_unit_length (plane);
+    dGeomMoved (g);
+}
+
+
+// Copies the four stored plane coefficients of `g` into `result`.
+// `g` must be a plane geom.
+void dGeomPlaneGetParams (dGeomID g, dVector4 result)
+{
+    dUASSERT (g && g->type == dPlaneClass,"argument not a plane");
+
+    const dxPlane *plane = (dxPlane*) g;
+    for (int i = 0; i < 4; ++i) {
+        result[i] = plane->p[i];
+    }
+}
+
+
+// Returns the plane offset minus the dot product of the plane normal with the
+// point (x, y, z). `g` must be a plane geom.
+dReal dGeomPlanePointDepth (dGeomID g, dReal x, dReal y, dReal z)
+{
+    dUASSERT (g && g->type == dPlaneClass,"argument not a plane");
+
+    const dReal *coeffs = ((dxPlane*) g)->p;
+    // Same left-to-right evaluation as a plain subtraction chain
+    return coeffs[3] - coeffs[0]*x - coeffs[1]*y - coeffs[2]*z;
+}
diff --git a/libs/ode-0.16.1/ode/src/quickstep.cpp b/libs/ode-0.16.1/ode/src/quickstep.cpp
new file mode 100644
index 0000000..046bc33
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/quickstep.cpp
@@ -0,0 +1,3344 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/common.h>
+#include <ode/rotation.h>
+#include <ode/timer.h>
+#include <ode/error.h>
+#include <ode/misc.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "objects.h"
+#include "joints/joint.h"
+#include "lcp.h"
+#include "util.h"
+#include "threadingutils.h"
+
+#include <new>
+
+
+//***************************************************************************
+// configuration
+
+// for the SOR and CG methods:
+// uncomment the following line to use warm starting. this definitely
+// helps for motor-driven joints. unfortunately it appears to hurt
+// with high-friction contacts using the SOR method. use with care
+
+// #define WARM_STARTING 1
+
+
+#define REORDERING_METHOD__DONT_REORDER 0
+#define REORDERING_METHOD__BY_ERROR     1
+#define REORDERING_METHOD__RANDOMLY     2
+
+// for the SOR method:
+// uncomment the following line to determine a new constraint-solving
+// order for each iteration. however, the qsort per iteration is expensive,
+// and the optimal order is somewhat problem dependent.
+// @@@ try the leaf->root ordering.
+
+// #define CONSTRAINTS_REORDERING_METHOD REORDERING_METHOD__BY_ERROR
+
+
+// for the SOR method:
+// uncomment the following line to randomly reorder constraint rows
+// during the solution. depending on the situation, this can help a lot
+// or hardly at all, but it doesn't seem to hurt.
+
+#define CONSTRAINTS_REORDERING_METHOD REORDERING_METHOD__RANDOMLY
+
+
+#if !defined(CONSTRAINTS_REORDERING_METHOD)
+#define CONSTRAINTS_REORDERING_METHOD REORDERING_METHOD__DONT_REORDER
+#endif
+
+
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__RANDOMLY
+#if !defined(RANDOM_CONSTRAINTS_REORDERING_FREQUENCY)
+#define RANDOM_CONSTRAINTS_REORDERING_FREQUENCY 8U
+#endif
+dSASSERT(RANDOM_CONSTRAINTS_REORDERING_FREQUENCY != 0);
+#endif
+
+enum dxRandomReorderStage // Stage identifiers covering the range [RRS__MIN, RRS__MAX)
+{
+    RRS__MIN,
+    // The only stage listed here; it shares the value of RRS__MIN
+    RRS_REORDERING = RRS__MIN,
+    // One past the last stage value
+    RRS__MAX,
+};
+
+
+//***************************************************************************
+// macros, typedefs, forwards and inlines
+
+struct IndexError;
+
+
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A)) // smaller of A and B; an argument may be evaluated twice
+#define dMAX(A,B)  ((B)>(A) ? (B) : (A)) // larger of A and B; an argument may be evaluated twice
+
+
+#define dxQUICKSTEPISLAND_STAGE2B_STEP  16U
+#define dxQUICKSTEPISLAND_STAGE2C_STEP  32U
+// Stage 4a step: 256 when WARM_STARTING is defined, 512 otherwise
+#ifdef WARM_STARTING
+#define dxQUICKSTEPISLAND_STAGE4A_STEP  256U
+#else
+#define dxQUICKSTEPISLAND_STAGE4A_STEP  512U
+#endif
+
+#define dxQUICKSTEPISLAND_STAGE4LCP_IMJ_STEP 8U
+#define dxQUICKSTEPISLAND_STAGE4LCP_AD_STEP  8U
+// fc step: a fixed value plus derived PREPARE/COMPLETE steps under WARM_STARTING, otherwise half of the stage 4a step
+#ifdef WARM_STARTING
+#define dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP  128U
+#define dxQUICKSTEPISLAND_STAGE4LCP_FC_COMPLETE_TO_PREPARE_COMPLEXITY_DIVISOR  4
+#define dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP_PREPARE  (dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP * dxQUICKSTEPISLAND_STAGE4LCP_FC_COMPLETE_TO_PREPARE_COMPLEXITY_DIVISOR)
+#define dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP_COMPLETE (dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP)
+#else
+#define dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP  (dxQUICKSTEPISLAND_STAGE4A_STEP / 2) // Average info.m is 3 for stage4a, while there are 6 reals per index in fc
+#endif
+
+#define dxQUICKSTEPISLAND_STAGE4B_STEP  256U
+// NOTE(review): these look like per-chunk work sizes passed as step_size template arguments -- confirm at the use sites
+#define dxQUICKSTEPISLAND_STAGE6A_STEP  16U
+#define dxQUICKSTEPISLAND_STAGE6B_STEP  1U
+
+template<unsigned int step_size>
+inline unsigned int CalculateOptimalThreadsCount(unsigned int complexity, unsigned int max_threads)
+{   // Thread count: complexity / step_size, rounded down but at least 1, capped at max_threads
+    const unsigned int workload = complexity > step_size ? complexity : step_size;
+    const unsigned int wanted = workload / step_size;
+    return wanted > max_threads ? max_threads : wanted;
+}
+
+#define dxENCODE_INDEX(index)   ((unsigned int)((index) + 1)) // shifts an index up by one, so -1 encodes to 0
+#define dxDECODE_INDEX(code)    ((unsigned int)((code) - 1)) // inverse shift: subtracts one from a code
+#define dxHEAD_INDEX            0
+
+//****************************************************************************
+// special matrix multipliers
+
+// Computes A[0..5] = sum over i in [0, q) of B[12*i + 0..5] * C[i]: consecutive rows
+// of B are 12 dReal apart and only the first 6 values of each row are read.
+static inline void Multiply1_12q1 (dReal *A, const dReal *B, const dReal *C, unsigned int q)
+{
+    dIASSERT (q>0 && A && B && C);
+
+    // The six sums live in locals and are stored to A only after the loop,
+    // so A is neither read nor written while B and C are being traversed.
+    dReal sum0 = 0, sum1 = 0, sum2 = 0;
+    dReal sum3 = 0, sum4 = 0, sum5 = 0;
+
+    unsigned int rowStart = 0; // offset of row i inside B
+    for (unsigned int i = 0; i != q; ++i)
+    {
+        // Fetch the multiplier into a temporary before the row is read.
+        const dReal scale = C[i];
+        const dReal *row = B + rowStart;
+
+        sum0 += row[0] * scale;
+        sum1 += row[1] * scale;
+        sum2 += row[2] * scale;
+        sum3 += row[3] * scale;
+        sum4 += row[4] * scale;
+        sum5 += row[5] * scale;
+
+        rowStart += 12;
+    }
+
+    // Publish the accumulated sums.
+    A[0] = sum0; A[1] = sum1;
+    A[2] = sum2; A[3] = sum3;
+    A[4] = sum4; A[5] = sum5;
+}
+
+//***************************************************************************
+// testing stuff
+
+#ifdef TIMING
+#define IFTIMING(x) x
+#else
+#define IFTIMING(x) ((void)0)
+#endif
+
+
+struct dJointWithInfo1 // A joint pointer stored together with a dxJoint::Info1 value
+{
+    dxJoint *joint;
+    dxJoint::Info1 info;
+};
+
+
+struct dxMIndexItem // Pair of unsigned indices; element type of dxQuickStepperLocalContext::m_mindex
+{
+    unsigned        mIndex;
+    unsigned        fbIndex;
+};
+
+struct dxJBodiesItem // Body indices of one constraint row, as read by compute_invM_JT()
+{
+    unsigned        first;
+    int             second; // The index is optional and can be equal to -1
+};
+
+enum dxInvIRowElement // Element offsets within one invI entry (entries are IIE__MAX elements apart)
+{
+    IIE__MIN,
+    // The matrix part spans dM3E__MAX elements starting at IIE__MATRIX_MIN
+    IIE__MATRIX_MIN = IIE__MIN,
+    IIE__MATRIX_MAX = IIE__MATRIX_MIN + dM3E__MAX,
+    // Total element count of one entry
+    IIE__MAX = IIE__MATRIX_MAX,
+};
+
+enum dxRHSCFMElement // Local aliases of the dxJoint::GI2_RHS / GI2_CFM enumerators
+{
+    RCE_RHS = dxJoint::GI2_RHS,
+    RCE_CFM = dxJoint::GI2_CFM,
+    // Alias of dxJoint::GI2__RHS_CFM_MAX (presumably the size of the RHS/CFM pair -- confirm in joint.h)
+    RCE__RHS_CFM_MAX = dxJoint::GI2__RHS_CFM_MAX,
+};
+
+enum dxLoHiElement // Local aliases of the dxJoint::GI2_LO / GI2_HI enumerators
+{
+    LHE_LO = dxJoint::GI2_LO,
+    LHE_HI = dxJoint::GI2_HI,
+    // Alias of dxJoint::GI2__LO_HI_MAX (presumably the size of the LO/HI pair -- confirm in joint.h)
+    LHE__LO_HI_MAX = dxJoint::GI2__LO_HI_MAX,
+};
+
+enum dxJacobiVectorElement // Offsets inside one Jacobian vector: the dDA_* layout rebased onto JVE__MIN
+{
+    JVE__MIN,
+    // L components (LX, LY, LZ) occupy [JVE__L_MIN, JVE__L_MAX)
+    JVE__L_MIN = JVE__MIN + dDA__L_MIN,
+
+    JVE_LX = JVE__MIN + dDA_LX,
+    JVE_LY = JVE__MIN + dDA_LY,
+    JVE_LZ = JVE__MIN + dDA_LZ,
+
+    JVE__L_MAX = JVE__MIN + dDA__L_MAX,
+    // A components (AX, AY, AZ) occupy [JVE__A_MIN, JVE__A_MAX)
+    JVE__A_MIN = JVE__MIN + dDA__A_MIN,
+
+    JVE_AX = JVE__MIN + dDA_AX,
+    JVE_AY = JVE__MIN + dDA_AY,
+    JVE_AZ = JVE__MIN + dDA_AZ,
+
+    JVE__A_MAX = JVE__MIN + dDA__A_MAX,
+    // Total element count of one vector
+    JVE__MAX = JVE__MIN + dDA__MAX,
+    // Sizes of the L and A sub-ranges
+    JVE__L_COUNT = JVE__L_MAX - JVE__L_MIN,
+    JVE__A_COUNT = JVE__A_MAX - JVE__A_MIN,
+};
+
+enum dxJacobiMatrixElement // Element offsets within one Jacobian row of JME__MAX values: J1 | RHS, CFM | J2 | LO, HI
+{
+    JME__MIN,
+    // J1: a full Jacobian vector (JVE__MAX elements) placed at the start of the row
+    JME__J1_MIN = JME__MIN,
+    JME__J1L_MIN = JME__J1_MIN + JVE__L_MIN,
+
+    JME_J1LX = JME__J1_MIN + JVE_LX,
+    JME_J1LY = JME__J1_MIN + JVE_LY,
+    JME_J1LZ = JME__J1_MIN + JVE_LZ,
+
+    JME__J1L_MAX = JME__J1_MIN + JVE__L_MAX,
+
+    JME__J1A_MIN = JME__J1_MIN + JVE__A_MIN,
+
+    JME_J1AX = JME__J1_MIN + JVE_AX,
+    JME_J1AY = JME__J1_MIN + JVE_AY,
+    JME_J1AZ = JME__J1_MIN + JVE_AZ,
+
+    JME__J1A_MAX = JME__J1_MIN + JVE__A_MAX,
+    JME__J1_MAX = JME__J1_MIN + JVE__MAX,
+    // The RHS and CFM values are stored directly after J1
+    JME__RHS_CFM_MIN = JME__J1_MAX,
+    JME_RHS = JME__RHS_CFM_MIN + RCE_RHS,
+    JME_CFM = JME__RHS_CFM_MIN + RCE_CFM,
+    JME__RHS_CFM_MAX = JME__RHS_CFM_MIN + RCE__RHS_CFM_MAX,
+    // J2: the second Jacobian vector, placed after RHS/CFM
+    JME__J2_MIN = JME__RHS_CFM_MAX,
+    JME__J2L_MIN = JME__J2_MIN + JVE__L_MIN,
+
+    JME_J2LX = JME__J2_MIN + JVE_LX,
+    JME_J2LY = JME__J2_MIN + JVE_LY,
+    JME_J2LZ = JME__J2_MIN + JVE_LZ,
+
+    JME__J2L_MAX = JME__J2_MIN + JVE__L_MAX,
+
+    JME__J2A_MIN = JME__J2_MIN + JVE__A_MIN,
+
+    JME_J2AX = JME__J2_MIN + JVE_AX,
+    JME_J2AY = JME__J2_MIN + JVE_AY,
+    JME_J2AZ = JME__J2_MIN + JVE_AZ,
+
+    JME__J2A_MAX = JME__J2_MIN + JVE__A_MAX,
+    JME__J2_MAX = JME__J2_MIN + JVE__MAX,
+    // The LO and HI values are stored directly after J2
+    JME__LO_HI_MIN = JME__J2_MAX,
+    JME_LO = JME__LO_HI_MIN + LHE_LO,
+    JME_HI = JME__LO_HI_MIN + LHE_HI,
+    JME__LO_HI_MAX = JME__LO_HI_MIN + LHE__LO_HI_MAX,
+
+    JME__MAX = JME__LO_HI_MAX, // Elements per row; the original author notes that this happens to come to 16
+    // Element counts of the two Jacobian vectors (asserted equal to JME__J_COUNT right after this enum)
+    JME__J1_COUNT = JME__J1_MAX - JME__J1_MIN,
+    JME__J2_COUNT = JME__J2_MAX - JME__J2_MIN,
+    JME__J_COUNT = JVE__MAX,
+};
+
+dSASSERT(JME__J_COUNT == JME__J1_COUNT);
+dSASSERT(JME__J_COUNT == JME__J2_COUNT);
+
+enum dxJacobiCopyElement // Sequentially numbered offsets: J1 (LX..LZ, AX..AZ) immediately followed by J2 (LX..LZ, AX..AZ)
+{
+    JCE__MIN,
+    // J1 block starts at the beginning of the record
+    JCE__J1_MIN = JCE__MIN,
+    JCE__J1L_MIN = JCE__J1_MIN,
+
+    JCE_J1LX = JCE__J1L_MIN,
+    JCE_J1LY,
+    JCE_J1LZ,
+
+    JCE__J1L_MAX,
+
+    JCE__J1A_MIN = JCE__J1L_MAX,
+
+    JCE_J1AX = JCE__J1A_MIN,
+    JCE_J1AY,
+    JCE_J1AZ,
+
+    JCE__J1A_MAX,
+    JCE__J1_MAX = JCE__J1A_MAX,
+    // J2 block starts right where J1 ends; nothing is stored in between
+    JCE__J2_MIN = JCE__J1_MAX,
+    JCE__J2L_MIN = JCE__J2_MIN,
+
+    JCE_J2LX = JCE__J2L_MIN,
+    JCE_J2LY,
+    JCE_J2LZ,
+
+    JCE__J2L_MAX,
+
+    JCE__J2A_MIN = JCE__J2L_MAX,
+
+    JCE_J2AX = JCE__J2A_MIN,
+    JCE_J2AY,
+    JCE_J2AZ,
+
+    JCE__J2A_MAX,
+    JCE__J2_MAX = JCE__J2A_MAX,
+    // Total element count of one record
+    JCE__MAX = JCE__J2_MAX,
+    // Sizes of the two blocks and the larger of the two
+    JCE__J1_COUNT = JCE__J1_MAX - JCE__J1_MIN,
+    JCE__J2_COUNT = JCE__J2_MAX - JCE__J2_MIN,
+    JCE__JMAX_COUNT = dMAX(JCE__J1_COUNT, JCE__J2_COUNT),
+};
+
+enum dxInvMJTElement // Element offsets within one iMJ row: two Jacobian-vector-sized halves, one per body
+{
+    IMJ__MIN,
+    // First half: [IMJ__1_MIN, IMJ__1_MAX), laid out like dxJacobiVectorElement
+    IMJ__1_MIN = IMJ__MIN,
+    
+    IMJ__1L_MIN = IMJ__1_MIN + JVE__L_MIN,
+
+    IMJ_1LX = IMJ__1_MIN + JVE_LX,
+    IMJ_1LY = IMJ__1_MIN + JVE_LY,
+    IMJ_1LZ = IMJ__1_MIN + JVE_LZ,
+
+    IMJ__1L_MAX = IMJ__1_MIN + JVE__L_MAX,
+
+    IMJ__1A_MIN = IMJ__1_MIN + JVE__A_MIN,
+
+    IMJ_1AX = IMJ__1_MIN + JVE_AX,
+    IMJ_1AY = IMJ__1_MIN + JVE_AY,
+    IMJ_1AZ = IMJ__1_MIN + JVE_AZ,
+
+    IMJ__1A_MAX = IMJ__1_MIN + JVE__A_MAX,
+    
+    IMJ__1_MAX = IMJ__1_MIN + JVE__MAX,
+    // Second half: [IMJ__2_MIN, IMJ__2_MAX), starting where the first half ends
+    IMJ__2_MIN = IMJ__1_MAX,
+
+    IMJ__2L_MIN = IMJ__2_MIN + JVE__L_MIN,
+
+    IMJ_2LX = IMJ__2_MIN + JVE_LX,
+    IMJ_2LY = IMJ__2_MIN + JVE_LY,
+    IMJ_2LZ = IMJ__2_MIN + JVE_LZ,
+
+    IMJ__2L_MAX = IMJ__2_MIN + JVE__L_MAX,
+
+    IMJ__2A_MIN = IMJ__2_MIN + JVE__A_MIN,
+
+    IMJ_2AX = IMJ__2_MIN + JVE_AX,
+    IMJ_2AY = IMJ__2_MIN + JVE_AY,
+    IMJ_2AZ = IMJ__2_MIN + JVE_AZ,
+
+    IMJ__2A_MAX = IMJ__2_MIN + JVE__A_MAX,
+
+    IMJ__2_MAX = IMJ__2_MIN + JVE__MAX,
+    // Total element count of one iMJ row
+    IMJ__MAX = IMJ__2_MAX,
+};
+
+enum dxContactForceElement // Element offsets within one CFE__MAX-sized record: the dDA_* layout rebased onto CFE__DYNAMICS_MIN
+{
+    CFE__MIN,
+    // The dynamics part starts at the beginning of the record
+    CFE__DYNAMICS_MIN = CFE__MIN,
+
+    CFE__L_MIN = CFE__DYNAMICS_MIN + dDA__L_MIN,
+
+    CFE_LX = CFE__DYNAMICS_MIN + dDA_LX,
+    CFE_LY = CFE__DYNAMICS_MIN + dDA_LY,
+    CFE_LZ = CFE__DYNAMICS_MIN + dDA_LZ,
+
+    CFE__L_MAX = CFE__DYNAMICS_MIN + dDA__L_MAX,
+
+    CFE__A_MIN = CFE__DYNAMICS_MIN + dDA__A_MIN,
+
+    CFE_AX = CFE__DYNAMICS_MIN + dDA_AX,
+    CFE_AY = CFE__DYNAMICS_MIN + dDA_AY,
+    CFE_AZ = CFE__DYNAMICS_MIN + dDA_AZ,
+
+    CFE__A_MAX = CFE__DYNAMICS_MIN + dDA__A_MAX,
+
+    CFE__DYNAMICS_MAX = CFE__DYNAMICS_MIN + dDA__MAX,
+    // Total element count; the dynamics part is the whole record
+    CFE__MAX = CFE__DYNAMICS_MAX,
+};
+
+enum dxRHSElement // Element offsets within one RHS__MAX-sized record: the dDA_* layout rebased onto RHS__DYNAMICS_MIN
+{
+    RHS__MIN,
+    // The dynamics part starts at the beginning of the record
+    RHS__DYNAMICS_MIN = RHS__MIN,
+
+    RHS__L_MIN = RHS__DYNAMICS_MIN + dDA__L_MIN,
+
+    RHS_LX = RHS__DYNAMICS_MIN + dDA_LX,
+    RHS_LY = RHS__DYNAMICS_MIN + dDA_LY,
+    RHS_LZ = RHS__DYNAMICS_MIN + dDA_LZ,
+
+    RHS__L_MAX = RHS__DYNAMICS_MIN + dDA__L_MAX,
+
+    RHS__A_MIN = RHS__DYNAMICS_MIN + dDA__A_MIN,
+
+    RHS_AX = RHS__DYNAMICS_MIN + dDA_AX,
+    RHS_AY = RHS__DYNAMICS_MIN + dDA_AY,
+    RHS_AZ = RHS__DYNAMICS_MIN + dDA_AZ,
+
+    RHS__A_MAX = RHS__DYNAMICS_MIN + dDA__A_MAX,
+
+    RHS__DYNAMICS_MAX = RHS__DYNAMICS_MIN + dDA__MAX,
+    // Total element count; the dynamics part is the whole record
+    RHS__MAX = RHS__DYNAMICS_MAX,
+};
+
+
+#define JACOBIAN_ALIGNMENT  dMAX(JME__MAX * sizeof(dReal), EFFICIENT_ALIGNMENT) // at least the byte size of one Jacobian row
+dSASSERT(((JME__MAX - 1) & JME__MAX) == 0); // Otherwise there is no reason to over-align the Jacobian
+// The remaining alignments are 32, or EFFICIENT_ALIGNMENT when that is larger
+#define JCOPY_ALIGNMENT    dMAX(32, EFFICIENT_ALIGNMENT)
+#define INVI_ALIGNMENT     dMAX(32, EFFICIENT_ALIGNMENT)
+#define INVMJ_ALIGNMENT    dMAX(32, EFFICIENT_ALIGNMENT)
+
+
+struct dxQuickStepperStage0Outputs // Three counters; held by value in the stage 1 context and by pointer in the stage 0 joints context
+{
+    unsigned int                    nj;  // presumably the joint count -- confirm against the stage 0 joints code
+    unsigned int                    m;   // presumably the total constraint row count -- confirm
+    unsigned int                    mfb; // presumably the feedback row count -- confirm
+};
+
+struct dxQuickStepperStage1CallContext // Argument bundle for dxQuickStepIsland_Stage1(); also holds the stage 0 output counters
+{
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, void *stageMemArenaState, dReal *invI, dJointWithInfo1 *jointinfos)
+    {
+        m_stepperCallContext = stepperCallContext;
+        m_stageMemArenaState = stageMemArenaState; 
+        m_invI = invI;
+        m_jointinfos = jointinfos;
+    }
+    // Initialize() stores its four arguments; m_stage0Outputs is not assigned by it
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    void                            *m_stageMemArenaState;
+    dReal                           *m_invI;
+    dJointWithInfo1                 *m_jointinfos;
+    dxQuickStepperStage0Outputs     m_stage0Outputs;
+};
+
+struct dxQuickStepperStage0BodiesCallContext // Argument bundle for dxQuickStepIsland_Stage0_Bodies()
+{
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dReal *invI)
+    {
+        m_stepperCallContext = stepperCallContext;
+        m_invI = invI;
+        m_tagsTaken = 0;
+        m_gravityTaken = 0;
+        m_inertiaBodyIndex = 0;
+    }
+    // Initialize() stores the two pointers and zeroes the three atomicord32 members below
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    dReal                           *m_invI;
+    atomicord32                     m_tagsTaken;
+    atomicord32                     m_gravityTaken;
+    volatile atomicord32            m_inertiaBodyIndex;
+};
+
+struct dxQuickStepperStage0JointsCallContext // Argument bundle for dxQuickStepIsland_Stage0_Joints()
+{
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dJointWithInfo1 *jointinfos, dxQuickStepperStage0Outputs *stage0Outputs)
+    {
+        m_stepperCallContext = stepperCallContext;
+        m_jointinfos = jointinfos;
+        m_stage0Outputs = stage0Outputs;
+    }
+    // All three members are plain copies of the Initialize() arguments; nothing is owned here
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    dJointWithInfo1                 *m_jointinfos;
+    dxQuickStepperStage0Outputs     *m_stage0Outputs;
+};
+
+static int dxQuickStepIsland_Stage0_Bodies_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage0_Joints_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage1_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxQuickStepIsland_Stage0_Bodies(dxQuickStepperStage0BodiesCallContext *callContext);
+static void dxQuickStepIsland_Stage0_Joints(dxQuickStepperStage0JointsCallContext *callContext);
+static void dxQuickStepIsland_Stage1(dxQuickStepperStage1CallContext *callContext);
+
+
+struct dxQuickStepperLocalContext // Buffers and counters referenced by pointer from the stage 2..6 call contexts
+{
+    void Initialize(dReal *invI, dJointWithInfo1 *jointinfos, unsigned int nj, 
+        unsigned int m, unsigned int mfb, const dxMIndexItem *mindex, dxJBodiesItem *jb, int *findex, 
+        dReal *J, dReal *Jcopy)
+    {
+        m_invI = invI;
+        m_jointinfos = jointinfos;
+        m_nj = nj;
+        m_m = m;
+        m_mfb = mfb;
+        m_valid_findices = 0; // the only member not taken from an argument
+        m_mindex = mindex;
+        m_jb = jb;
+        m_findex = findex; 
+        m_J = J;
+        m_Jcopy = Jcopy;
+    }
+    // Every member below is assigned in Initialize(); the pointers are stored as given, not copied or owned
+    dReal                           *m_invI;
+    dJointWithInfo1                 *m_jointinfos;
+    unsigned int                    m_nj;
+    unsigned int                    m_m;
+    unsigned int                    m_mfb;
+    volatile atomicord32            m_valid_findices;
+    const dxMIndexItem              *m_mindex;
+    dxJBodiesItem                   *m_jb;
+    int                             *m_findex;
+    dReal                           *m_J;
+    dReal                           *m_Jcopy;
+};
+
+struct dxQuickStepperStage3CallContext // Argument bundle for dxQuickStepIsland_Stage3()
+{
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxQuickStepperLocalContext *localContext, 
+        void *stage1MemArenaState)
+    {
+        m_stepperCallContext = callContext;
+        m_localContext = localContext;
+        m_stage1MemArenaState = stage1MemArenaState;
+    }
+    // All three members are plain copies of the Initialize() arguments
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxQuickStepperLocalContext   *m_localContext;
+    void                            *m_stage1MemArenaState;
+};
+
+struct dxQuickStepperStage2CallContext // Argument bundle for dxQuickStepIsland_Stage2a/2b/2c()
+{
+    void Initialize(const dxStepperProcessingCallContext *callContext, dxQuickStepperLocalContext *localContext, 
+        dReal *rhs_tmp)
+    {
+        m_stepperCallContext = callContext;
+        m_localContext = localContext;
+        m_rhs_tmp = rhs_tmp;
+        m_ji_J = 0;
+        m_ji_jb = 0;
+        m_bi = 0;
+        m_Jrhsi = 0;
+    }
+    // Initialize() stores the three pointers and zeroes the four volatile counters below
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    dxQuickStepperLocalContext      *m_localContext;
+    dReal                           *m_rhs_tmp;
+    volatile atomicord32            m_ji_J;
+    volatile atomicord32            m_ji_jb;
+    volatile atomicord32            m_bi;
+    volatile atomicord32            m_Jrhsi;
+};
+
+static int dxQuickStepIsland_Stage2a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage2aSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage2b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage2bSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage2c_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage3_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxQuickStepIsland_Stage2a(dxQuickStepperStage2CallContext *stage2CallContext);
+static void dxQuickStepIsland_Stage2b(dxQuickStepperStage2CallContext *stage2CallContext);
+static void dxQuickStepIsland_Stage2c(dxQuickStepperStage2CallContext *stage2CallContext);
+static void dxQuickStepIsland_Stage3(dxQuickStepperStage3CallContext *stage3CallContext);
+
+
+struct dxQuickStepperStage5CallContext // Argument bundle for dxQuickStepIsland_Stage5()
+{
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxQuickStepperLocalContext *localContext, 
+        void *stage3MemArenaState)
+    {
+        m_stepperCallContext = callContext;
+        m_localContext = localContext;
+        m_stage3MemArenaState = stage3MemArenaState;
+    }
+    // All three members are plain copies of the Initialize() arguments
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxQuickStepperLocalContext   *m_localContext;
+    void                            *m_stage3MemArenaState;
+};
+
+struct dxQuickStepperStage4CallContext // Argument bundle shared by the dxQuickStepIsland_Stage4* functions
+{
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxQuickStepperLocalContext *localContext, 
+        dReal *lambda, dReal *cforce, dReal *iMJ, IndexError *order, dReal *last_lambda, atomicord32 *bi_links_or_mi_levels, atomicord32 *mi_links)
+    {
+        m_stepperCallContext = callContext;
+        m_localContext = localContext;
+        m_lambda = lambda;
+        m_cforce = cforce;
+        m_iMJ = iMJ;
+        m_order = order;
+        m_last_lambda = last_lambda;
+        m_bi_links_or_mi_levels = bi_links_or_mi_levels;
+        m_mi_links = mi_links;
+        m_LCP_IterationSyncReleasee = NULL;
+        m_LCP_IterationAllowedThreads = 0;
+        m_LCP_fcStartReleasee = NULL;
+        m_ji_4a = 0;
+        m_mi_iMJ = 0;
+        m_mi_fc = 0;
+        m_mi_Ad = 0;
+        m_LCP_iteration = 0;
+        m_cf_4b = 0;
+        m_ji_4b = 0;
+    }
+    // Stores the iteration sync releasee together with the allowed thread count
+    void AssignLCP_IterationData(dCallReleaseeID releaseeInstance, unsigned int iterationAllowedThreads)
+    {
+        m_LCP_IterationSyncReleasee = releaseeInstance;
+        m_LCP_IterationAllowedThreads = iterationAllowedThreads;
+    }
+    // Stores the fc start releasee
+    void AssignLCP_fcStartReleasee(dCallReleaseeID releaseeInstance)
+    {
+        m_LCP_fcStartReleasee = releaseeInstance;
+    }
+    // Sets the two fc thread counters; Initialize() does not assign them
+    void AssignLCP_fcAllowedThreads(unsigned int prepareThreads, unsigned int completeThreads)
+    {
+        m_LCP_fcPrepareThreadsRemaining = prepareThreads;
+        m_LCP_fcCompleteThreadsTotal = completeThreads;
+    }
+    // Zeroes m_mi_fc again after Initialize()
+    void ResetLCP_fcComputationIndex()
+    {
+        m_mi_fc = 0;
+    }
+    // Zeroes the six m_SOR_*Taken counters and sets the remaining reorder thread count; Initialize() does not assign them
+    void ResetSOR_ConstraintsReorderVariables(unsigned reorderThreads)
+    {
+        m_SOR_reorderHeadTaken = 0;
+        m_SOR_reorderTailTaken = 0;
+        m_SOR_bi_zeroHeadTaken = 0;
+        m_SOR_bi_zeroTailTaken = 0;
+        m_SOR_mi_zeroHeadTaken = 0;
+        m_SOR_mi_zeroTailTaken = 0;
+        m_SOR_reorderThreadsRemaining = reorderThreads;
+    }
+    // Sets both the total and the remaining iteration thread counts to totalThreads and stores the next releasee
+    void RecordLCP_IterationStart(unsigned int totalThreads, dCallReleaseeID nextReleasee)
+    {
+        m_LCP_iterationThreadsTotal = totalThreads;
+        m_LCP_iterationThreadsRemaining = totalThreads;
+        m_LCP_iterationNextReleasee = nextReleasee;
+    }
+
+    // Data members; the ones not listed in Initialize() hold no defined value until one of the methods above sets them
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxQuickStepperLocalContext   *m_localContext;
+    dReal                           *m_lambda;
+    dReal                           *m_cforce;
+    dReal                           *m_iMJ;
+    IndexError                      *m_order;
+    dReal                           *m_last_lambda;
+    atomicord32                     *m_bi_links_or_mi_levels;
+    atomicord32                     *m_mi_links;
+    dCallReleaseeID                 m_LCP_IterationSyncReleasee;
+    unsigned int                    m_LCP_IterationAllowedThreads;
+    dCallReleaseeID                 m_LCP_fcStartReleasee;
+    volatile atomicord32            m_ji_4a;
+    volatile atomicord32            m_mi_iMJ;
+    volatile atomicord32            m_mi_fc;
+    volatile atomicord32            m_LCP_fcPrepareThreadsRemaining;
+    unsigned int                    m_LCP_fcCompleteThreadsTotal;
+    volatile atomicord32            m_mi_Ad;
+    unsigned int                    m_LCP_iteration;
+    unsigned int                    m_LCP_iterationThreadsTotal;
+    volatile atomicord32            m_LCP_iterationThreadsRemaining;
+    dCallReleaseeID                 m_LCP_iterationNextReleasee;
+    volatile atomicord32            m_SOR_reorderHeadTaken;
+    volatile atomicord32            m_SOR_reorderTailTaken;
+    volatile atomicord32            m_SOR_bi_zeroHeadTaken;
+    volatile atomicord32            m_SOR_bi_zeroTailTaken;
+    volatile atomicord32            m_SOR_mi_zeroHeadTaken;
+    volatile atomicord32            m_SOR_mi_zeroTailTaken;
+    volatile atomicord32            m_SOR_reorderThreadsRemaining;
+    volatile atomicord32            m_cf_4b;
+    volatile atomicord32            m_ji_4b;
+};
+
+
+static int dxQuickStepIsland_Stage4a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_iMJ_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_iMJSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_fcStart_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_fc_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+#ifdef WARM_STARTING
+static int dxQuickStepIsland_Stage4LCP_fcWarmComplete_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+#endif
+static int dxQuickStepIsland_Stage4LCP_Ad_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_ReorderPrep_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_IterationStart_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_ConstraintsReordering_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_ConstraintsReorderingSync_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_Iteration_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4LCP_IterationSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage4b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage5_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxQuickStepIsland_Stage4a(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_iMJComputation(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation(dxQuickStepperStage4CallContext *stage4CallContext, dCallReleaseeID callThisReleasee);
+#ifdef WARM_STARTING
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warm(dxQuickStepperStage4CallContext *stage4CallContext, dCallReleaseeID callThisReleasee);
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmZeroArrays(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmPrepare(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmComplete(dxQuickStepperStage4CallContext *stage4CallContext);
+#endif
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_cold(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_STfcComputation(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_AdComputation(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_ReorderPrep(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_ConstraintsReordering(dxQuickStepperStage4CallContext *stage4CallContext);
+static bool dxQuickStepIsland_Stage4LCP_ConstraintsShuffling(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int iteration);
+static void dxQuickStepIsland_Stage4LCP_LinksArraysZeroing(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_DependencyMapForNewOrderRebuilding(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_DependencyMapFromSavedLevelsReconstruction(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_MTIteration(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int initiallyKnownToBeCompletedLevel);
+static void dxQuickStepIsland_Stage4LCP_STIteration(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage4LCP_IterationStep(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int i);
+static void dxQuickStepIsland_Stage4b(dxQuickStepperStage4CallContext *stage4CallContext);
+static void dxQuickStepIsland_Stage5(dxQuickStepperStage5CallContext *stage5CallContext);
+
+
+struct dxQuickStepperStage6CallContext // Argument bundle for dxQuickStepIsland_Stage6a/6b() and the stage 6 velocity check
+{
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxQuickStepperLocalContext *localContext)
+    {
+        m_stepperCallContext = callContext;
+        m_localContext = localContext;
+        m_bi_6a = 0;
+        m_bi_6b = 0;
+    }
+    // Initialize() stores the two pointers and zeroes the two volatile counters below
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxQuickStepperLocalContext *m_localContext;
+    volatile atomicord32            m_bi_6a;
+    volatile atomicord32            m_bi_6b;
+};
+
+static int dxQuickStepIsland_Stage6a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage6aSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxQuickStepIsland_Stage6b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxQuickStepIsland_Stage6a(dxQuickStepperStage6CallContext *stage6CallContext);
+static void dxQuickStepIsland_Stage6_VelocityCheck(dxQuickStepperStage6CallContext *stage6CallContext);
+static void dxQuickStepIsland_Stage6b(dxQuickStepperStage6CallContext *stage6CallContext);
+
+//***************************************************************************
+// various common computations involving the matrix J
+
+// Computes iMJ = inv(M)*J'. For each constraint row the linear Jacobian part is scaled by the
+// body's invMass and the angular part is multiplied by the body's invI matrix.
+// Rows are processed in chunks of step_size; chunk indices come from ThrsafeIncrementIntUpToLimit().
+template<unsigned int step_size>
+void compute_invM_JT (volatile atomicord32 *mi_storage, dReal *iMJ, 
+    unsigned int m, const dReal *J, const dxJBodiesItem *jb,
+    dxBody * const *body, const dReal *invI)
+{
+    const unsigned int chunkCount = (m + (step_size - 1)) / step_size;
+
+    for (;;) {
+        const unsigned int chunk = ThrsafeIncrementIntUpToLimit(mi_storage, chunkCount);
+        if (chunk == chunkCount) {
+            break; // no chunks left to take
+        }
+        const unsigned int rowBegin = chunk * step_size;
+        const unsigned int rowEnd = rowBegin + dMIN(step_size, m - rowBegin);
+
+        for (unsigned int row = rowBegin; row != rowEnd; ++row) {
+            dReal *iMJ_row = iMJ + (sizeint)row * IMJ__MAX;
+            const dReal *J_row = J + (sizeint)row * JME__MAX;
+
+            // First body of the row.
+            const unsigned int b1 = jb[row].first;
+            const dReal invMass1 = body[b1]->invMass;
+            for (unsigned int j = 0; j != JVE__L_COUNT; ++j) iMJ_row[IMJ__1L_MIN + j] = invMass1 * J_row[JME__J1L_MIN + j];
+            const dReal *invI1 = invI + (sizeint)b1 * IIE__MAX + IIE__MATRIX_MIN;
+            dMultiply0_331 (iMJ_row + IMJ__1A_MIN, invI1, J_row + JME__J1A_MIN);
+
+            // Second body is optional: -1 means absent, and its half of the iMJ row is left unwritten.
+            const int b2 = jb[row].second;
+            if (b2 != -1) {
+                const dReal invMass2 = body[(unsigned)b2]->invMass;
+                for (unsigned int j = 0; j != JVE__L_COUNT; ++j) iMJ_row[IMJ__2L_MIN + j] = invMass2 * J_row[JME__J2L_MIN + j];
+                const dReal *invI2 = invI + (sizeint)(unsigned)b2 * IIE__MAX + IIE__MATRIX_MIN;
+                dMultiply0_331 (iMJ_row + IMJ__2A_MIN, invI2, J_row + JME__J2A_MIN);
+            }
+        }
+    }
+}
+
+#ifdef WARM_STARTING
+
+static 
+void multiply_invM_JT_init_array(unsigned int nb, atomicord32 *bi_links/*=[nb]*/)
+{
+    // Zero-fills bi_links[0..nb). The memset stands in for the loop
+    //     for (bi = 0; bi != nb; ++bi) bi_links[bi] = dxENCODE_INDEX(-1);
+    // because dxENCODE_INDEX(-1) is (unsigned int)(-1 + 1), i.e. zero.
+    // NOTE(review): relies on an all-zero byte pattern being the value 0 for atomicord32.
+    memset(bi_links, 0, nb * sizeof(bi_links[0]));
+}
+
+// Preparation pass for computing out = inv(M)*J'*in: for each row, swaps the row's encoded index into bi_links[] for its bodies.
+template<unsigned int step_size>
+void multiply_invM_JT_prepare(volatile atomicord32 *mi_storage, 
+    unsigned int m, const dxJBodiesItem *jb, atomicord32 *bi_links/*=[nb]*/, atomicord32 *mi_links/*=[2*m]*/)
+{
+    unsigned int m_steps = (m + (step_size - 1)) / step_size;
+    // Take chunk indices from the shared counter until it returns m_steps
+    unsigned mi_step;
+    while ((mi_step = ThrsafeIncrementIntUpToLimit(mi_storage, m_steps)) != m_steps) {
+        unsigned int mi = mi_step * step_size;
+        const unsigned int miend = mi + dMIN(step_size, m - mi);
+        // Rows [mi, miend) of this chunk; the last chunk may be shorter than step_size
+        while (true) {
+            int b1 = jb[mi].first;
+            int b2 = jb[mi].second;
+            // The row becomes the newest entry for body b1; the value it replaced goes to mi_links[2*mi]
+            const unsigned encoded_mi = dxENCODE_INDEX(mi);
+            unsigned oldIndex_b1 = ThrsafeExchange(&bi_links[b1], encoded_mi);
+            mi_links[(sizeint)mi * 2] = oldIndex_b1;
+            // Same for the optional second body, using mi_links[2*mi + 1]; that slot is not written when b2 is -1
+            if (b2 != -1) {
+                unsigned oldIndex_b2 = ThrsafeExchange(&bi_links[b2], encoded_mi);
+                mi_links[(sizeint)mi * 2 + 1] = oldIndex_b2;
+            }
+
+            if (++mi == miend) {
+                break;
+            }
+        }
+    }
+}
+
+// Accumulation pass for out = inv(M)*J'*in: for every body the chain of rows recorded in
+// bi_links/mi_links is walked and the sum of in[row] * iMJ block is written to `out'.
+template<unsigned int step_size, unsigned int out_offset, unsigned int out_stride>
+void multiply_invM_JT_complete(volatile atomicord32 *bi_storage, dReal *out, 
+    unsigned int nb, const dReal *iMJ, const dxJBodiesItem *jb, const dReal *in, 
+    atomicord32 *bi_links/*=[nb]*/, atomicord32 *mi_links/*=[2*m]*/)
+{
+    const unsigned chainTerminator = dxENCODE_INDEX(-1);
+    const unsigned int batchCount = (nb + (step_size - 1)) / step_size;
+
+    // Bodies are handed out in batches of step_size through the shared counter bi_storage.
+    for (unsigned batchIndex; (batchIndex = ThrsafeIncrementIntUpToLimit(bi_storage, batchCount)) != batchCount; ) {
+        unsigned int bi = batchIndex * step_size;
+        const unsigned int biend = bi + dMIN(step_size, nb - bi);
+
+        for (dReal *out_ptr = out + (sizeint)bi * out_stride + out_offset; ; out_ptr += out_stride) {
+            dReal sumLX = REAL(0.0), sumLY = REAL(0.0), sumLZ = REAL(0.0);
+            dReal sumAX = REAL(0.0), sumAY = REAL(0.0), sumAZ = REAL(0.0);
+
+            for (unsigned link = bi_links[bi]; link != chainTerminator; ) {
+                const unsigned int mi = dxDECODE_INDEX(link);
+                const dReal *iMJ_block = iMJ + (sizeint)mi * IMJ__MAX;
+
+                // Pick the body-1 or body-2 half of the row and follow the matching link slot.
+                if (bi != jb[mi].first) {
+                    dIASSERT(bi == jb[mi].second);
+
+                    iMJ_block += IMJ__2_MIN;
+                    link = mi_links[(sizeint)mi * 2 + 1];
+                }
+                else {
+                    iMJ_block += IMJ__1_MIN;
+                    link = mi_links[(sizeint)mi * 2];
+                }
+
+                const dReal in_i = in[mi];
+                sumLX += in_i * iMJ_block[JVE_LX]; sumLY += in_i * iMJ_block[JVE_LY]; sumLZ += in_i * iMJ_block[JVE_LZ];
+                sumAX += in_i * iMJ_block[JVE_AX]; sumAY += in_i * iMJ_block[JVE_AY]; sumAZ += in_i * iMJ_block[JVE_AZ];
+            }
+
+            out_ptr[dDA_LX] = sumLX; out_ptr[dDA_LY] = sumLY; out_ptr[dDA_LZ] = sumLZ;
+            out_ptr[dDA_AX] = sumAX; out_ptr[dDA_AY] = sumAY; out_ptr[dDA_AZ] = sumAZ;
+
+            if (++bi == biend) {
+                break;
+            }
+        }
+    }
+}
+
+// Serial form of out = inv(M)*J'*in: clears `out', then adds in[i] * (iMJ block of row i)
+// to the output slot of each body referenced by jb[i].
+template<unsigned int out_offset, unsigned int out_stride>
+void _multiply_invM_JT (dReal *out, 
+    unsigned int m, unsigned int nb, dReal *iMJ, const dxJBodiesItem *jb, const dReal *in)
+{
+    // Layout assertions: a full JVE vector must fit into one output slot.
+    dSASSERT(out_stride - out_offset >= JVE__MAX);
+    dSASSERT(JVE__MAX == (int)dDA__MAX);
+
+    dSetZero (out, (sizeint)nb * out_stride);
+    for (unsigned int row = 0; row < m; ++row) {
+        const dReal *const rowBlock = iMJ + (sizeint)row * IMJ__MAX;
+        const dReal scale = in[row];
+
+        const int firstBody = jb[row].first;
+        dReal *target = out + (sizeint)(unsigned)firstBody * out_stride + out_offset;
+        for (unsigned int j = JVE__MIN; j != JVE__MAX; ++j) target[j - JVE__MIN] += rowBlock[IMJ__1_MIN + j] * scale;
+
+        const int secondBody = jb[row].second;
+        if (secondBody != -1) {
+            target = out + (sizeint)(unsigned)secondBody * out_stride + out_offset;
+            for (unsigned int j = JVE__MIN; j != JVE__MAX; ++j) target[j - JVE__MIN] += rowBlock[IMJ__2_MIN + j] * scale;
+        }
+    }
+}
+#endif
+
+// Adds J*in to the right-hand side kept inside J: J[row][JME_RHS] += J1(row)*in(b1) + J2(row)*in(b2).
+template<unsigned int step_size, unsigned int in_offset, unsigned int in_stride>
+void multiplyAdd_J (volatile atomicord32 *mi_storage, 
+    unsigned int m, dReal *J, const dxJBodiesItem *jb, const dReal *in)
+{
+    // Layout assertions: the J1/J2 blocks must fit into one input slot.
+    dSASSERT(in_offset + JME__J1_COUNT <= in_stride);
+    dSASSERT(in_offset + JME__J2_COUNT <= in_stride);
+
+    const unsigned int batchCount = (m + (step_size - 1)) / step_size;
+
+    // Rows are handed out in batches of step_size through the shared counter mi_storage.
+    for (unsigned batchIndex; (batchIndex = ThrsafeIncrementIntUpToLimit(mi_storage, batchCount)) != batchCount; ) {
+        unsigned int row = batchIndex * step_size;
+        const unsigned int rowEnd = row + dMIN(step_size, m - row);
+
+        for (dReal *rowData = J + (sizeint)row * JME__MAX; ; rowData += JME__MAX) {
+            const int firstBody = jb[row].first;
+            const int secondBody = jb[row].second;
+            dReal dot = REAL(0.0);
+            const dReal *source = in + (sizeint)(unsigned)firstBody * in_stride + in_offset;
+            for (unsigned int k = 0; k != JME__J1_COUNT; ++k) dot += rowData[k + JME__J1_MIN] * source[k];
+
+            if (secondBody != -1) {
+                source = in + (sizeint)(unsigned)secondBody * in_stride + in_offset;
+                for (unsigned int k = 0; k != JME__J2_COUNT; ++k) dot += rowData[k + JME__J2_MIN] * source[k];
+            }
+            rowData[JME_RHS] += dot;
+
+            if (++row == rowEnd) {
+                break;
+            }
+        }
+    }
+}
+
+
+struct IndexError { // per-row record: a row index, plus a sort key when reordering by error is compiled in
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+    dReal error;		// error value to sort on (compared by compare_index_error)
+#endif
+    int index;		// row index
+};
+
+
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+
+static int compare_index_error (const void *a, const void *b)
+{
+    const IndexError *i1 = (IndexError*) a;
+    const IndexError *i2 = (IndexError*) b;
+    if (i1->error < i2->error) return -1;
+    if (i1->error > i2->error) return 1;
+    return 0;
+}
+
+#endif // #if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+
+// Tells whether the SOR constraint order has to be rebuilt for the given iteration;
+// the policy is selected at compile time by CONSTRAINTS_REORDERING_METHOD.
+static inline bool IsSORConstraintsReorderRequiredForIteration(unsigned iteration)
+{
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+
+    // Error-based ordering: every iteration asks for a reorder.
+    const bool reorderNeeded = true;
+
+#elif CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__RANDOMLY
+
+    // While dIN_RANGE(iteration, 0, RANDOM_CONSTRAINTS_REORDERING_FREQUENCY) holds only iteration 0
+    // qualifies; afterwards the position inside the period is tested against RRS__MIN..RRS__MAX.
+    bool reorderNeeded;
+    if (dIN_RANGE(iteration, 0, RANDOM_CONSTRAINTS_REORDERING_FREQUENCY)) {
+        reorderNeeded = (iteration == 0);
+    }
+    else {
+        reorderNeeded = dIN_RANGE(iteration % RANDOM_CONSTRAINTS_REORDERING_FREQUENCY, RRS__MIN, RRS__MAX) ? true : false;
+    }
+
+#else  // #if CONSTRAINTS_REORDERING_METHOD != REORDERING_METHOD__BY_ERROR && CONSTRAINTS_REORDERING_METHOD != REORDERING_METHOD__RANDOMLY
+
+    // Any other method: reorder once, on the first iteration.
+    const bool reorderNeeded = (iteration == 0);
+
+#endif
+
+
+    return reorderNeeded;
+}
+
+/*extern */ // Entry point of the QuickStep island solver: allocates shared buffers, then runs or schedules stages 0 and 1.
+void dxQuickStepIsland(const dxStepperProcessingCallContext *callContext)
+{
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    unsigned int nb = callContext->m_islandBodiesCount;
+    unsigned int _nj = callContext->m_islandJointsCount;
+    // invI gets IIE__MAX reals per body; jointinfos is sized for all _nj joints (Stage1 shrinks it to the active ones)
+    dReal *invI = memarena->AllocateOveralignedArray<dReal>((sizeint)nb * IIE__MAX, INVI_ALIGNMENT);
+    dJointWithInfo1 *const jointinfos = memarena->AllocateArray<dJointWithInfo1>(_nj);
+
+    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+    dIASSERT(allowedThreads != 0);
+    // The arena state is captured before the stage contexts are allocated and handed to the Stage1 context
+    void *stagesMemArenaState = memarena->SaveState();
+
+    dxQuickStepperStage1CallContext *stage1CallContext = (dxQuickStepperStage1CallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage1CallContext));
+    stage1CallContext->Initialize(callContext, stagesMemArenaState, invI, jointinfos);
+
+    dxQuickStepperStage0BodiesCallContext *stage0BodiesCallContext = (dxQuickStepperStage0BodiesCallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage0BodiesCallContext));
+    stage0BodiesCallContext->Initialize(callContext, invI);
+
+    dxQuickStepperStage0JointsCallContext *stage0JointsCallContext = (dxQuickStepperStage0JointsCallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage0JointsCallContext));
+    stage0JointsCallContext->Initialize(callContext, jointinfos, &stage1CallContext->m_stage0Outputs);
+
+    if (allowedThreads == 1)
+    {
+        // Single-threaded: the stages are simply run back to back on this thread
+        IFTIMING(dTimerStart("preprocessing"));
+        dxQuickStepIsland_Stage0_Bodies(stage0BodiesCallContext);
+        dxQuickStepIsland_Stage0_Joints(stage0JointsCallContext);
+        dxQuickStepIsland_Stage1(stage1CallContext);
+    }
+    else
+    {
+        unsigned bodyThreads = CalculateOptimalThreadsCount<1U>(nb, allowedThreads);
+        unsigned jointThreads = 1;
+
+        dxWorld *world = callContext->m_world;
+        // Stage1 is posted first, with one dependency per stage-0 worker (bodyThreads + jointThreads)
+        dCallReleaseeID stage1CallReleasee;
+        world->PostThreadedCallForUnawareReleasee(NULL, &stage1CallReleasee, bodyThreads + jointThreads, callContext->m_finalReleasee, 
+            NULL, &dxQuickStepIsland_Stage1_Callback, stage1CallContext, 0, "QuickStepIsland Stage1");
+
+        // It is preferable to post single threaded task first to be started sooner
+        world->PostThreadedCall(NULL, NULL, 0, stage1CallReleasee, NULL, &dxQuickStepIsland_Stage0_Joints_Callback, stage0JointsCallContext, 0, "QuickStepIsland Stage0-Joints");
+        dIASSERT(jointThreads == 1);
+
+        if (bodyThreads > 1) {
+            world->PostThreadedCallsGroup(NULL, bodyThreads - 1, stage1CallReleasee, &dxQuickStepIsland_Stage0_Bodies_Callback, stage0BodiesCallContext, "QuickStepIsland Stage0-Bodies");
+        }
+        dxQuickStepIsland_Stage0_Bodies(stage0BodiesCallContext); // the calling thread does body work itself as well
+        world->AlterThreadedCallDependenciesCount(stage1CallReleasee, -1); // NOTE(review): presumably releases the dependency counted for this thread - confirm
+    }
+}    
+
+// Threaded-call adapter: recovers the typed context and forwards to dxQuickStepIsland_Stage0_Bodies().
+static int dxQuickStepIsland_Stage0_Bodies_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the call instance index nor the releasee is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage0_Bodies(static_cast<dxQuickStepperStage0BodiesCallContext *>(_callContext));
+    return 1;
+}
+
+static // Stage 0 body work: tags bodies, applies gravity, fills invI and adds gyroscopic torques
+void dxQuickStepIsland_Stage0_Bodies(dxQuickStepperStage0BodiesCallContext *callContext)
+{
+    dxBody * const *body = callContext->m_stepperCallContext->m_islandBodiesStart;
+    unsigned int nb = callContext->m_stepperCallContext->m_islandBodiesCount;
+    // Tagging is done only by the caller that finds m_tagsTaken still 0 (the flag is swapped to 1)
+    if (ThrsafeExchange(&callContext->m_tagsTaken, 1) == 0)
+    {
+        // number all bodies in the body list - set their tag values
+        for (unsigned int i=0; i<nb; i++) body[i]->tag = i;
+    }
+    // Likewise, gravity is applied only by the caller that finds m_gravityTaken still 0
+    if (ThrsafeExchange(&callContext->m_gravityTaken, 1) == 0)
+    {
+        dxWorld *world = callContext->m_stepperCallContext->m_world;
+
+        // add the gravity force to all bodies
+        // since gravity does normally have only one component it's more efficient
+        // to run three loops for each individual component
+        dxBody *const *const bodyend = body + nb;
+        dReal gravity_x = world->gravity[0];
+        if (gravity_x) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; bodycurr++) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[0] += b->mass.mass * gravity_x;
+                }
+            }
+        }
+        dReal gravity_y = world->gravity[1];
+        if (gravity_y) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; bodycurr++) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[1] += b->mass.mass * gravity_y;
+                }
+            }
+        }
+        dReal gravity_z = world->gravity[2];
+        if (gravity_z) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; bodycurr++) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[2] += b->mass.mass * gravity_z;
+                }
+            }
+        }
+    }
+
+    // for all bodies, compute the inertia tensor and its inverse in the global
+    // frame, and compute the rotational force and add it to the torque
+    // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
+    {
+        dReal *invI = callContext->m_invI;
+        unsigned int bodyIndex; // bodies are claimed one at a time through the shared counter m_inertiaBodyIndex
+        while ((bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb)) != nb) {
+            dReal *invIrow = invI + (sizeint)bodyIndex * IIE__MAX;
+            dxBody *b = body[bodyIndex];
+
+            dMatrix3 tmp;
+            // compute inverse inertia tensor in global frame
+            dMultiply2_333 (tmp, b->invI, b->posr.R);
+            dMultiply0_333 (invIrow + IIE__MATRIX_MIN, b->posr.R, tmp);
+
+            // Don't apply gyroscopic torques to bodies
+            // if not flagged or the body is kinematic
+            if ((b->flags & dxBodyGyroscopic) && (b->invMass > 0)) {
+                dMatrix3 I;
+                // compute inertia tensor in global frame
+                dMultiply2_333 (tmp, b->mass.I, b->posr.R);
+                dMultiply0_333 (I, b->posr.R, tmp);
+                // compute rotational force
+#if 0
+                // Explicit computation
+                dMultiply0_331 (tmp, I, b->avel);
+                dSubtractVectorCross3(b->tacc, b->avel, tmp);
+#else
+                // Do the implicit computation based on 
+                //"Stabilizing Gyroscopic Forces in Rigid Multibody Simulations"
+                // (Lacoursière 2006)
+                dReal h = callContext->m_stepperCallContext->m_stepSize; // Step size
+                dVector3 L; // Compute angular momentum
+                dMultiply0_331(L, I, b->avel);
+                
+                // Compute a new effective 'inertia tensor'
+                // for the implicit step: the cross-product 
+                // matrix of the angular momentum plus the
+                // old tensor scaled by the timestep.  
+                // Itild may not be symmetric pos-definite, 
+                // but we can still use it to compute implicit
+                // gyroscopic torques.
+                dMatrix3 Itild = { 0 };  
+                dSetCrossMatrixMinus(Itild, L, 4);
+                for (int ii = dM3E__MIN; ii < dM3E__MAX; ++ii) {
+                    Itild[ii] = Itild[ii] * h + I[ii]; // i.e. the cross-product matrix is the term scaled by h, then I is added
+                }
+
+                // Scale momentum by inverse time to get 
+                // a sort of "torque"
+                dScaleVector3(L, dRecip(h)); 
+                // Invert the pseudo-tensor
+                dMatrix3 itInv;
+                // This is a closed-form inversion.
+                // It's probably not numerically stable
+                // when dealing with small masses with
+                // a large asymmetry.
+                // An LU decomposition might be better.
+                if (dInvertMatrix3(itInv, Itild) != 0) { // when the result is 0 no gyroscopic torque is added for this body
+                    // "Divide" the original tensor
+                    // by the pseudo-tensor (on the right)
+                    dMultiply0_333(Itild, I, itInv);
+                    // Subtract an identity matrix
+                    Itild[dM3E_XX] -= 1; Itild[dM3E_YY] -= 1; Itild[dM3E_ZZ] -= 1;
+
+                    // This new inertia matrix rotates the 
+                    // momentum to get a new set of torques
+                    // that will work correctly when applied
+                    // to the old inertia matrix as explicit
+                    // torques with a semi-implicit update
+                    // step.
+                    dVector3 tau0;
+                    dMultiply0_331(tau0, Itild, L);
+                    
+                    // Add the gyro torques to the torque 
+                    // accumulator
+                    dAddVectors3(b->tacc, b->tacc, tau0);
+                }
+#endif
+            }
+        }
+    }
+}
+
+// Threaded-call adapter: recovers the typed context and forwards to dxQuickStepIsland_Stage0_Joints().
+static int dxQuickStepIsland_Stage0_Joints_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the call instance index nor the releasee is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage0_Joints(static_cast<dxQuickStepperStage0JointsCallContext *>(_callContext));
+    return 1;
+}
+
+// Stage 0 joint work: queries getInfo1() for every island joint, keeps only the joints that
+// report m != 0 (packed at the front of m_jointinfos) and publishes the totals in m_stage0Outputs.
+static void dxQuickStepIsland_Stage0_Joints(dxQuickStepperStage0JointsCallContext *callContext)
+{
+    dxJoint * const *const sourceJoints = callContext->m_stepperCallContext->m_islandJointsStart;
+    const unsigned int sourceCount = callContext->m_stepperCallContext->m_islandJointsCount;
+    dJointWithInfo1 *const infosBegin = callContext->m_jointinfos;
+    dJointWithInfo1 *infoSlot = infosBegin; // destination cursor; advances only for active joints
+    unsigned int rowsTotal = 0, feedbackRowsTotal = 0;
+
+    for (unsigned int sourceIndex = 0; sourceIndex != sourceCount; ++sourceIndex) {
+        dxJoint *const joint = sourceJoints[sourceIndex];
+        joint->getInfo1 (&infoSlot->info);
+        dIASSERT (/*infoSlot->info.m >= 0 && */infoSlot->info.m <= 6 && /*infoSlot->info.nub >= 0 && */infoSlot->info.nub <= infoSlot->info.m);
+
+        const unsigned int jointRows = infoSlot->info.m;
+        if (jointRows == 0) {
+            // Inactive joint: the slot is reused for the next joint, if any.
+            continue;
+        }
+        rowsTotal += jointRows;
+        if (joint->feedback != NULL) {
+            feedbackRowsTotal += jointRows;
+        }
+        infoSlot->joint = joint;
+        ++infoSlot;
+    }
+
+    const sizeint keptCount = (sizeint)(infoSlot - infosBegin);
+    dIASSERT(keptCount < UINT_MAX || keptCount == UINT_MAX); // to avoid "...always evaluates to true" warnings
+    callContext->m_stage0Outputs->m = rowsTotal;
+    callContext->m_stage0Outputs->mfb = feedbackRowsTotal;
+    callContext->m_stage0Outputs->nj = (unsigned int)(infoSlot - infosBegin);
+}
+
+// Threaded-call adapter: recovers the typed context and forwards to dxQuickStepIsland_Stage1().
+static int dxQuickStepIsland_Stage1_Callback(void *_stage1CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the call instance index nor the releasee is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage1(static_cast<dxQuickStepperStage1CallContext *>(_stage1CallContext));
+    return 1;
+}
+
+static // Stage 1: takes over the stage-0 outputs, allocates the constraint arrays and runs or schedules stages 2 and 3
+void dxQuickStepIsland_Stage1(dxQuickStepperStage1CallContext *stage1CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage1CallContext->m_stepperCallContext;
+    dReal *invI = stage1CallContext->m_invI;
+    dJointWithInfo1 *jointinfos = stage1CallContext->m_jointinfos;
+    unsigned int nj = stage1CallContext->m_stage0Outputs.nj;
+    unsigned int m = stage1CallContext->m_stage0Outputs.m;
+    unsigned int mfb = stage1CallContext->m_stage0Outputs.mfb;
+    // Everything needed from the stage1 context has been copied to locals above, before the arena is rolled back
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    memarena->RestoreState(stage1CallContext->m_stageMemArenaState);
+    stage1CallContext = NULL; // WARNING! The stage1 call context must not be used after this point (the arena state was just restored)!
+    dIVERIFY(stage1CallContext == NULL); // To suppress unused variable assignment warnings
+
+    {
+        unsigned int _nj = callContext->m_islandJointsCount;
+        memarena->ShrinkArray<dJointWithInfo1>(jointinfos, _nj, nj); // from the _nj island joints down to the nj reported by stage 0
+    }
+
+    dxMIndexItem *mindex = NULL;
+    dxJBodiesItem *jb = NULL;
+    int *findex = NULL;
+    dReal *J = NULL, *Jcopy = NULL;
+
+    // if there are constraints, compute the constraint force
+    if (m > 0) {
+        mindex = memarena->AllocateArray<dxMIndexItem>(nj + 1); // entry i: running row/feedback-row offsets before joint i; entry nj: the totals
+        {
+            dxMIndexItem *mcurr = mindex;
+            unsigned int moffs = 0, mfboffs = 0;
+            mcurr->mIndex = moffs;
+            mcurr->fbIndex = mfboffs;
+            ++mcurr;
+
+            const dJointWithInfo1 *const jiend = jointinfos + nj;
+            for (const dJointWithInfo1 *jicurr = jointinfos; jicurr != jiend; ++jicurr) {
+                dxJoint *joint = jicurr->joint;
+                moffs += jicurr->info.m;
+                if (joint->feedback) { mfboffs += jicurr->info.m; } // feedback rows are counted only for joints with a feedback pointer
+                mcurr->mIndex = moffs;
+                mcurr->fbIndex = mfboffs;
+                ++mcurr;
+            }
+        }
+
+        jb = memarena->AllocateArray<dxJBodiesItem>(m);
+        findex = memarena->AllocateArray<int>(m);
+        J = memarena->AllocateOveralignedArray<dReal>((sizeint)m * JME__MAX, JACOBIAN_ALIGNMENT);
+        Jcopy = memarena->AllocateOveralignedArray<dReal>((sizeint)mfb * JCE__MAX, JCOPY_ALIGNMENT);
+    }
+
+    dxQuickStepperLocalContext *localContext = (dxQuickStepperLocalContext *)memarena->AllocateBlock(sizeof(dxQuickStepperLocalContext));
+    localContext->Initialize(invI, jointinfos, nj, m, mfb, mindex, jb, findex, J, Jcopy);
+    // The arena state captured here is handed to the Stage3 context
+    void *stage1MemarenaState = memarena->SaveState();
+    dxQuickStepperStage3CallContext *stage3CallContext = (dxQuickStepperStage3CallContext*)memarena->AllocateBlock(sizeof(dxQuickStepperStage3CallContext));
+    stage3CallContext->Initialize(callContext, localContext, stage1MemarenaState);
+
+    if (m > 0) {
+        unsigned int nb = callContext->m_islandBodiesCount;
+        // create a constraint equation right hand side vector `rhs', a constraint
+        // force mixing vector `cfm', and LCP low and high bound vectors, and an
+        // 'findex' vector.
+        dReal *rhs_tmp = memarena->AllocateArray<dReal>((sizeint)nb * RHS__MAX);
+
+        dxQuickStepperStage2CallContext *stage2CallContext = (dxQuickStepperStage2CallContext*)memarena->AllocateBlock(sizeof(dxQuickStepperStage2CallContext));
+        stage2CallContext->Initialize(callContext, localContext, rhs_tmp);
+
+        const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+        dIASSERT(allowedThreads != 0);
+
+        if (allowedThreads == 1)
+        {
+            // Single-threaded: the sub-stages are run back to back on this thread
+            IFTIMING (dTimerNow ("create J"));
+            dxQuickStepIsland_Stage2a(stage2CallContext);
+            IFTIMING (dTimerNow ("compute rhs_tmp"));
+            dxQuickStepIsland_Stage2b(stage2CallContext);
+            dxQuickStepIsland_Stage2c(stage2CallContext);
+            dxQuickStepIsland_Stage3(stage3CallContext);
+        }
+        else
+        {
+            dxWorld *world = callContext->m_world;
+            // Calls are posted last-to-run first, so each one can name its successor as the releasee
+            dCallReleaseeID stage3CallReleasee;
+            world->PostThreadedCallForUnawareReleasee(NULL, &stage3CallReleasee, 1, callContext->m_finalReleasee, 
+                NULL, &dxQuickStepIsland_Stage3_Callback, stage3CallContext, 0, "QuickStepIsland Stage3");
+
+            dCallReleaseeID stage2bSyncReleasee;
+            world->PostThreadedCall(NULL, &stage2bSyncReleasee, 1, stage3CallReleasee, 
+                NULL, &dxQuickStepIsland_Stage2bSync_Callback, stage2CallContext, 0, "QuickStepIsland Stage2b Sync");
+
+            unsigned stage2a_allowedThreads = CalculateOptimalThreadsCount<1U>(nj, allowedThreads);
+
+            dCallReleaseeID stage2aSyncReleasee;
+            world->PostThreadedCall(NULL, &stage2aSyncReleasee, stage2a_allowedThreads, stage2bSyncReleasee, 
+                NULL, &dxQuickStepIsland_Stage2aSync_Callback, stage2CallContext, 0, "QuickStepIsland Stage2a Sync");
+
+            if (stage2a_allowedThreads > 1) {
+                world->PostThreadedCallsGroup(NULL, stage2a_allowedThreads - 1, stage2aSyncReleasee, &dxQuickStepIsland_Stage2a_Callback, stage2CallContext, "QuickStepIsland Stage2a");
+            }
+            dxQuickStepIsland_Stage2a(stage2CallContext); // the calling thread does Stage2a work itself as well
+            world->AlterThreadedCallDependenciesCount(stage2aSyncReleasee, -1); // NOTE(review): presumably releases the dependency counted for this thread - confirm
+        }
+    }
+    else {
+        dxQuickStepIsland_Stage3(stage3CallContext); // no constraint rows: stage 2 is skipped entirely
+    }
+}
+
+
+// Threaded-call adapter: recovers the typed context and forwards to dxQuickStepIsland_Stage2a().
+static int dxQuickStepIsland_Stage2a_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the call instance index nor the releasee is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage2a(static_cast<dxQuickStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+static // Stage 2a: fills J, findex and the feedback copy of J from the joints, then fills the per-row body index pairs
+void dxQuickStepIsland_Stage2a(dxQuickStepperStage2CallContext *stage2CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
+    dxQuickStepperLocalContext *localContext = stage2CallContext->m_localContext;
+    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
+    unsigned int nj = localContext->m_nj;
+    const dxMIndexItem *mindex = localContext->m_mindex;
+
+    const dReal stepsizeRecip = dRecip(callContext->m_stepSize);
+    {
+        int *findex = localContext->m_findex;
+        dReal *J = localContext->m_J;
+        dReal *JCopy = localContext->m_Jcopy;
+
+        // get jacobian data from constraints. an m*16 matrix will be created
+        // to store the two jacobian blocks from each constraint. it has this
+        // format:
+        //
+        //   l1 l1 l1 a1 a1 a1 rhs cfm l2 l2 l2 a2 a2 a2 lo hi \    .
+        //   l1 l1 l1 a1 a1 a1 rhs cfm l2 l2 l2 a2 a2 a2 lo hi  }-- jacobian for joint 0, body 1 and body 2 (3 rows)
+        //   l1 l1 l1 a1 a1 a1 rhs cfm l2 l2 l2 a2 a2 a2 lo hi /
+        //   l1 l1 l1 a1 a1 a1 rhs cfm l2 l2 l2 a2 a2 a2 lo hi }--- jacobian for joint 1, body 1 and body 2 (3 rows)
+        //   etc...
+        //
+        //   (lll) = linear jacobian data
+        //   (aaa) = angular jacobian data
+        //
+        dxWorld *world = callContext->m_world;
+        const dReal worldERP = world->global_erp;
+        const dReal worldCFM = world->global_cfm;
+
+        unsigned validFIndices = 0; // counted locally; added to the shared m_valid_findices once, after the loop
+
+        unsigned ji; // joints are claimed one at a time through the shared counter m_ji_J
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_J, nj)) != nj) {
+            const unsigned ofsi = mindex[ji].mIndex;
+            const unsigned int infom = mindex[ji + 1].mIndex - ofsi;
+            // Preset this joint's rows to defaults before getInfo2() is asked to fill them in
+            dReal *const JRow = J + (sizeint)ofsi * JME__MAX;
+            {
+                dReal *const JEnd = JRow + infom * JME__MAX;
+                for (dReal *JCurr = JRow; JCurr != JEnd; JCurr += JME__MAX) {
+                    dSetZero(JCurr + JME__J1_MIN, JME__J1_COUNT);
+                    JCurr[JME_RHS] = REAL(0.0);
+                    JCurr[JME_CFM] = worldCFM;
+                    dSetZero(JCurr + JME__J2_MIN, JME__J2_COUNT);
+                    JCurr[JME_LO] = -dInfinity;
+                    JCurr[JME_HI] = dInfinity;
+                    dSASSERT(JME__J1_COUNT + 2 + JME__J2_COUNT + 2 == JME__MAX);
+                }
+            }
+            int *findexRow = findex + ofsi;
+            dSetValue(findexRow, infom, -1); // -1 is the "no findex" value tested for below
+            
+            dxJoint *joint = jointinfos[ji].joint;
+            joint->getInfo2(stepsizeRecip, worldERP, JME__MAX, JRow + JME__J1_MIN, JRow + JME__J2_MIN, JME__MAX, JRow + JME__RHS_CFM_MIN, JRow + JME__LO_HI_MIN, findexRow);
+
+            // findex iteration is compact and is not going to pollute caches - do it first
+            {
+                // adjust returned findex values for global index numbering
+                int *const findicesEnd = findexRow + infom;
+                for (int *findexCurr = findexRow; findexCurr != findicesEnd; ++findexCurr) {
+                    int fival = *findexCurr;
+                    if (fival != -1) {
+                        *findexCurr = fival + ofsi;
+                        ++validFIndices;
+                    }
+                }
+            }
+            {
+                dReal *const JEnd = JRow + infom * JME__MAX;
+                for (dReal *JCurr = JRow; JCurr != JEnd; JCurr += JME__MAX) {
+                    JCurr[JME_RHS] *= stepsizeRecip; // both rhs and cfm of every row are scaled by 1/stepsize
+                    JCurr[JME_CFM] *= stepsizeRecip;
+                }
+            }
+            {
+                // we need a copy of Jacobian for joint feedbacks
+                // because it gets destroyed by SOR solver
+                // instead of saving all Jacobian, we can save just rows
+                // for joints, that requested feedback (which is normally much less)
+                unsigned mfbIndex = mindex[ji].fbIndex;
+                if (mfbIndex != mindex[ji + 1].fbIndex) {
+                    dReal *const JEnd = JRow + infom * JME__MAX;
+                    dReal *JCopyRow = JCopy + mfbIndex * JCE__MAX; // Random access by mfbIndex here! Do not optimize!
+                    for (const dReal *JCurr = JRow; ; ) { // body runs before the end test, i.e. at least one row is assumed
+                        for (unsigned i = 0; i != JME__J1_COUNT; ++i) { JCopyRow[i + JCE__J1_MIN] = JCurr[i + JME__J1_MIN]; }
+                        for (unsigned j = 0; j != JME__J2_COUNT; ++j) { JCopyRow[j + JCE__J2_MIN] = JCurr[j + JME__J2_MIN]; }
+                        JCopyRow += JCE__MAX;
+                        dSASSERT((unsigned)JCE__J1_COUNT == JME__J1_COUNT);
+                        dSASSERT((unsigned)JCE__J2_COUNT == JME__J2_COUNT);
+                        dSASSERT(JCE__J1_COUNT + JCE__J2_COUNT == JCE__MAX);
+                        
+                        if ((JCurr += JME__MAX) == JEnd) {
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        if (validFIndices != 0) {
+            ThrsafeAdd(&localContext->m_valid_findices, validFIndices);
+        }
+    }
+
+    {
+        dxJBodiesItem *jb = localContext->m_jb;
+
+        // create an array of body numbers for each joint row
+        unsigned ji; // a second, separate pass over the joints, driven by the shared counter m_ji_jb
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_jb, nj)) != nj) {
+            dxJoint *joint = jointinfos[ji].joint;
+            int b1 = (joint->node[0].body) ? (joint->node[0].body->tag) : -1; // -1 stands for "no body attached"
+            int b2 = (joint->node[1].body) ? (joint->node[1].body->tag) : -1;
+
+            dxJBodiesItem *const jb_end = jb + mindex[ji + 1].mIndex;
+            dxJBodiesItem *jb_ptr = jb + mindex[ji].mIndex;
+            for (; jb_ptr != jb_end; ++jb_ptr) {
+                jb_ptr->first = b1;
+                jb_ptr->second = b2;
+            }
+        }
+    }
+}
+
+// Sync point between Stage2a and Stage2b: posts the extra Stage2b workers and runs Stage2b itself.
+static int dxQuickStepIsland_Stage2aSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // not needed
+    dxQuickStepperStage2CallContext *const stage2Context = static_cast<dxQuickStepperStage2CallContext *>(_stage2CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = stage2Context->m_stepperCallContext;
+    const unsigned int nb = stepperContext->m_islandBodiesCount;
+    const unsigned allowedThreads = stepperContext->m_stepperAllowedThreads;
+    const unsigned int workerCount = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE2B_STEP>(nb, allowedThreads);
+
+    if (workerCount > 1) {
+        // The extra workers are added to this call's releasee dependencies before they are posted.
+        dxWorld *const world = stepperContext->m_world;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, workerCount - 1);
+        world->PostThreadedCallsGroup(NULL, workerCount - 1, callThisReleasee, &dxQuickStepIsland_Stage2b_Callback, stage2Context, "QuickStepIsland Stage2b");
+    }
+    // This thread performs Stage2b work as well.
+    dxQuickStepIsland_Stage2b(stage2Context);
+    return 1;
+}
+
+// Threaded-call adapter: recovers the typed context and forwards to dxQuickStepIsland_Stage2b().
+static int dxQuickStepIsland_Stage2b_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the call instance index nor the releasee is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage2b(static_cast<dxQuickStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+// Stage 2b worker body: stores -(v/h + invM*fe) for the island bodies in rhs_tmp (RHS__MAX reals per body).
+// Bodies are claimed in chunks of dxQUICKSTEPISLAND_STAGE2B_STEP through the shared m_bi counter, which lets
+// the calling thread and the posted Stage2b calls work on the same context side by side.
+static void dxQuickStepIsland_Stage2b(dxQuickStepperStage2CallContext *stage2CallContext)
+{
+    const dxStepperProcessingCallContext *const stepperContext = stage2CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *const stepperLocals = stage2CallContext->m_localContext;
+
+    // Warning!!!
+    // The facc/tacc fields of the body objects read below may be modified by dxJoint::getInfo2().
+    // This code must therefore stay in a different sub-stage from the Jacobian construction in Stage2a
+    // to ensure proper synchronization and avoid accessing numbers being modified.
+    // Warning!!!
+    dxBody *const *const bodies = stepperContext->m_islandBodiesStart;
+    const unsigned int bodyCount = stepperContext->m_islandBodiesCount;
+    const dReal *const inverseInertia = stepperLocals->m_invI;
+    dReal *const rhsOut = stage2CallContext->m_rhs_tmp;
+    const dReal invStep = dRecip(stepperContext->m_stepSize);
+
+    const unsigned int chunkSize = dxQUICKSTEPISLAND_STAGE2B_STEP;
+    const unsigned int chunkCount = (bodyCount + (chunkSize - 1)) / chunkSize;
+
+    for (;;) {
+        const unsigned int chunk = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_bi, chunkCount);
+        if (chunk == chunkCount) {
+            break; // every chunk has been handed out
+        }
+
+        const unsigned int first = chunk * chunkSize;
+        const unsigned int last = first + dMIN(chunkSize, bodyCount - first);
+
+        for (unsigned int bi = first; bi != last; ++bi) {
+            dxBody *const b = bodies[bi];
+            dReal *const rhsRow = rhsOut + (sizeint)bi * RHS__MAX;
+            const dReal *const invIRow = inverseInertia + (sizeint)bi * IIE__MAX;
+            const dReal invMass = b->invMass;
+            // linear part: -(facc*invMass + lvel/h)
+            for (unsigned int axis = dSA__MIN; axis != dSA__MAX; ++axis) {
+                rhsRow[RHS__L_MIN + axis] = -(b->facc[dV3E__AXES_MIN + axis] * invMass + b->lvel[dV3E__AXES_MIN + axis] * invStep);
+            }
+            // angular part: invI*tacc is computed in place first, then combined with -(avel/h)
+            dMultiply0_331 (rhsRow + RHS__A_MIN, invIRow + IIE__MATRIX_MIN, b->tacc);
+            for (unsigned int axis = dSA__MIN; axis != dSA__MAX; ++axis) {
+                rhsRow[RHS__A_MIN + axis] = -(b->avel[dV3E__AXES_MIN + axis] * invStep) - rhsRow[RHS__A_MIN + axis];
+            }
+        }
+    }
+}
+
+// Stage2b -> Stage2c hand-over call: sizes the Stage2c thread set from the row count m_m, posts the extra
+// Stage2c worker calls against this call's own releasee and then runs Stage2c on the calling thread as well.
+static int dxQuickStepIsland_Stage2bSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    dxQuickStepperStage2CallContext *const context = static_cast<dxQuickStepperStage2CallContext *>(_stage2CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = context->m_stepperCallContext;
+
+    const unsigned int rowCount = context->m_localContext->m_m;
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    const unsigned int threadCount = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE2C_STEP>(rowCount, threadLimit);
+
+    if (threadCount > 1) {
+        // The calling thread is one of the workers, hence only (threadCount - 1) calls are posted.
+        dxWorld *const world = stepperContext->m_world;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, threadCount - 1);
+        world->PostThreadedCallsGroup(NULL, threadCount - 1, callThisReleasee, &dxQuickStepIsland_Stage2c_Callback, context, "QuickStepIsland Stage2c");
+    }
+
+    dxQuickStepIsland_Stage2c(context);
+    return 1;
+}
+
+
+// Threaded-call adapter for Stage2c: recovers the stage 2 context from the opaque pointer and runs the worker body.
+static int dxQuickStepIsland_Stage2c_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by a plain worker call.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage2c(static_cast<dxQuickStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+// Stage 2c worker body: adds J*rhs_tmp to rhs through multiplyAdd_J(), which is handed the shared m_Jrhsi
+// progress counter of the call context so that the calling thread and the posted Stage2c calls can split
+// the m rows between them.
+static void dxQuickStepIsland_Stage2c(dxQuickStepperStage2CallContext *stage2CallContext)
+{
+    const dxQuickStepperLocalContext *const stepperLocals = stage2CallContext->m_localContext;
+
+    // Warning!!!
+    // This code depends on rhs_tmp and therefore must be in a different sub-stage
+    // from the rhs_tmp calculation in Stage2b to ensure proper synchronization
+    // and avoid accessing numbers being modified.
+    // Warning!!!
+    const unsigned int rowCount = stepperLocals->m_m;
+    dReal *const jacobian = stepperLocals->m_J;
+    const dxJBodiesItem *const bodyPairs = stepperLocals->m_jb;
+    const dReal *const rhsTmp = stage2CallContext->m_rhs_tmp;
+
+    // add J*rhs_tmp to rhs
+    multiplyAdd_J<dxQUICKSTEPISLAND_STAGE2C_STEP, RHS__DYNAMICS_MIN, RHS__MAX>(
+        &stage2CallContext->m_Jrhsi,
+        rowCount, jacobian, bodyPairs, rhsTmp);
+}
+
+
+// Adapter with the threaded-call callback signature: casts the opaque pointer to the stage 3 context and runs Stage3.
+static int dxQuickStepIsland_Stage3_Callback(void *_stage3CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Only the context argument is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage3(static_cast<dxQuickStepperStage3CallContext *>(_stage3CallContext));
+    return 1;
+}
+
+static // Stage 3: releases the Stage1 arena state, allocates the Stage4 work arrays and then either runs Stage4/5 inline or posts them as threaded calls.
+void dxQuickStepIsland_Stage3(dxQuickStepperStage3CallContext *stage3CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage3CallContext->m_stepperCallContext; // both context pointers are fetched before the arena restore below
+    const dxQuickStepperLocalContext *localContext = stage3CallContext->m_localContext;
+
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    memarena->RestoreState(stage3CallContext->m_stage1MemArenaState);
+    stage3CallContext = NULL; // WARNING! stage3CallContext is not valid after this point!
+    dIVERIFY(stage3CallContext == NULL); // To suppress unused variable assignment warnings
+
+    void *stage3MemarenaState = memarena->SaveState();
+    dxQuickStepperStage5CallContext *stage5CallContext = (dxQuickStepperStage5CallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage5CallContext));
+    stage5CallContext->Initialize(callContext, localContext, stage3MemarenaState);
+
+    unsigned int m = localContext->m_m;
+
+    if (m > 0) {
+        // lambda storage; Stage4a loads it from the values saved on the joints (WARM_STARTING) or zeroes it
+        dReal *lambda = memarena->AllocateArray<dReal>(m);
+
+        unsigned int nb = callContext->m_islandBodiesCount;
+        dReal *cforce = memarena->AllocateArray<dReal>((sizeint)nb * CFE__MAX);
+        dReal *iMJ = memarena->AllocateOveralignedArray<dReal>((sizeint)m * IMJ__MAX, INVMJ_ALIGNMENT);
+        // order to solve constraint rows in
+        IndexError *order = memarena->AllocateArray<IndexError>(m);
+        dReal *last_lambda = NULL;
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+        // the lambda computed at the previous iteration.
+        // this is used to measure error for when we are reordering the indexes.
+        last_lambda = memarena->AllocateArray<dReal>(m);
+#endif
+
+        const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+        bool singleThreadedExecution = allowedThreads == 1;
+        dIASSERT(allowedThreads >= 1);
+        // the two link/level arrays are allocated only when the threading interface is compiled in; otherwise they stay NULL
+        atomicord32 *bi_links_or_mi_levels = NULL;
+        atomicord32 *mi_links = NULL;
+#if !dTHREADING_INTF_DISABLED
+        bi_links_or_mi_levels = memarena->AllocateArray<atomicord32>(dMAX(nb, m));
+        mi_links = memarena->AllocateArray<atomicord32>(2 * ((sizeint)m + 1));
+#else
+        dIASSERT(singleThreadedExecution);
+#endif
+        dxQuickStepperStage4CallContext *stage4CallContext = (dxQuickStepperStage4CallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage4CallContext));
+        stage4CallContext->Initialize(callContext, localContext, lambda, cforce, iMJ, order, last_lambda, bi_links_or_mi_levels, mi_links);
+
+        if (singleThreadedExecution)
+        { // single-threaded path: every Stage4 step, Stage4b and Stage5 run inline, in order, on this thread
+            dxQuickStepIsland_Stage4a(stage4CallContext);
+
+            IFTIMING (dTimerNow ("solving LCP problem"));
+            dxQuickStepIsland_Stage4LCP_iMJComputation(stage4CallContext);
+            dxQuickStepIsland_Stage4LCP_STfcComputation(stage4CallContext);
+            dxQuickStepIsland_Stage4LCP_AdComputation(stage4CallContext);
+            dxQuickStepIsland_Stage4LCP_ReorderPrep(stage4CallContext);
+            
+            dxWorld *world = callContext->m_world;
+            const unsigned int num_iterations = world->qs.num_iterations;
+            for (unsigned int iteration=0; iteration < num_iterations; iteration++) {
+                if (IsSORConstraintsReorderRequiredForIteration(iteration)) {
+                    stage4CallContext->ResetSOR_ConstraintsReorderVariables(0);
+                    dxQuickStepIsland_Stage4LCP_ConstraintsShuffling(stage4CallContext, iteration);
+                }
+                dxQuickStepIsland_Stage4LCP_STIteration(stage4CallContext);
+            }
+
+            dxQuickStepIsland_Stage4b(stage4CallContext);
+            dxQuickStepIsland_Stage5(stage5CallContext);
+        }
+        else
+        { // multi-threaded path: only the call graph is built here, plus this thread's share of the iMJ computation
+            dxWorld *world = callContext->m_world;
+            // Calls are posted last-stage-first because each post takes the releasee obtained from a previously posted (later-stage) call.
+            dCallReleaseeID stage5CallReleasee;
+            world->PostThreadedCallForUnawareReleasee(NULL, &stage5CallReleasee, 1, callContext->m_finalReleasee, 
+                NULL, &dxQuickStepIsland_Stage5_Callback, stage5CallContext, 0, "QuickStepIsland Stage5");
+
+            dCallReleaseeID stage4LCP_IterationSyncReleasee;
+            world->PostThreadedCall(NULL, &stage4LCP_IterationSyncReleasee, 1, stage5CallReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_IterationSync_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_Iteration Sync");
+
+            unsigned int stage4LCP_Iteration_allowedThreads = CalculateOptimalThreadsCount<1U>(m, allowedThreads);
+            stage4CallContext->AssignLCP_IterationData(stage4LCP_IterationSyncReleasee, stage4LCP_Iteration_allowedThreads);
+            // IterationStart is posted with a count of 3, matching the three calls posted against its releasee below (fcStart, iMJSync, ReorderPrep).
+            dCallReleaseeID stage4LCP_IterationStartReleasee;
+            world->PostThreadedCall(NULL, &stage4LCP_IterationStartReleasee, 3, stage4LCP_IterationSyncReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_IterationStart_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_Iteration Start");
+
+            unsigned int nj = localContext->m_nj;
+            unsigned int stage4a_allowedThreads = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4A_STEP>(nj, allowedThreads);
+
+            dCallReleaseeID stage4LCP_fcStartReleasee;
+            // Note: It is unnecessary to make fc dependent on 4a if there is no WARM_STARTING
+            // However I'm doing so to minimize use of preprocessor conditions in sources
+            unsigned stage4LCP_fcDependenciesCountToUse = stage4a_allowedThreads;
+#ifdef WARM_STARTING
+            // Posted with extra dependency to be removed from dxQuickStepIsland_Stage4LCP_iMJSync_Callback
+            stage4LCP_fcDependenciesCountToUse += 1;
+#endif
+            world->PostThreadedCall(NULL, &stage4LCP_fcStartReleasee, stage4LCP_fcDependenciesCountToUse, stage4LCP_IterationStartReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_fcStart_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_fc Start");
+#ifdef WARM_STARTING
+            stage4CallContext->AssignLCP_fcStartReleasee(stage4LCP_fcStartReleasee);
+#endif
+
+            unsigned stage4LCP_iMJ_allowedThreads = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4LCP_IMJ_STEP>(m, allowedThreads);
+
+            dCallReleaseeID stage4LCP_iMJSyncReleasee;
+            world->PostThreadedCall(NULL, &stage4LCP_iMJSyncReleasee, stage4LCP_iMJ_allowedThreads, stage4LCP_IterationStartReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_iMJSync_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_iMJ Sync");
+
+            world->PostThreadedCall(NULL, NULL, 0, stage4LCP_IterationStartReleasee, NULL, &dxQuickStepIsland_Stage4LCP_ReorderPrep_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_ReorderPrep");
+            world->PostThreadedCallsGroup(NULL, stage4a_allowedThreads, stage4LCP_fcStartReleasee, &dxQuickStepIsland_Stage4a_Callback, stage4CallContext, "QuickStepIsland Stage4a");
+            // iMJSync was posted with stage4LCP_iMJ_allowedThreads dependencies: (count - 1) posted workers plus this thread, whose slot is released manually below.
+            if (stage4LCP_iMJ_allowedThreads > 1) {
+                world->PostThreadedCallsGroup(NULL, stage4LCP_iMJ_allowedThreads - 1, stage4LCP_iMJSyncReleasee, &dxQuickStepIsland_Stage4LCP_iMJ_Callback, stage4CallContext, "QuickStepIsland Stage4LCP_iMJ");
+            }
+            dxQuickStepIsland_Stage4LCP_iMJComputation(stage4CallContext);
+            world->AlterThreadedCallDependenciesCount(stage4LCP_iMJSyncReleasee, -1); // this thread's own share of the iMJ work is done
+        }
+    }
+    else { // m == 0: there are no rows to solve, continue with Stage5 right away
+        dxQuickStepIsland_Stage5(stage5CallContext);
+    }
+}
+
+// Threaded-call adapter for Stage4a: recovers the stage 4 context from the opaque pointer and runs the worker body.
+static int dxQuickStepIsland_Stage4a_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by a plain worker call.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage4a(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Stage 4a worker body: gives every lambda row its starting value -- with WARM_STARTING 0.9 times the lambda stored
+// on the row's joint, otherwise zero. Joints are claimed in chunks of dxQUICKSTEPISLAND_STAGE4A_STEP via m_ji_4a.
+static void dxQuickStepIsland_Stage4a(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    dReal *lambda = stage4CallContext->m_lambda;
+    const dxMIndexItem *mindex = localContext->m_mindex; // mindex[ji].mIndex: offset of joint ji's first row in lambda
+#ifdef WARM_STARTING
+    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
+#endif
+    unsigned int nj = localContext->m_nj;
+    const unsigned int step_size = dxQUICKSTEPISLAND_STAGE4A_STEP;
+    unsigned int nj_steps = (nj + (step_size - 1)) / step_size;
+
+    unsigned ji_step;
+    while ((ji_step = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_ji_4a, nj_steps)) != nj_steps) {
+        unsigned int ji = ji_step * step_size;
+        dReal *lambdacurr = lambda + mindex[ji].mIndex;
+#ifdef WARM_STARTING
+        const dJointWithInfo1 *jicurr = jointinfos + ji;
+        const dJointWithInfo1 *const jiend = jicurr + dMIN(step_size, nj - ji);
+
+        do {
+            const dReal *joint_lambdas = jicurr->joint->lambda;
+            dReal *const lambdsnext = lambdacurr + jicurr->info.m;
+            // NOTE(review): the loop below writes before it tests, i.e. it assumes info.m >= 1 for every entry -- confirm where jointinfos is filled.
+            while (true) {
+                // for warm starting, multiplication by 0.9 seems to be necessary to prevent
+                // jerkiness in motor-driven joints. I have no idea why this works.
+                // The factor is cast to dReal so that a float build multiplies in float instead of promoting to double.
+                *lambdacurr = *joint_lambdas * (dReal)0.9;
+                if (++lambdacurr == lambdsnext) {
+                    break;
+                }
+                ++joint_lambdas;
+            }
+        } 
+        while (++jicurr != jiend);
+#else
+        dReal *lambdsnext = lambda + mindex[ji + dMIN(step_size, nj - ji)].mIndex; // start offset of the joint following this chunk
+        dSetZero(lambdacurr, lambdsnext - lambdacurr);
+#endif
+    }
+}
+
+// Threaded-call adapter for the iMJ computation: recovers the stage 4 context and runs the worker body.
+static int dxQuickStepIsland_Stage4LCP_iMJ_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Only the context argument is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage4LCP_iMJComputation(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Stage 4 LCP worker body: precomputes iMJ = inv(M)*J' by delegating to compute_invM_JT(), which is given the
+// shared m_mi_iMJ progress counter of the call context.
+static void dxQuickStepIsland_Stage4LCP_iMJComputation(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *const stepperLocals = stage4CallContext->m_localContext;
+
+    // data handed to the helper
+    const unsigned int rowCount = stepperLocals->m_m;
+    dReal *const jacobian = stepperLocals->m_J;
+    const dxJBodiesItem *const bodyPairs = stepperLocals->m_jb;
+    dReal *const inverseInertia = stepperLocals->m_invI;
+    dxBody *const *const bodies = stage4CallContext->m_stepperCallContext->m_islandBodiesStart;
+    // array receiving the result
+    dReal *const invMJ = stage4CallContext->m_iMJ;
+    compute_invM_JT<dxQUICKSTEPISLAND_STAGE4LCP_IMJ_STEP>(&stage4CallContext->m_mi_iMJ, invMJ, rowCount, jacobian, bodyPairs, bodies, inverseInertia);
+}
+
+// iMJ synchronisation call. Under WARM_STARTING it takes one dependency off the stored fcStart releasee; it then
+// sizes the Ad computation from the row count, posts the extra Ad worker calls against this call's own releasee
+// and runs the Ad computation on the calling thread as well. Always returns 1.
+static int dxQuickStepIsland_Stage4LCP_iMJSync_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    dxQuickStepperStage4CallContext *const context = static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = context->m_stepperCallContext;
+
+    const unsigned int rowCount = context->m_localContext->m_m;
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    const unsigned int threadCount = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4LCP_AD_STEP>(rowCount, threadLimit);
+
+#ifdef WARM_STARTING
+    // This removes the extra dependency the fcStart call is posted with in dxQuickStepIsland_Stage3().
+    stepperContext->m_world->AlterThreadedCallDependenciesCount(context->m_LCP_fcStartReleasee, -1);
+#endif
+
+    if (threadCount > 1) {
+        // The calling thread is one of the workers, hence only (threadCount - 1) calls are posted.
+        dxWorld *const world = stepperContext->m_world;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, threadCount - 1);
+        world->PostThreadedCallsGroup(NULL, threadCount - 1, callThisReleasee, &dxQuickStepIsland_Stage4LCP_Ad_Callback, context, "QuickStepIsland Stage4LCP_Ad");
+    }
+
+    dxQuickStepIsland_Stage4LCP_AdComputation(context);
+
+    return 1;
+}
+
+// Entry call of the fc computation: derives the thread counts of its prepare and complete phases, stores them in
+// the call context, (WARM_STARTING only) initialises the body link array, then starts the computation on this thread plus any extra posted workers.
+static int dxQuickStepIsland_Stage4LCP_fcStart_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    dxQuickStepperStage4CallContext *const context = static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = context->m_stepperCallContext;
+    const dxQuickStepperLocalContext *const stepperLocals = context->m_localContext;
+
+#ifdef WARM_STARTING
+    const unsigned int prepareComplexity = stepperLocals->m_m / dxQUICKSTEPISLAND_STAGE4LCP_FC_COMPLETE_TO_PREPARE_COMPLEXITY_DIVISOR;
+    const unsigned int completeComplexity = stepperContext->m_islandBodiesCount;
+#else
+    // NOTE(review): the thread count is derived from m_m here although the cold computation chunks over the island bodies -- confirm this is intended.
+    const unsigned int prepareComplexity = stepperLocals->m_m;
+    const unsigned int completeComplexity = 0;
+#endif
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    const unsigned int prepareThreads = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP>(prepareComplexity, threadLimit);
+    const unsigned int completeThreads = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP>(completeComplexity, threadLimit);
+    context->AssignLCP_fcAllowedThreads(prepareThreads, completeThreads);
+
+#ifdef WARM_STARTING
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_warmZeroArrays(context);
+#endif
+
+    if (prepareThreads > 1) {
+        dxWorld *const world = stepperContext->m_world;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, prepareThreads - 1);
+        world->PostThreadedCallsGroup(NULL, prepareThreads - 1, callThisReleasee, &dxQuickStepIsland_Stage4LCP_fc_Callback, context, "QuickStepIsland Stage4LCP_fc");
+    }
+    dxQuickStepIsland_Stage4LCP_MTfcComputation(context, callThisReleasee);
+    return 1;
+}
+
+// Threaded-call adapter for the additional fc workers: runs the multi-threaded fc computation on the stage 4 context.
+static int dxQuickStepIsland_Stage4LCP_fc_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // The releasee is forwarded: the WARM_STARTING variant posts its completion workers against it.
+    (void)callInstanceIndex;
+    dxQuickStepIsland_Stage4LCP_MTfcComputation(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext), callThisReleasee);
+    return 1;
+}
+
+// Compile-time dispatch of the multi-threaded fc computation: the warm variant with WARM_STARTING, the cold one otherwise.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation(dxQuickStepperStage4CallContext *stage4CallContext, dCallReleaseeID callThisReleasee)
+{
+#ifndef WARM_STARTING
+    (void)callThisReleasee; // unused
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_cold(stage4CallContext);
+#else
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_warm(stage4CallContext, callThisReleasee);
+#endif
+}
+
+#ifdef WARM_STARTING
+
+// Warm-start fc computation as run by every fc worker: performs the prepare phase; the worker for which the atomic
+// decrement of m_LCP_fcPrepareThreadsRemaining yields 1 then resets the work index and launches the complete phase.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warm(dxQuickStepperStage4CallContext *stage4CallContext, dCallReleaseeID callThisReleasee)
+{
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_warmPrepare(stage4CallContext);
+
+    const bool launchesCompletion = ThrsafeExchangeAdd(&stage4CallContext->m_LCP_fcPrepareThreadsRemaining, (atomicord32)(-1)) == 1;
+    if (!launchesCompletion) {
+        return;
+    }
+    stage4CallContext->ResetLCP_fcComputationIndex();
+    const unsigned int completeThreads = stage4CallContext->m_LCP_fcCompleteThreadsTotal;
+    if (completeThreads > 1) {
+        dxWorld *const world = stage4CallContext->m_stepperCallContext->m_world;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, completeThreads - 1);
+        world->PostThreadedCallsGroup(NULL, completeThreads - 1, callThisReleasee, &dxQuickStepIsland_Stage4LCP_fcWarmComplete_Callback, stage4CallContext, "QuickStepIsland Stage4LCP_fcWarmComplete");
+    }
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_warmComplete(stage4CallContext);
+}
+
+// Hands the m_bi_links_or_mi_levels array, together with the island body count, to
+// multiply_invM_JT_init_array() for initialisation ahead of the warm-start fc computation.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmZeroArrays(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const unsigned int bodyCount = stage4CallContext->m_stepperCallContext->m_islandBodiesCount;
+    atomicord32 *const bodyLinks = stage4CallContext->m_bi_links_or_mi_levels;
+
+    // NOTE(review): the values the array is initialised to are decided by the helper -- presumably zeros, going by this function's name.
+    multiply_invM_JT_init_array(bodyCount, bodyLinks);
+}
+
+// Prepare phase of the warm-start fc computation. fc = (inv(M)*J')*lambda is maintained incrementally as lambda
+// changes; this phase only hands the row count, the per-row body indices and the two link arrays to
+// multiply_invM_JT_prepare(), which is also given the shared m_mi_fc progress counter.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmPrepare(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    unsigned int m = localContext->m_m;
+    const dxJBodiesItem *jb = localContext->m_jb;
+
+    // Prepare to compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc as we change lambda.
+    multiply_invM_JT_prepare<dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP_PREPARE>(&stage4CallContext->m_mi_fc, m, jb, stage4CallContext->m_bi_links_or_mi_levels, stage4CallContext->m_mi_links);
+}
+
+// Threaded-call adapter for the extra workers of the warm-start fc complete phase.
+static int dxQuickStepIsland_Stage4LCP_fcWarmComplete_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Only the context argument is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepperStage4CallContext *const context = static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext);
+
+    dxQuickStepIsland_Stage4LCP_MTfcComputation_warmComplete(context);
+    return 1;
+}
+
+// Complete phase of the warm-start fc computation: finishes fc = (inv(M)*J')*lambda through
+// multiply_invM_JT_complete(); fc is then maintained incrementally as lambda changes.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_warmComplete(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *const stepperLocals = stage4CallContext->m_localContext;
+
+    dReal *const constraintForce = stage4CallContext->m_cforce;
+    const unsigned int bodyCount = stage4CallContext->m_stepperCallContext->m_islandBodiesCount;
+    dReal *const invMJ = stage4CallContext->m_iMJ;
+    const dxJBodiesItem *const bodyPairs = stepperLocals->m_jb;
+    dReal *const lambda = stage4CallContext->m_lambda;
+
+    multiply_invM_JT_complete<dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP_COMPLETE, CFE__DYNAMICS_MIN, CFE__MAX>(
+        &stage4CallContext->m_mi_fc, constraintForce, bodyCount, invMJ, bodyPairs, lambda,
+        stage4CallContext->m_bi_links_or_mi_levels, stage4CallContext->m_mi_links);
+}
+
+#else // #ifndef WARM_STARTING
+
+// Cold-start fc computation: zeroes fc (CFE__MAX reals per island body).
+// Bodies are claimed in chunks of dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP through the shared m_mi_fc counter,
+// so the calling thread and the posted fc calls can split the array between them.
+static void dxQuickStepIsland_Stage4LCP_MTfcComputation_cold(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    dReal *const constraintForce = stage4CallContext->m_cforce;
+    const unsigned int bodyCount = stage4CallContext->m_stepperCallContext->m_islandBodiesCount;
+    const unsigned int chunkSize = dxQUICKSTEPISLAND_STAGE4LCP_FC_STEP;
+    const unsigned int chunkCount = (bodyCount + (chunkSize - 1)) / chunkSize;
+
+    // The counter returns chunkCount once every chunk has been handed out.
+    for (unsigned int chunk; (chunk = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_mi_fc, chunkCount)) != chunkCount; ) {
+        const unsigned int first = chunk * chunkSize;
+        const unsigned int count = dMIN(chunkSize, bodyCount - first);
+        dSetZero(constraintForce + (sizeint)first * CFE__MAX, (sizeint)count * CFE__MAX);
+    }
+}
+
+#endif // #ifndef WARM_STARTING
+
+
+// Single-threaded fc set-up (called from the inline path of dxQuickStepIsland_Stage3()).
+// With WARM_STARTING fc = (inv(M)*J')*lambda is computed by _multiply_invM_JT(); fc is then maintained
+// incrementally as lambda changes. Without WARM_STARTING fc (CFE__MAX reals per island body) is zeroed.
+static void dxQuickStepIsland_Stage4LCP_STfcComputation(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    dReal *const constraintForce = stage4CallContext->m_cforce;
+    const unsigned int bodyCount = stage4CallContext->m_stepperCallContext->m_islandBodiesCount;
+
+#ifdef WARM_STARTING
+    const dxQuickStepperLocalContext *const stepperLocals = stage4CallContext->m_localContext;
+
+    const unsigned int rowCount = stepperLocals->m_m;
+    const dxJBodiesItem *const bodyPairs = stepperLocals->m_jb;
+    dReal *const invMJ = stage4CallContext->m_iMJ;
+    dReal *const lambda = stage4CallContext->m_lambda;
+
+    // warm start: derive fc from the current lambda
+    _multiply_invM_JT<CFE__DYNAMICS_MIN, CFE__MAX>(constraintForce, rowCount, bodyCount, invMJ, bodyPairs, lambda);
+
+#else
+
+    // cold start: fc begins as all zeros
+    dSetZero(constraintForce, (sizeint)bodyCount * CFE__MAX);
+
+#endif
+}
+
+// Threaded-call adapter for the Ad computation: recovers the stage 4 context and runs the worker body.
+static int dxQuickStepIsland_Stage4LCP_Ad_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by a plain worker call.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage4LCP_AdComputation(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Stage 4 LCP worker body: for every constraint row computes Ad_i = sor_w / (sum + cfm_i), where sum is the dot
+// product of the row's iMJ and J entries, and multiplies the row's CFM, RHS and J entries by Ad_i in place.
+// Rows are claimed in chunks of dxQUICKSTEPISLAND_STAGE4LCP_AD_STEP through the shared m_mi_Ad counter.
+static void dxQuickStepIsland_Stage4LCP_AdComputation(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxStepperProcessingCallContext *const stepperContext = stage4CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *const stepperLocals = stage4CallContext->m_localContext;
+
+    const dxJBodiesItem *const bodyPairs = stepperLocals->m_jb;
+    dReal *const jacobian = stepperLocals->m_J;
+    const dReal *const invMJ = stage4CallContext->m_iMJ;
+    const unsigned int rowCount = stepperLocals->m_m;
+
+    // SOR over-relaxation parameter
+    const dReal sorFactor = stepperContext->m_world->qs.w;
+
+    const unsigned int chunkSize = dxQUICKSTEPISLAND_STAGE4LCP_AD_STEP;
+    const unsigned int chunkCount = (rowCount + (chunkSize - 1)) / chunkSize;
+
+    for (;;) {
+        const unsigned int chunk = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_mi_Ad, chunkCount);
+        if (chunk == chunkCount) {
+            break; // no unclaimed chunk is left
+        }
+
+        const unsigned int first = chunk * chunkSize;
+        const unsigned int last = first + dMIN(chunkSize, rowCount - first);
+
+        for (unsigned int mi = first; mi != last; ++mi) {
+            const dReal *const invMJRow = invMJ + (sizeint)mi * IMJ__MAX;
+            dReal *const jacobianRow = jacobian + (sizeint)mi * JME__MAX;
+            const int secondBody = bodyPairs[mi].second; // -1 makes the code below skip the J2 part of the row
+            dSASSERT(JME__J1_COUNT == (int)JVE__MAX);
+
+            // sum: the J1 part always contributes, the J2 part only when there is a second body
+            dReal sum = REAL(0.0);
+            for (unsigned int j = JVE__MIN; j != JVE__MAX; ++j) {
+                sum += invMJRow[IMJ__1_MIN + j] * jacobianRow[JME__J1_MIN + j];
+            }
+            if (secondBody != -1) {
+                dSASSERT(JME__J2_COUNT == (int)JVE__MAX);
+                for (unsigned int k = JVE__MIN; k != JVE__MAX; ++k) {
+                    sum += invMJRow[IMJ__2_MIN + k] * jacobianRow[JME__J2_MIN + k];
+                }
+            }
+
+            const dReal cfm = jacobianRow[JME_CFM];
+            const dReal Ad = sorFactor / (sum + cfm);
+
+            // NOTE: This may seem unnecessary but it's indeed an optimization
+            // to move multiplication by Ad[i] and cfm[i] out of iteration loop.
+
+            // scale cfm, J and b by Ad
+            jacobianRow[JME_CFM] = cfm * Ad;
+            jacobianRow[JME_RHS] *= Ad;
+            for (unsigned int j = JVE__MIN; j != JVE__MAX; ++j) {
+                jacobianRow[JME__J1_MIN + j] *= Ad;
+            }
+
+            if (secondBody != -1) {
+                for (unsigned int k = JVE__MIN; k != JVE__MAX; ++k) {
+                    jacobianRow[JME__J2_MIN + k] *= Ad;
+                }
+            }
+        }
+    }
+}
+
+// Threaded-call adapter for the reorder preparation: recovers the stage 4 context and runs ReorderPrep on it.
+static int dxQuickStepIsland_Stage4LCP_ReorderPrep_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Only the context argument is needed here.
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+    dxQuickStepIsland_Stage4LCP_ReorderPrep(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Builds the initial constraint solving order. Row indices whose findex is -1 are written, in ascending order,
+// to the first (m - m_valid_findices) slots of the order array; all other row indices go, also in ascending
+// order, to the slots after them.
+static void dxQuickStepIsland_Stage4LCP_ReorderPrep(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *const stepperLocals = stage4CallContext->m_localContext;
+    const unsigned int rowCount = stepperLocals->m_m;
+    const unsigned int validFindexCount = stepperLocals->m_valid_findices;
+    const int *const findex = stepperLocals->m_findex;
+
+    IndexError *const order = stage4CallContext->m_order;
+    IndexError *const frontEnd = order + (rowCount - validFindexCount);
+
+    // make sure constraints with findex < 0 come first:
+    // the array is filled through two cursors, one per partition
+    IndexError *frontCursor = order;
+    IndexError *backCursor = frontEnd;
+
+    for (unsigned int row = 0; row != rowCount; ++row) {
+        IndexError *&cursor = (findex[row] == -1) ? frontCursor : backCursor;
+        cursor->index = row;
+        ++cursor;
+    }
+
+    // Both partitions must have been filled exactly.
+    dIASSERT(frontCursor == frontEnd);
+    dIASSERT(backCursor == order + rowCount);
+}
+
+// Threaded-call entry point that launches one iteration of the Stage4 LCP solver.
+//
+// While the stored iteration counter is below qs->num_iterations, this routine:
+//  - advances the stored counter in advance;
+//  - arranges what is to follow the iteration: either a newly posted instance of this
+//    same callback (when more iterations remain) or the stored iteration sync releasee
+//    (when this is the last iteration);
+//  - either starts the constraints reordering (the iteration itself is then launched
+//    from the reordering sync callback), or reconstructs the dependency map from
+//    the saved levels and starts the iteration work right away, executing one share
+//    of that work in the calling thread.
+// If the counter has already reached num_iterations nothing is done.
+//
+// _stage4CallContext is the dxQuickStepperStage4CallContext passed as an opaque pointer;
+// callInstanceIndex and callThisReleasee are not used. The function always returns 1.
+static 
+int dxQuickStepIsland_Stage4LCP_IterationStart_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // unused
+    dxQuickStepperStage4CallContext *stage4CallContext = (dxQuickStepperStage4CallContext *)_stage4CallContext;
+    const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
+
+    dxWorld *world = callContext->m_world;
+    dxQuickStepParameters *qs = &world->qs;
+
+    const unsigned int num_iterations = qs->num_iterations;
+    unsigned iteration = stage4CallContext->m_LCP_iteration;
+    
+    if (iteration < num_iterations)
+    {
+        dCallReleaseeID nextReleasee;
+        dCallReleaseeID stage4LCP_IterationSyncReleasee = stage4CallContext->m_LCP_IterationSyncReleasee;
+        unsigned int stage4LCP_Iteration_allowedThreads = stage4CallContext->m_LCP_IterationAllowedThreads;
+
+        bool reorderRequired = false;
+
+        if (IsSORConstraintsReorderRequiredForIteration(iteration))
+        {
+            reorderRequired = true;
+        }
+
+        // With reordering, the follow-up call is given a single dependency at this point
+        // (the iteration threads are accounted for later, in the reordering sync callback);
+        // without it, one dependency is reserved for each allowed iteration thread.
+        unsigned syncCallDependencies = reorderRequired ? 1 : stage4LCP_Iteration_allowedThreads;
+
+        // Increment iterations counter in advance as anyway it needs to be incremented 
+        // before independent tasks (the reordering or the iteration) are posted
+        // (otherwise next iteration may complete before the increment 
+        // and the same iteration index may be used again).
+        stage4CallContext->m_LCP_iteration = iteration + 1;
+
+        if (iteration + 1 != num_iterations) {
+            // More iterations remain: post this same callback again to start the next one
+            dCallReleaseeID stage4LCP_IterationStartReleasee;
+            world->PostThreadedCallForUnawareReleasee(NULL, &stage4LCP_IterationStartReleasee, syncCallDependencies, stage4LCP_IterationSyncReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_IterationStart_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_Iteration Start");
+            nextReleasee = stage4LCP_IterationStartReleasee;
+        }
+        else {
+            // This is the last iteration: the work posted below targets the iteration sync releasee directly
+            world->AlterThreadedCallDependenciesCount(stage4LCP_IterationSyncReleasee, syncCallDependencies);
+            nextReleasee = stage4LCP_IterationSyncReleasee;
+        }
+
+        if (reorderRequired) {
+            const unsigned int reorderThreads = 2;
+            dIASSERT(callContext->m_stepperAllowedThreads >= 2); // Otherwise the single-threaded execution path would be taken
+
+            stage4CallContext->ResetSOR_ConstraintsReorderVariables(reorderThreads);
+
+            // The reordering sync call is posted with one dependency per reordering thread
+            dCallReleaseeID stage4LCP_ConstraintsReorderingSyncReleasee;
+            world->PostThreadedCall(NULL, &stage4LCP_ConstraintsReorderingSyncReleasee, reorderThreads, nextReleasee, 
+                NULL, &dxQuickStepIsland_Stage4LCP_ConstraintsReorderingSync_Callback, stage4CallContext, 0, "QuickStepIsland Stage4LCP_ConstraintsReordering Sync");
+
+            if (reorderThreads > 1) {
+                world->PostThreadedCallsGroup(NULL, reorderThreads - 1, stage4LCP_ConstraintsReorderingSyncReleasee, &dxQuickStepIsland_Stage4LCP_ConstraintsReordering_Callback, stage4CallContext, "QuickStepIsland Stage4LCP_ConstraintsReordering");
+            }
+            // The calling thread performs one share of the reordering itself
+            // and then removes its own dependency from the sync call
+            dxQuickStepIsland_Stage4LCP_ConstraintsReordering(stage4CallContext);
+            world->AlterThreadedCallDependenciesCount(stage4LCP_ConstraintsReorderingSyncReleasee, -1);
+        }
+        else {
+            // No reordering: the order is unchanged, so the dependency map is restored from the levels saved during the previous iteration
+            dIASSERT(iteration != 0); {
+                dxQuickStepIsland_Stage4LCP_DependencyMapFromSavedLevelsReconstruction(stage4CallContext);
+            }
+
+            stage4CallContext->RecordLCP_IterationStart(stage4LCP_Iteration_allowedThreads, nextReleasee);
+
+            unsigned knownToBeCompletedLevel = dxHEAD_INDEX;
+            if (stage4LCP_Iteration_allowedThreads > 1) {
+                world->PostThreadedCallsIndexOverridenGroup(NULL, stage4LCP_Iteration_allowedThreads - 1, nextReleasee, &dxQuickStepIsland_Stage4LCP_Iteration_Callback, stage4CallContext, knownToBeCompletedLevel, "QuickStepIsland Stage4LCP_Iteration");
+            }
+            // The calling thread performs one share of the iteration itself
+            // and then removes its own dependency from the follow-up call
+            dxQuickStepIsland_Stage4LCP_MTIteration(stage4CallContext, knownToBeCompletedLevel);
+            world->AlterThreadedCallDependenciesCount(nextReleasee, -1);
+        }
+    }
+
+    return 1;
+}
+
+// Threaded-call adapter for the constraints reordering routine:
+// unwraps the opaque context pointer and forwards to
+// dxQuickStepIsland_Stage4LCP_ConstraintsReordering(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage4LCP_ConstraintsReordering_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed here
+    (void)callInstanceIndex;
+    (void)callThisReleasee;
+
+    dxQuickStepIsland_Stage4LCP_ConstraintsReordering(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Executes one thread's share of the constraints reordering.
+//
+// The constraints are shuffled first and, if the shuffling reports that the map is
+// to be rebuilt, the link arrays are zeroed. Then the count of threads remaining in
+// the reordering is decremented and the thread that happens to be the last one
+// to leave prepares the dependency map for the coming iteration:
+// either by rebuilding it for the new order or by reconstructing it from the saved levels.
+static 
+void dxQuickStepIsland_Stage4LCP_ConstraintsReordering(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    // The iteration counter has been pre-incremented before the scheduled tasks were released for execution
+    const unsigned int iteration = stage4CallContext->m_LCP_iteration - 1;
+
+    const bool rebuildRequired = dxQuickStepIsland_Stage4LCP_ConstraintsShuffling(stage4CallContext, iteration);
+    if (rebuildRequired) {
+        dxQuickStepIsland_Stage4LCP_LinksArraysZeroing(stage4CallContext);
+    }
+
+    // Sign off from the reordering routine; only the last thread to do so goes on
+    const bool lastThreadExiting = ThrsafeExchangeAdd(&stage4CallContext->m_SOR_reorderThreadsRemaining, (atomicord32)(-1)) == 1;
+    if (!lastThreadExiting) {
+        return;
+    }
+
+    if (rebuildRequired) {
+        // Rebuild the object dependency map
+        dxQuickStepIsland_Stage4LCP_DependencyMapForNewOrderRebuilding(stage4CallContext);
+    }
+    else {
+        // NOTE: So far, this branch is only reached in CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR case
+        dIASSERT(iteration != 0);
+        dxQuickStepIsland_Stage4LCP_DependencyMapFromSavedLevelsReconstruction(stage4CallContext);
+    }
+}
+
+// Performs the calling thread's share of constraints reordering for the given iteration
+// and returns whether the dependency map is to be rebuilt for the order array afterwards.
+//
+// The behavior is selected at compile time by CONSTRAINTS_REORDERING_METHOD:
+//  - REORDERING_METHOD__BY_ERROR: iteration 0 does nothing and returns true;
+//    iteration 1 only copies lambda into last_lambda (in two independently claimed halves)
+//    and returns false; later iterations let a single thread compute per-row errors
+//    and qsort the whole order array with compare_index_error, and return true.
+//  - REORDERING_METHOD__RANDOMLY: every iteration but the 0th lets a single thread
+//    randomly permute the order array; true is always returned.
+//  - any other method: nothing is done (only iteration 0 is expected) and true is returned.
+//
+// Each piece of work is guarded by an atomic exchange on one of the m_SOR_reorder*Taken flags,
+// so that it is executed only by the first caller that sets the flag.
+static 
+bool dxQuickStepIsland_Stage4LCP_ConstraintsShuffling(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int iteration)
+{
+    bool result = false;
+
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+
+    // Computes errors for order entries in [startIndex, endIndex), saves the current lambdas
+    // as the last ones and sorts that range of the order array
+    struct ConstraintsReorderingHelper
+    {
+        void operator ()(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int startIndex, unsigned int endIndex)
+        {
+            const dReal *lambda = stage4CallContext->m_lambda;
+            dReal *last_lambda = stage4CallContext->m_last_lambda;
+            IndexError *order = stage4CallContext->m_order;
+
+            for (unsigned int index = startIndex; index != endIndex; ++index) {
+                unsigned int i = order[index].index;
+                dReal lambda_i = lambda[i];
+                if (lambda_i != REAL(0.0)) {
+                    //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
+                    order[index].error = dFabs(lambda_i - last_lambda[i]);
+                }
+                else if (last_lambda[i] != REAL(0.0)) {
+                    //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
+                    order[index].error = dFabs(/*lambda_i - */last_lambda[i]); // lambda_i == 0
+                }
+                else {
+                    // Both the current and the previous lambda are zero: the entry gets an infinite error
+                    order[index].error = dInfinity;
+                }
+                // Finally copy the lambda for the next iteration
+                last_lambda[i] = lambda_i;
+            }
+            qsort (order + startIndex, endIndex - startIndex, sizeof(IndexError), &compare_index_error);
+        }
+    };
+
+    if (iteration > 1) { // Only reorder starting from iteration #2
+        // sort the constraints so that the ones converging slowest
+        // get solved last. use the absolute (not relative) error.
+        /*
+         *  Full reorder needs to be done.
+         *  Even though this contradicts the initial idea of moving dependent constraints
+         *  to the order end the algorithm does not work the other way well.
+         *  It looks like the iterative method needs a shake after it already found
+         *  some initial approximations and those incurred errors help it to converge even better.
+         */
+        if (ThrsafeExchange(&stage4CallContext->m_SOR_reorderHeadTaken, 1) == 0) {
+            // Process the head (the whole [0, m) range is handled by the thread that took the flag)
+            const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+            ConstraintsReorderingHelper()(stage4CallContext, 0, localContext->m_m);
+        }
+        
+        result = true;
+    }
+    else if (iteration == 1) {
+        // No reordering yet: just save the lambdas, with the array split in two halves
+        // that can be taken by different threads
+        if (ThrsafeExchange(&stage4CallContext->m_SOR_reorderHeadTaken, 1) == 0) {
+            // Process the first half
+            const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+            unsigned int startIndex = 0;
+            unsigned int indicesCount = localContext->m_m / 2;
+            // Just copy the lambdas for the next iteration
+            memcpy(stage4CallContext->m_last_lambda + startIndex, stage4CallContext->m_lambda + startIndex, indicesCount * sizeof(dReal));
+        }
+
+        if (ThrsafeExchange(&stage4CallContext->m_SOR_reorderTailTaken, 1) == 0) {
+            // Process the second half
+            const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+            unsigned int startIndex = localContext->m_m / 2;
+            unsigned int indicesCount = localContext->m_m - startIndex;
+            // Just copy the lambdas for the next iteration
+            memcpy(stage4CallContext->m_last_lambda + startIndex, stage4CallContext->m_lambda + startIndex, indicesCount * sizeof(dReal));
+        }
+
+        // result = false; -- already 'false'
+    } 
+    else /*if (iteration < 1) */{
+        result = true; // return true on 0th iteration to build dependency map for the initial order 
+    }
+
+
+#elif CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__RANDOMLY
+
+    if (iteration != 0) {
+        dIASSERT(!dIN_RANGE(iteration, 0, RANDOM_CONSTRAINTS_REORDERING_FREQUENCY));
+
+        dIASSERT(iteration % RANDOM_CONSTRAINTS_REORDERING_FREQUENCY == RRS_REORDERING); {
+            // Randomly permutes indicesCount order entries starting at startIndex:
+            // each entry is swapped with the one at position dRandInt(index + 1)
+            // (presumably a random position not exceeding its own -- confirm against dRandInt)
+            struct ConstraintsReorderingHelper
+            {
+                void operator ()(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int startIndex, unsigned int indicesCount)
+                {
+                    IndexError *order = stage4CallContext->m_order + startIndex;
+
+                    for (unsigned int index = 1; index < indicesCount; ++index) {
+                        int swapIndex = dRandInt(index + 1);
+                        IndexError tmp = order[index];
+                        order[index] = order[swapIndex];
+                        order[swapIndex] = tmp;
+                    }
+                }
+            };
+
+            /*
+             *  Full reorder needs to be done.
+             *  Even though this contradicts the initial idea of moving dependent constraints
+             *  to the order end the algorithm does not work the other way well.
+             *  It looks like the iterative method needs a shake after it already found
+             *  some initial approximations and those incurred errors help it to converge even better.
+             */
+            if (ThrsafeExchange(&stage4CallContext->m_SOR_reorderHeadTaken, 1) == 0) {
+                // Process the head (the whole array is handled by the thread that took the flag)
+                const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+                ConstraintsReorderingHelper()(stage4CallContext, 0, localContext->m_m);
+            }
+        }
+        dIASSERT((RRS__MAX, true)); // A reference to RRS__MAX to be located by Find in Files
+    }
+    else {
+        // Just return true and skip the randomization for the very first iteration
+    }
+
+    result = true;
+
+#else // #if CONSTRAINTS_REORDERING_METHOD != REORDERING_METHOD__BY_ERROR && CONSTRAINTS_REORDERING_METHOD != REORDERING_METHOD__RANDOMLY
+
+    dIASSERT(iteration == 0);  // The reordering request is only returned for the first iteration
+    result = true;
+
+
+#endif
+
+    return result;
+}
+
+// Zero-fills the bi_links ([nb]) and mi_links ([2*(m + 1)]) arrays.
+//
+// Each array is split in two parts and every part is guarded by its own atomic flag,
+// so that a part is cleared only by the first caller that sets the corresponding flag.
+static 
+void dxQuickStepIsland_Stage4LCP_LinksArraysZeroing(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    atomicord32 *const bi_links = stage4CallContext->m_bi_links_or_mi_levels;/*=[nb]*/
+    const unsigned int nb = stage4CallContext->m_stepperCallContext->m_islandBodiesCount;
+    const unsigned int bi_headCount = nb / 2;
+
+    // Body links: lower half...
+    if (ThrsafeExchange(&stage4CallContext->m_SOR_bi_zeroHeadTaken, 1) == 0) {
+        memset(bi_links, 0, sizeof(bi_links[0]) * bi_headCount);
+    }
+    // ...and the remaining elements
+    if (ThrsafeExchange(&stage4CallContext->m_SOR_bi_zeroTailTaken, 1) == 0) {
+        memset(bi_links + bi_headCount, 0, sizeof(bi_links[0]) * (nb - bi_headCount));
+    }
+
+    atomicord32 *const mi_links = stage4CallContext->m_mi_links;/*=[2*(m + 1)]*/
+    const unsigned int mi_halfCount = stage4CallContext->m_localContext->m_m + 1;
+
+    // Constraint links: first (m + 1) elements...
+    if (ThrsafeExchange(&stage4CallContext->m_SOR_mi_zeroHeadTaken, 1) == 0) {
+        memset(mi_links, 0, sizeof(mi_links[0]) * mi_halfCount);
+    }
+    // ...and the second (m + 1) elements
+    if (ThrsafeExchange(&stage4CallContext->m_SOR_mi_zeroTailTaken, 1) == 0) {
+        memset(mi_links + mi_halfCount, 0, sizeof(mi_links[0]) * mi_halfCount);
+    }
+}
+
+// Builds the dependency links for the current contents of the order array.
+//
+// The order entries are walked from first to last. bi_links keeps, for every body,
+// the encoded position of the latest entry that referenced the body. An entry depends
+// on the greatest of the positions recorded for its bodies and it is linked
+// into mi_links as the new first "down" child of that dependency, with the previous
+// first child becoming the entry's "level" neighbor.
+static 
+void dxQuickStepIsland_Stage4LCP_DependencyMapForNewOrderRebuilding(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+    const dxJBodiesItem *jb = localContext->m_jb;
+    const IndexError *order = stage4CallContext->m_order;
+
+    atomicord32 *bi_links = stage4CallContext->m_bi_links_or_mi_levels;/*=[nb]*/
+    atomicord32 *mi_links = stage4CallContext->m_mi_links;/*=[2*(m + 1)]*/
+
+    const unsigned int m = localContext->m_m;
+    for (unsigned int i = 0; i != m; ++i) {
+        const unsigned int row = order[i].index;
+        const int b1 = jb[row].first;
+        const int b2 = jb[row].second;
+        const unsigned int encoded_i = dxENCODE_INDEX(i);
+
+        // The first body is used unconditionally: take the entry that referenced it last
+        // as the dependency candidate and record the current entry in its place
+        atomicord32 &b1_link = bi_links[(unsigned int)b1];
+        unsigned int encoded_depi = b1_link;
+        b1_link = encoded_i;
+
+        // The second body may be missing (-1) or be the same as the first one
+        if (b2 != -1 && b2 != b1) {
+            atomicord32 &b2_link = bi_links[(unsigned int)b2];
+            const unsigned int b2_encoded_depi = b2_link;
+            if (encoded_depi < b2_encoded_depi) {
+                encoded_depi = b2_encoded_depi; // The later of the two entries is the one to wait for
+            }
+            b2_link = encoded_i;
+        }
+
+        // OD: There is also a dependency on findex[index],
+        // however the findex can only refer to the rows of the same joint 
+        // and hence that index is going to have the same bodies. Since the 
+        // indices are sorted in a way that the meaningful findex values 
+        // always come last, the dependency of findex[index] is going to
+        // be implicitly satisfied via matching bodies at smaller "i"s.
+
+        // Check that the dependency targets an earlier "i"
+        dIASSERT(encoded_depi < encoded_i);
+
+        atomicord32 *dependency_links = mi_links + (sizeint)encoded_depi * 2;
+        atomicord32 *current_links = mi_links + (sizeint)encoded_i * 2;
+
+        const unsigned int encoded_downi = dependency_links[1];
+        dependency_links[1] = encoded_i; // Link i as down-dependency for depi
+        current_links[0] = encoded_downi; // Link previous down-chain as the level-dependency with i
+    }
+}
+
+// Restores the dependency links in mi_links from the level roots saved in mi_levels.
+//
+// Every entry i is re-inserted as the first "down" child of the root recorded
+// in mi_levels[i], with the previous first child becoming the entry's "level" neighbor.
+// After that the head of the available level roots list is reset to dxHEAD_INDEX.
+static 
+void dxQuickStepIsland_Stage4LCP_DependencyMapFromSavedLevelsReconstruction(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const atomicord32 *mi_levels = stage4CallContext->m_bi_links_or_mi_levels;/*=[m]*/
+    atomicord32 *mi_links = stage4CallContext->m_mi_links;/*=[2*(m + 1)]*/
+
+    // NOTE! 
+    // OD: mi_links is deliberately NOT zero-filled before the reconstruction.
+    // An iteration ends with all the down links zeroed. And since down links
+    // are moved to the next level links when parent-child relations are established,
+    // the horizontal levels come out properly terminated.
+    // The leaf nodes had their links zero-initialized initially
+    // and those zeros remain intact during the solving, so the down links
+    // are properly terminated as well.
+    // This is very obscure and error prone and would deserve an assertion check at least,
+    // but the simplest check imaginable would be zero filling and building
+    // another tree, with the memory buffers being compared afterwards --
+    // and that would obviously be unreasonable.
+    //
+    // NOTE!
+    // OD: This routine could be threaded. However, having two threads messing
+    // in one integer array with random access and kicking each other's memory lines
+    // out of cache would probably work worse than letting a single thread do the whole job.
+    const unsigned int m = stage4CallContext->m_localContext->m_m;
+    for (unsigned int i = 0; i != m; ++i) {
+        const unsigned int levelRoot = mi_levels[i];
+        const unsigned int encoded_i = dxENCODE_INDEX(i);
+
+        atomicord32 &rootDownLink = mi_links[2 * (sizeint)levelRoot + 1];
+        const unsigned int previousFirstLink = rootDownLink;
+        rootDownLink = encoded_i; // The entry becomes the first child of its level root...
+        mi_links[2 * (sizeint)encoded_i + 0] = previousFirstLink; // ...and is followed by the former first child
+    }
+
+    // Additionally reset available level root's list head
+    mi_links[2 * dxHEAD_INDEX + 0] = dxHEAD_INDEX;
+}
+
+// Threaded-call entry point executed after the constraints reordering has completed.
+//
+// Records the iteration start with this call's own releasee as the target, posts
+// additional iteration calls if more than one thread is allowed (registering them
+// as extra dependencies of this call's releasee) and executes one share
+// of the iteration work in the calling thread. Always returns 1.
+static 
+int dxQuickStepIsland_Stage4LCP_ConstraintsReorderingSync_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+
+    dxQuickStepperStage4CallContext *stage4CallContext = static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext);
+    const unsigned int iterationThreads = stage4CallContext->m_LCP_IterationAllowedThreads;
+
+    stage4CallContext->RecordLCP_IterationStart(iterationThreads, callThisReleasee);
+
+    const unsigned int knownToBeCompletedLevel = dxHEAD_INDEX;
+    if (iterationThreads > 1) {
+        dxWorld *world = stage4CallContext->m_stepperCallContext->m_world;
+        // One thread is the current one, the rest are to be posted
+        const unsigned int extraThreads = iterationThreads - 1;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, extraThreads);
+        world->PostThreadedCallsIndexOverridenGroup(NULL, extraThreads, callThisReleasee, &dxQuickStepIsland_Stage4LCP_Iteration_Callback, stage4CallContext, knownToBeCompletedLevel, "QuickStepIsland Stage4LCP_Iteration");
+    }
+    dxQuickStepIsland_Stage4LCP_MTIteration(stage4CallContext, knownToBeCompletedLevel);
+
+    return 1;
+}
+
+// Threaded-call adapter for the multi-threaded iteration routine.
+//
+// The call instance index is not an index here: it carries the level that is
+// initially known to be completed and is passed on to
+// dxQuickStepIsland_Stage4LCP_MTIteration(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage4LCP_Iteration_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callThisReleasee; // unused
+
+    const unsigned int initiallyKnownToBeCompletedLevel = (unsigned int)callInstanceIndex;
+    dIASSERT(initiallyKnownToBeCompletedLevel == callInstanceIndex); // A truncation check...
+
+    dxQuickStepIsland_Stage4LCP_MTIteration(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext), initiallyKnownToBeCompletedLevel);
+    return 1;
+}
+
+/*
+ *  One thread's share of a multi-threaded LCP iteration: iteration steps are extracted
+ *  from the dependency tree kept in mi_links and executed until no work is found.
+ *
+ *            +0                +0
+ * Root───┬─────────────────┬──...
+ *      +1│               +1│
+ *       ┌┴┐+0   ┌─┐+0      .
+ *       │A├─────┤B├─...
+ *       └┬┘     └┬┘
+ *      +1│     +1│
+ *       ┌┴┐+0    .
+ *       │C├─...
+ *       └┬┘
+ *      +1│
+ *        .
+ *
+ *  Each node occupies two mi_links slots: "+0" is the "level" (horizontal) link and "+1" is the "down" link.
+ *  Lower tree levels depend on their parents. Same level nodes are independent with respect to each other.
+ *
+ *  1. B is linked in place of A
+ *  2. A is processed
+ *  3. C is inserted at the Root level
+ *
+ *  The tree starts with a single child subtree at the root level ("down" link of slot #0 is used for that). 
+ *  Then, additional "C" nodes are added to the root level by building horizontal link via slots of 
+ *  their former parent "A"s that had become free.
+ *  The "level" link of slot #0 is used to find the root level head.
+ *
+ *  Since the tree is altered during iteration, mi_levels record each node parents so that the tree could be reconstructed.
+ *
+ *  initiallyKnownToBeCompletedLevel is the level root at which walking the list of available level roots
+ *  is to stop; dxHEAD_INDEX is passed by the calls that start an iteration.
+ *  On exit, the count of running iteration threads is decremented.
+ */
+static 
+void dxQuickStepIsland_Stage4LCP_MTIteration(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int initiallyKnownToBeCompletedLevel)
+{
+    atomicord32 *mi_levels = stage4CallContext->m_bi_links_or_mi_levels;
+    atomicord32 *mi_links = stage4CallContext->m_mi_links;
+
+    unsigned int knownToBeCompletedLevel = initiallyKnownToBeCompletedLevel;
+
+    while (true) {
+        // Take the current head of the available level roots list
+        unsigned int initialLevelRoot = mi_links[2 * dxHEAD_INDEX + 0];
+        if (initialLevelRoot != dxHEAD_INDEX && initialLevelRoot == knownToBeCompletedLevel) {
+            // No work is (currently) available
+            break;
+        }
+        
+        // Walk the level roots from the head towards the one that is known to be completed
+        for (unsigned int currentLevelRoot = initialLevelRoot; ; currentLevelRoot = mi_links[2 * (sizeint)currentLevelRoot + 0]) {
+            // Keep extracting children of the current level root until none remain
+            while (true) {
+                const unsigned invalid_link = dxENCODE_INDEX(-1);
+
+                unsigned currentLevelFirstLink = mi_links[2 * (sizeint)currentLevelRoot + 1];
+                if (currentLevelFirstLink == invalid_link) {
+                    break;
+                }
+                
+                // Try to extract first record from linked list
+                // (on failure the loop simply re-reads the first link and retries)
+                unsigned currentLevelNextLink = mi_links[2 * (sizeint)currentLevelFirstLink + 0];
+                if (ThrsafeCompareExchange(&mi_links[2 * (sizeint)currentLevelRoot + 1], currentLevelFirstLink, currentLevelNextLink)) {
+                    // if succeeded, execute selected iteration step...
+                    dxQuickStepIsland_Stage4LCP_IterationStep(stage4CallContext, dxDECODE_INDEX(currentLevelFirstLink));
+
+                    // Check if there are any dependencies
+                    unsigned level0DownLink = mi_links[2 * (sizeint)currentLevelFirstLink + 1];
+                    if (level0DownLink != invalid_link) {
+                        // ...and if yes, insert the record into the list of available level roots
+                        // (the record's freed "level" slot is reused to link it in front of the current head)
+                        unsigned int levelRootsFirst;
+                        do {
+                            levelRootsFirst = mi_links[2 * dxHEAD_INDEX + 0];
+                            mi_links[2 * (sizeint)currentLevelFirstLink + 0] = levelRootsFirst;
+                        }
+                        while (!ThrsafeCompareExchange(&mi_links[2 * dxHEAD_INDEX + 0], levelRootsFirst, currentLevelFirstLink));
+
+                        // If another level was added and some threads have already exited...
+                        // (presumably the call returns the counter value as it was before the increment -- confirm against ThrsafeIncrementIntUpToLimit)
+                        unsigned int threadsTotal = stage4CallContext->m_LCP_iterationThreadsTotal;
+                        unsigned int threadsRemaining = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_LCP_iterationThreadsRemaining, threadsTotal);
+                        if (threadsRemaining != threadsTotal) {
+                            // ...go on and schedule one more...
+                            const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
+                            dxWorld *world = callContext->m_world;
+                            // ...passing knownToBeCompletedLevel as the initial one for the spawned call
+                            world->PostThreadedCallForUnawareReleasee(NULL, NULL, 0, stage4CallContext->m_LCP_iterationNextReleasee, NULL, &dxQuickStepIsland_Stage4LCP_Iteration_Callback, stage4CallContext, knownToBeCompletedLevel, "QuickStepIsland Stage4LCP_Iteration");
+                            // NOTE: it's hard to predict whether it is reasonable to re-post a call
+                            // each time a new level is added (provided some calls have already exited, of course).
+                            // The efficiency very much depends on dependencies patterns between levels 
+                            // (i.e. it depends on the amount of available work added with each level).
+                            // The strategy of re-posting exited calls as frequently as possible
+                            // leads to potential wasting execution cycles in some cores for the aid
+                            // of keeping other cores busy as much as possible and not letting all the
+                            // work be executed by just a partial cores subset. With the emergence of large
+                            // available work amounts (the work that is not dependent on anything and 
+                            // ready to be executed immediately) this strategy is going to transit into 
+                            // full cores set being busy executing useful work. If amounts of work 
+                            // emerging from added levels are small, the strategy should lead to 
+                            // approximately the same efficiency as if the work was done by only a cores subset 
+                            // with the remaining cores wasting (some) cycles for re-scheduling calls 
+                            // to those busy cores rather than being idle or handling other islands. 
+                        }
+                    }
+
+                    // Finally record the root index of current record's level
+                    // (this is what the dependency map is later reconstructed from)
+                    mi_levels[dxDECODE_INDEX(currentLevelFirstLink)] = currentLevelRoot;
+                }
+            }
+
+            if (currentLevelRoot == knownToBeCompletedLevel) {
+                break;
+            }
+            dIASSERT(currentLevelRoot != dxHEAD_INDEX); // Zero level is expected to be the deepest one in the list and execution must not loop past it.
+        }
+        // Save the level root we started from as known to be completed
+        knownToBeCompletedLevel = initialLevelRoot;
+    }
+
+    // Decrement running threads count on exit
+    ThrsafeAdd(&stage4CallContext->m_LCP_iterationThreadsRemaining, (atomicord32)(-1));
+}
+
+// Single-threaded LCP iteration: executes the iteration step
+// for every position of the order array, from the first one to the last.
+static 
+void dxQuickStepIsland_Stage4LCP_STIteration(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const unsigned int stepCount = stage4CallContext->m_localContext->m_m;
+
+    unsigned int step = 0;
+    while (step != stepCount) {
+        dxQuickStepIsland_Stage4LCP_IterationStep(stage4CallContext, step);
+        ++step;
+    }
+}
+
+//***************************************************************************
+// SOR-LCP method
+
+// nb is the number of bodies in the body array.
+// J is an m*16 matrix of constraint rows with rhs, cfm, lo and hi in padding
+// jb is an array of first and second body numbers for each constraint row
+// invI is the global frame inverse inertia for each body (stacked 3x3 matrices)
+//
+// this returns lambda and fc (the constraint force).
+// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda
+//
+// b, lo and hi are modified on exit
+
+// Executes a single SOR step for the constraint row found at position i of the order array.
+//
+// The lambda correction is computed from the row's RHS and CFM values and the constraint
+// forces currently accumulated for the row's bodies; the new lambda is clamped
+// to the row's limits (which are derived from lambda[findex] if the row has a valid findex);
+// finally, the constraint forces of the bodies are updated with the correction actually applied.
+static 
+void dxQuickStepIsland_Stage4LCP_IterationStep(dxQuickStepperStage4CallContext *stage4CallContext, unsigned int i)
+{
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    const unsigned int index = stage4CallContext->m_order[i].index;
+
+    dReal *lambda = stage4CallContext->m_lambda;
+    const dReal old_lambda = lambda[index];
+
+    const dReal *J_ptr = localContext->m_J + (sizeint)index * JME__MAX;
+
+    // Locate the constraint force records of the bodies; the second body can be missing
+    dReal *fc = stage4CallContext->m_cforce;
+    const dxJBodiesItem &row_bodies = localContext->m_jb[index];
+    const int b1 = row_bodies.first;
+    const int b2 = row_bodies.second;
+
+    dReal *fc_ptr1 = fc + (sizeint)(unsigned)b1 * CFE__MAX;
+    dReal *fc_ptr2 = (b2 != -1) ? fc + (sizeint)(unsigned)b2 * CFE__MAX : NULL;
+
+    dReal delta = J_ptr[JME_RHS] - old_lambda * J_ptr[JME_CFM];
+
+    // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
+    delta -= fc_ptr1[CFE_LX] * J_ptr[JME_J1LX] + fc_ptr1[CFE_LY] * J_ptr[JME_J1LY] +
+        fc_ptr1[CFE_LZ] * J_ptr[JME_J1LZ] + fc_ptr1[CFE_AX] * J_ptr[JME_J1AX] +
+        fc_ptr1[CFE_AY] * J_ptr[JME_J1AY] + fc_ptr1[CFE_AZ] * J_ptr[JME_J1AZ];
+    // @@@ potential optimization: handle 1-body constraints in a separate
+    //     loop to avoid the cost of test & jump?
+    if (fc_ptr2 != NULL) {
+        delta -= fc_ptr2[CFE_LX] * J_ptr[JME_J2LX] + fc_ptr2[CFE_LY] * J_ptr[JME_J2LY] +
+            fc_ptr2[CFE_LZ] * J_ptr[JME_J2LZ] + fc_ptr2[CFE_AX] * J_ptr[JME_J2AX] +
+            fc_ptr2[CFE_AY] * J_ptr[JME_J2AY] + fc_ptr2[CFE_AZ] * J_ptr[JME_J2AZ];
+    }
+
+    // set the limits for this constraint.
+    // this is the place where the QuickStep method differs from the
+    // direct LCP solving method, since that method only performs this
+    // limit adjustment once per time step, whereas this method performs
+    // once per iteration per constraint row.
+    // the constraints are ordered so that all lambda[] values needed have
+    // already been computed.
+    dReal lo_act, hi_act;
+    const int row_findex = localContext->m_findex[index];
+    if (row_findex == -1) {
+        hi_act = J_ptr[JME_HI];
+        lo_act = J_ptr[JME_LO];
+    }
+    else {
+        hi_act = dFabs (J_ptr[JME_HI] * lambda[(unsigned)row_findex]);
+        lo_act = -hi_act;
+    }
+
+    // compute lambda and clamp it to [lo,hi];
+    // if the clamping takes effect, the correction is re-evaluated accordingly.
+    // @@@ potential optimization: does SSE have clamping instructions
+    //     to save test+jump penalties here?
+    const dReal new_lambda = old_lambda + delta;
+    dReal clamped_lambda = new_lambda;
+    if (new_lambda < lo_act) {
+        clamped_lambda = lo_act;
+        delta = lo_act - old_lambda;
+    }
+    else if (new_lambda > hi_act) {
+        clamped_lambda = hi_act;
+        delta = hi_act - old_lambda;
+    }
+    lambda[index] = clamped_lambda;
+
+    //@@@ a trick that may or may not help
+    //dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations));
+    //delta *= ramp;
+
+    // update fc.
+    // @@@ potential optimization: SIMD for this and the b2 >= 0 case
+    const dReal *iMJ_ptr = stage4CallContext->m_iMJ + (sizeint)index * IMJ__MAX;
+    fc_ptr1[CFE_LX] += delta * iMJ_ptr[IMJ_1LX];
+    fc_ptr1[CFE_LY] += delta * iMJ_ptr[IMJ_1LY];
+    fc_ptr1[CFE_LZ] += delta * iMJ_ptr[IMJ_1LZ];
+    fc_ptr1[CFE_AX] += delta * iMJ_ptr[IMJ_1AX];
+    fc_ptr1[CFE_AY] += delta * iMJ_ptr[IMJ_1AY];
+    fc_ptr1[CFE_AZ] += delta * iMJ_ptr[IMJ_1AZ];
+    // @@@ potential optimization: handle 1-body constraints in a separate
+    //     loop to avoid the cost of test & jump?
+    if (fc_ptr2 != NULL) {
+        fc_ptr2[CFE_LX] += delta * iMJ_ptr[IMJ_2LX];
+        fc_ptr2[CFE_LY] += delta * iMJ_ptr[IMJ_2LY];
+        fc_ptr2[CFE_LZ] += delta * iMJ_ptr[IMJ_2LZ];
+        fc_ptr2[CFE_AX] += delta * iMJ_ptr[IMJ_2AX];
+        fc_ptr2[CFE_AY] += delta * iMJ_ptr[IMJ_2AY];
+        fc_ptr2[CFE_AZ] += delta * iMJ_ptr[IMJ_2AZ];
+    }
+}
+
+// Tells whether Stage4b needs to iterate over the joint infos:
+// always when WARM_STARTING is defined; otherwise only if m_mfb is positive.
+static inline 
+bool IsStage4bJointInfosIterationRequired(const dxQuickStepperLocalContext *localContext)
+{
+#ifdef WARM_STARTING
+    (void)localContext; // the answer does not depend on the context in this configuration
+    return true;
+#else
+    return localContext->m_mfb > 0;
+#endif
+}
+
+// Threaded-call entry point that launches Stage4b.
+//
+// A single thread is used unless the joint infos iteration is required, in which case
+// CalculateOptimalThreadsCount() may add more threads out of the remaining allowance.
+// Additional Stage4b calls are posted as extra dependencies of this call's releasee
+// and one share of Stage4b is executed in the calling thread. Always returns 1.
+static 
+int dxQuickStepIsland_Stage4LCP_IterationSync_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+
+    dxQuickStepperStage4CallContext *stage4CallContext = static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext);
+    const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    // The calling thread is always counted in
+    unsigned int stage4b_allowedThreads = 1;
+    if (IsStage4bJointInfosIterationRequired(localContext)) {
+        const unsigned int allowedThreads = callContext->m_stepperAllowedThreads;
+        dIASSERT(allowedThreads >= stage4b_allowedThreads);
+        stage4b_allowedThreads += CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE4B_STEP>(localContext->m_nj, allowedThreads - stage4b_allowedThreads);
+    }
+
+    if (stage4b_allowedThreads > 1) {
+        dxWorld *world = callContext->m_world;
+        const unsigned int extraThreads = stage4b_allowedThreads - 1;
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, extraThreads);
+        world->PostThreadedCallsGroup(NULL, extraThreads, callThisReleasee, &dxQuickStepIsland_Stage4b_Callback, stage4CallContext, "QuickStepIsland Stage4b");
+    }
+    dxQuickStepIsland_Stage4b(stage4CallContext);
+
+    return 1;
+}
+
+// Threaded-call adapter: unwraps the stage 4 context from the opaque pointer
+// and forwards to dxQuickStepIsland_Stage4b(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage4b_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // unused
+    dxQuickStepIsland_Stage4b(static_cast<dxQuickStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Stage 4b body; may be entered by several threads sharing one context.
+//  * Only the caller that flips m_cf_4b from 0 to 1 adds stepsize * cforce to
+//    the linear/angular velocity of every island body (ThrsafeExchange is
+//    presumably an atomic exchange returning the previous value).
+//  * If IsStage4bJointInfosIterationRequired() says so, callers then keep
+//    claiming chunks of dxQUICKSTEPISLAND_STAGE4B_STEP joints through the
+//    shared m_ji_4b counter. For joints that have feedback rows the joint's
+//    dJointFeedback f1/t1 (and f2/t2 when a second body exists) is filled from
+//    Jcopy and lambda; under WARM_STARTING lambda is also copied back into
+//    joint->lambda for every joint.
+static 
+void dxQuickStepIsland_Stage4b(dxQuickStepperStage4CallContext *stage4CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    // One-shot section: guarded by the m_cf_4b flag so it runs once per stage.
+    if (ThrsafeExchange(&stage4CallContext->m_cf_4b, 1) == 0) {
+        dxBody * const *body = callContext->m_islandBodiesStart;
+        unsigned int nb = callContext->m_islandBodiesCount;
+        const dReal *cforce = stage4CallContext->m_cforce;
+        dReal stepsize = callContext->m_stepSize;
+        // add stepsize * cforce to the body velocity
+        // (cforce holds CFE__MAX values per body, walked in step with the body array)
+        const dReal *cforcecurr = cforce;
+        dxBody *const *const bodyend = body + nb;
+        for (dxBody *const *bodycurr = body; bodycurr != bodyend; cforcecurr += CFE__MAX, bodycurr++) {
+            dxBody *b = *bodycurr;
+            for (unsigned int j = dSA__MIN; j != dSA__MAX; j++) {
+                b->lvel[dV3E__AXES_MIN + j] += stepsize * cforcecurr[CFE__L_MIN + j];
+                b->avel[dV3E__AXES_MIN + j] += stepsize * cforcecurr[CFE__A_MIN + j];
+            }
+        }
+    }
+
+
+    // note that the SOR method overwrites rhs and J at this point, so
+    // they should not be used again.
+
+    if (IsStage4bJointInfosIterationRequired(localContext)) {
+        dReal data[JVE__MAX]; // scratch output of Multiply1_12q1, copied into the feedback struct
+        const dReal *Jcopy = localContext->m_Jcopy;
+        const dReal *lambda = stage4CallContext->m_lambda;
+        const dxMIndexItem *mindex = localContext->m_mindex;
+        dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
+
+        // Joints are split into nj_steps chunks (rounded up) of step_size joints each.
+        unsigned int nj = localContext->m_nj;
+        const unsigned int step_size = dxQUICKSTEPISLAND_STAGE4B_STEP;
+        unsigned int nj_steps = (nj + (step_size - 1)) / step_size;
+
+        // Claim the next unprocessed chunk; the loop ends once the counter reaches nj_steps.
+        unsigned ji_step;
+        while ((ji_step = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_ji_4b, nj_steps)) != nj_steps) {
+            unsigned int ji = ji_step * step_size;
+            // The last chunk may be shorter than step_size.
+            const unsigned int jiend = ji + dMIN(step_size, nj - ji);
+
+            // Jcopy only advances for joints with feedback rows (see below), so
+            // the chunk's first block is located through the joint's fbIndex.
+            const dReal *Jcopycurr = Jcopy + (sizeint)mindex[ji].fbIndex * JCE__MAX;
+
+            while (true) {
+                // straightforward computation of joint constraint forces:
+                // multiply related lambdas with respective J' block for joints
+                // where feedback was requested
+                // (consecutive mindex entries delimit the joint's rows, hence the [ji + 1] access)
+                const unsigned int fb_infom = mindex[ji + 1].fbIndex - mindex[ji].fbIndex;
+                if (fb_infom != 0) {
+                    dIASSERT(fb_infom == mindex[ji + 1].mIndex - mindex[ji].mIndex);
+
+                    const dReal *lambdacurr = lambda + mindex[ji].mIndex;
+                    dxJoint *joint = jointinfos[ji].joint;
+
+#ifdef WARM_STARTING
+                    memcpy(joint->lambda, lambdacurr, fb_infom * sizeof(dReal));
+#endif
+
+                    dJointFeedback *fb = joint->feedback;
+
+                    // Second body part first; skipped when the joint has no second body.
+                    if (joint->node[1].body) {
+                        Multiply1_12q1 (data, Jcopycurr + JCE__J2_MIN, lambdacurr, fb_infom);
+                        dSASSERT(JCE__MAX == 12);
+
+                        fb->f2[dSA_X] = data[JVE_LX];
+                        fb->f2[dSA_Y] = data[JVE_LY];
+                        fb->f2[dSA_Z] = data[JVE_LZ];
+                        fb->t2[dSA_X] = data[JVE_AX];
+                        fb->t2[dSA_Y] = data[JVE_AY];
+                        fb->t2[dSA_Z] = data[JVE_AZ];
+                    }
+
+                    // First body part; data is reused as the scratch buffer.
+                    Multiply1_12q1 (data, Jcopycurr + JCE__J1_MIN, lambdacurr, fb_infom);
+                    dSASSERT(JCE__MAX == 12);
+
+                    fb->f1[dSA_X] = data[JVE_LX];
+                    fb->f1[dSA_Y] = data[JVE_LY];
+                    fb->f1[dSA_Z] = data[JVE_LZ];
+                    fb->t1[dSA_X] = data[JVE_AX];
+                    fb->t1[dSA_Y] = data[JVE_AY];
+                    fb->t1[dSA_Z] = data[JVE_AZ];
+
+                    Jcopycurr += fb_infom * JCE__MAX;
+                }
+                else {
+                    // No feedback rows for this joint: only the warm-starting copy applies.
+#ifdef WARM_STARTING
+                    const dReal *lambdacurr = lambda + mindex[ji].mIndex;
+                    const unsigned int infom = mindex[ji + 1].mIndex - mindex[ji].mIndex;
+                    dxJoint *joint = jointinfos[ji].joint;
+                    memcpy(joint->lambda, lambdacurr, infom * sizeof(dReal));
+#endif
+                }
+
+                if (++ji == jiend) {
+                    break;
+                }
+            }
+        }
+    }
+}
+
+// Threaded-call adapter: unwraps the stage 5 context from the opaque pointer
+// and forwards to dxQuickStepIsland_Stage5(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage5_Callback(void *_stage5CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // unused
+    dxQuickStepIsland_Stage5(static_cast<dxQuickStepperStage5CallContext *>(_stage5CallContext));
+    return 1;
+}
+
+// Stage 5 of the island step. Rewinds the stepper arena to the state saved in
+// m_stage3MemArenaState (after which the stage 5 context must not be touched),
+// allocates and initializes the stage 6 context from the arena, and then
+//  - with a single allowed thread: runs stage 6a, the velocity check and
+//    stage 6b inline;
+//  - otherwise: posts the stage 6a sync call (released towards
+//    m_finalReleasee), posts any additional stage 6a calls, takes part in
+//    stage 6a on the calling thread and finally removes one dependency from
+//    the sync call on its own behalf.
+static 
+void dxQuickStepIsland_Stage5(dxQuickStepperStage5CallContext *stage5CallContext)
+{
+    // Copy everything needed out of the context before the arena is rewound.
+    const dxStepperProcessingCallContext *callContext = stage5CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *localContext = stage5CallContext->m_localContext;
+
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    memarena->RestoreState(stage5CallContext->m_stage3MemArenaState);
+    stage5CallContext = NULL; // WARNING! stage5CallContext is not valid after this point!
+    dIVERIFY(stage5CallContext == NULL); // To suppress unused variable assignment warnings
+
+    dxQuickStepperStage6CallContext *stage6CallContext = (dxQuickStepperStage6CallContext *)memarena->AllocateBlock(sizeof(dxQuickStepperStage6CallContext));
+    stage6CallContext->Initialize(callContext, localContext);
+
+    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+    dIASSERT(allowedThreads >= 1);
+
+    if (allowedThreads == 1) {
+        IFTIMING (dTimerNow ("compute velocity update"));
+        dxQuickStepIsland_Stage6a(stage6CallContext);
+        dxQuickStepIsland_Stage6_VelocityCheck(stage6CallContext);
+        IFTIMING (dTimerNow ("update position and tidy up"));
+        dxQuickStepIsland_Stage6b(stage6CallContext);
+        IFTIMING (dTimerEnd());
+        // The constraint row count lives in the local context; no local 'm'
+        // exists in this function, so it has to be read from there.
+        IFTIMING (if (localContext->m_m > 0) dTimerReport (stdout,1));
+    }
+    else {
+        unsigned int nb = callContext->m_islandBodiesCount;
+        unsigned int stage6a_allowedThreads = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE6A_STEP>(nb, allowedThreads);
+
+        dxWorld *world = callContext->m_world;
+
+        // The sync call is posted with stage6a_allowedThreads dependencies:
+        // one per extra stage 6a call plus one for this thread.
+        dCallReleaseeID stage6aSyncReleasee;
+        world->PostThreadedCallForUnawareReleasee(NULL, &stage6aSyncReleasee, stage6a_allowedThreads, callContext->m_finalReleasee, 
+            NULL, &dxQuickStepIsland_Stage6aSync_Callback, stage6CallContext, 0, "QuickStepIsland Stage6a Sync");
+
+        if (stage6a_allowedThreads > 1) {
+            world->PostThreadedCallsGroup(NULL, stage6a_allowedThreads - 1, stage6aSyncReleasee, &dxQuickStepIsland_Stage6a_Callback, stage6CallContext, "QuickStepIsland Stage6a");
+        }
+        dxQuickStepIsland_Stage6a(stage6CallContext);
+        // This thread's share of stage 6a is done; drop its dependency.
+        world->AlterThreadedCallDependenciesCount(stage6aSyncReleasee, -1);
+    }
+}
+
+
+// Threaded-call adapter: unwraps the stage 6 context from the opaque pointer
+// and forwards to dxQuickStepIsland_Stage6a(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage6a_Callback(void *_stage6CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // unused
+    dxQuickStepIsland_Stage6a(static_cast<dxQuickStepperStage6CallContext *>(_stage6CallContext));
+    return 1;
+}
+
+// Stage 6a: applies the accumulated force/torque to the body velocities.
+// For every island body it adds stepsize * invMass * facc to lvel, scales
+// tacc by stepsize in place and feeds the body's invI block together with
+// the scaled tacc to dMultiplyAdd0_331() to update avel.
+// Bodies are handled in chunks of dxQUICKSTEPISLAND_STAGE6A_STEP; each caller
+// keeps claiming chunks through the shared m_bi_6a counter until none remain.
+static 
+void dxQuickStepIsland_Stage6a(dxQuickStepperStage6CallContext *stage6CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage6CallContext->m_stepperCallContext;
+    const dxQuickStepperLocalContext *localContext = stage6CallContext->m_localContext;
+
+    const dReal h = callContext->m_stepSize;
+    const dReal *const invIBase = localContext->m_invI;
+    dxBody *const *const bodies = callContext->m_islandBodiesStart;
+    const unsigned int bodyCount = callContext->m_islandBodiesCount;
+
+    const unsigned int chunkSize = dxQUICKSTEPISLAND_STAGE6A_STEP;
+    const unsigned int chunkCount = (bodyCount + (chunkSize - 1)) / chunkSize;
+
+    for (;;) {
+        const unsigned int chunk = ThrsafeIncrementIntUpToLimit(&stage6CallContext->m_bi_6a, chunkCount);
+        if (chunk == chunkCount) {
+            break; // every chunk has been claimed
+        }
+
+        const unsigned int firstBody = chunk * chunkSize;
+        // The last chunk may hold fewer than chunkSize bodies.
+        const unsigned int bodiesInChunk = dMIN(chunkSize, bodyCount - firstBody);
+
+        const dReal *invIrow = invIBase + (sizeint)firstBody * IIE__MAX;
+        dxBody *const *const chunkEnd = bodies + firstBody + bodiesInChunk;
+        for (dxBody *const *it = bodies + firstBody; it != chunkEnd; ++it, invIrow += IIE__MAX) {
+            dxBody *b = *it;
+            const dReal invMassTimesH = h * b->invMass;
+            for (unsigned int axis = dSA__MIN; axis != dSA__MAX; ++axis) {
+                b->lvel[dV3E__AXES_MIN + axis] += invMassTimesH * b->facc[dV3E__AXES_MIN + axis];
+                b->tacc[dV3E__AXES_MIN + axis] *= h;
+            }
+            dMultiplyAdd0_331 (b->avel, invIrow + IIE__MATRIX_MIN, b->tacc);
+        }
+    }
+}
+
+
+// Threaded-call callback that runs the optional velocity check and then
+// starts stage 6b: it posts the additional stage 6b calls (making this
+// call's releasee wait for them) and takes part in stage 6b itself.
+// Always returns 1.
+static 
+int dxQuickStepIsland_Stage6aSync_Callback(void *_stage6CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // only needed when extra calls get posted below
+    dxQuickStepperStage6CallContext *const context6 = static_cast<dxQuickStepperStage6CallContext *>(_stage6CallContext);
+    dxQuickStepIsland_Stage6_VelocityCheck(context6);
+
+    const dxStepperProcessingCallContext *const stepperContext = context6->m_stepperCallContext;
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    const unsigned int bodyCount = stepperContext->m_islandBodiesCount;
+    const unsigned int threadsFor6b = CalculateOptimalThreadsCount<dxQUICKSTEPISLAND_STAGE6B_STEP>(bodyCount, threadLimit);
+
+    if (threadsFor6b > 1) {
+        dxWorld *const world = stepperContext->m_world;
+        // Register the new dependencies before the calls that will release them are posted.
+        world->AlterThreadedCallDependenciesCount(callThisReleasee, threadsFor6b - 1);
+        world->PostThreadedCallsGroup(NULL, threadsFor6b - 1, callThisReleasee, &dxQuickStepIsland_Stage6b_Callback, context6, "QuickStepIsland Stage6b");
+    }
+
+    dxQuickStepIsland_Stage6b(context6);
+    return 1;
+}
+
+// Debug aid. When CHECK_VELOCITY_OBEYS_CONSTRAINT is defined and the island
+// has constraint rows, sums |J_row * v| over all rows (using the current
+// lvel/avel of the one or two bodies attached to each row) and prints the
+// total to stdout. Without the macro the function does nothing.
+static 
+void dxQuickStepIsland_Stage6_VelocityCheck(dxQuickStepperStage6CallContext *stage6CallContext)
+{
+    (void)stage6CallContext; // can be unused
+#ifdef CHECK_VELOCITY_OBEYS_CONSTRAINT
+    const dxQuickStepperLocalContext *localContext = stage6CallContext->m_localContext;
+
+    const unsigned int rowCount = localContext->m_m;
+    if (rowCount == 0) {
+        return; // no constraint rows, nothing to report
+    }
+
+    const dxStepperProcessingCallContext *callContext = stage6CallContext->m_stepperCallContext;
+    dxBody * const *bodies = callContext->m_islandBodiesStart;
+    const dxJBodiesItem *jb = localContext->m_jb;
+    const dReal *Jrow = localContext->m_J;
+
+    dReal totalError = 0;
+    for (unsigned int row = 0; row < rowCount; ++row, Jrow += JME__MAX) {
+        const int firstIndex = jb[row].first;
+        const int secondIndex = jb[row].second;
+        dReal rowVelocity = 0;
+
+        const dxBody *firstBody = bodies[(unsigned)firstIndex];
+        for (unsigned int axis = dSA__MIN; axis != dSA__MAX; ++axis) {
+            rowVelocity += Jrow[JME__J1L_MIN + axis] * firstBody->lvel[dV3E__AXES_MIN + axis] + Jrow[JME__J1A_MIN + axis] * firstBody->avel[dV3E__AXES_MIN + axis];
+        }
+
+        // An index of -1 marks a row without a second body.
+        if (secondIndex != -1) {
+            const dxBody *secondBody = bodies[(unsigned)secondIndex];
+            for (unsigned int axis = dSA__MIN; axis != dSA__MAX; ++axis) {
+                rowVelocity += Jrow[JME__J2L_MIN + axis] * secondBody->lvel[dV3E__AXES_MIN + axis] + Jrow[JME__J2A_MIN + axis] * secondBody->avel[dV3E__AXES_MIN + axis];
+            }
+        }
+
+        totalError += dFabs(rowVelocity);
+    }
+    printf ("velocity error = %10.6e\n", totalError);
+#endif
+}
+
+// Threaded-call adapter: unwraps the stage 6 context from the opaque pointer
+// and forwards to dxQuickStepIsland_Stage6b(). Always returns 1.
+static 
+int dxQuickStepIsland_Stage6b_Callback(void *_stage6CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+    (void)callThisReleasee; // unused
+    dxQuickStepIsland_Stage6b(static_cast<dxQuickStepperStage6CallContext *>(_stage6CallContext));
+    return 1;
+}
+
+// Stage 6b: for every island body calls dxStepBody() with the step size
+// (updating position and orientation from the new velocities) and then
+// zeroes the body's force and torque accumulators.
+// Bodies are handled in chunks of dxQUICKSTEPISLAND_STAGE6B_STEP; each caller
+// keeps claiming chunks through the shared m_bi_6b counter until none remain.
+static 
+void dxQuickStepIsland_Stage6b(dxQuickStepperStage6CallContext *stage6CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage6CallContext->m_stepperCallContext;
+
+    const dReal h = callContext->m_stepSize;
+    dxBody *const *const bodies = callContext->m_islandBodiesStart;
+    const unsigned int bodyCount = callContext->m_islandBodiesCount;
+
+    const unsigned int chunkSize = dxQUICKSTEPISLAND_STAGE6B_STEP;
+    const unsigned int chunkCount = (bodyCount + (chunkSize - 1)) / chunkSize;
+
+    for (;;) {
+        const unsigned int chunk = ThrsafeIncrementIntUpToLimit(&stage6CallContext->m_bi_6b, chunkCount);
+        if (chunk == chunkCount) {
+            break; // every chunk has been claimed
+        }
+
+        const unsigned int firstBody = chunk * chunkSize;
+        // The last chunk may hold fewer than chunkSize bodies.
+        const unsigned int bodiesInChunk = dMIN(chunkSize, bodyCount - firstBody);
+
+        dxBody *const *it = bodies + firstBody;
+        dxBody *const *const chunkEnd = it + bodiesInChunk;
+        do {
+            dxBody *b = *it;
+            dxStepBody (b, h);
+            dZeroVector3 (b->facc);
+            dZeroVector3 (b->tacc);
+        } while (++it != chunkEnd);
+    }
+}
+
+
+
+/*extern */
+// Estimates how many bytes of arena memory the quick-step solver needs for an
+// island of nb bodies and the given _nj joints.
+//   body   - island bodies; not inspected here
+//   nb     - number of bodies
+//   _joint - island joints; each is queried through getSureMaxInfo()
+//   _nj    - number of joints in _joint
+// Returns the sum of the individual buffer sizes, where buffers that the
+// estimate treats as alternatives contribute only the larger of the two
+// (the dMAX() calls below). NOTE(review): which buffers actually share arena
+// space is decided by the stage functions, not here - keep both in sync.
+sizeint dxEstimateQuickStepMemoryRequirements (dxBody * const *body,
+                                              unsigned int nb,
+                                              dxJoint * const *_joint,
+                                              unsigned int _nj)
+{
+    (void)body; // unused
+    // nj  - joints reporting at least one row (max_m > 0)
+    // m   - total of max_m over those joints
+    // mfb - same total restricted to joints that have a feedback pointer
+    unsigned int nj, m, mfb;
+
+    {
+        unsigned int njcurr = 0, mcurr = 0, mfbcurr = 0;
+        dxJoint::SureMaxInfo info;
+        dxJoint *const *const _jend = _joint + _nj;
+        for (dxJoint *const *_jcurr = _joint; _jcurr != _jend; _jcurr++) {	
+            dxJoint *j = *_jcurr;
+            j->getSureMaxInfo (&info);
+
+            unsigned int jm = info.max_m;
+            if (jm > 0) {
+                njcurr++;
+
+                mcurr += jm;
+                if (j->feedback)
+                    mfbcurr += jm;
+            }
+        }
+        nj = njcurr; m = mcurr; mfb = mfbcurr;
+    }
+
+    sizeint res = 0;
+
+    // invI is counted unconditionally, on top of everything below.
+    res += dOVERALIGNED_SIZE(sizeof(dReal) * IIE__MAX * nb, INVI_ALIGNMENT); // for invI
+
+    {
+        // Alternative 1: the initial joint info array sized for all _nj joints.
+        sizeint sub1_res1 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * _nj); // for initial jointinfos
+
+        // Alternative 2: the shrunk joint info array plus everything allocated afterwards.
+        sizeint sub1_res2 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * nj); // for shrunk jointinfos
+        sub1_res2 += dEFFICIENT_SIZE(sizeof(dxQuickStepperLocalContext)); // for dxQuickStepLocalContext
+        if (m > 0) {
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dxMIndexItem) * (nj + 1)); // for mindex
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dxJBodiesItem) * m); // for jb
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(int) * m); // for findex
+            sub1_res2 += dOVERALIGNED_SIZE(sizeof(dReal) * JME__MAX * m, JACOBIAN_ALIGNMENT); // for J
+            sub1_res2 += dOVERALIGNED_SIZE(sizeof(dReal) * JCE__MAX * mfb, JCOPY_ALIGNMENT); // for Jcopy
+            {
+                // Stage 2/3 working set ...
+                sizeint sub2_res1 = dEFFICIENT_SIZE(sizeof(dxQuickStepperStage3CallContext)); // for dxQuickStepperStage3CallContext
+                sub2_res1 += dEFFICIENT_SIZE(sizeof(dReal) * RHS__MAX * nb); // for rhs_tmp
+                sub2_res1 += dEFFICIENT_SIZE(sizeof(dxQuickStepperStage2CallContext)); // for dxQuickStepperStage2CallContext
+
+                // ... versus the stage 4/5 working set or the stage 6 context, whichever is larger.
+                sizeint sub2_res2 = 0;
+                {
+                    sizeint sub3_res1 = dEFFICIENT_SIZE(sizeof(dxQuickStepperStage5CallContext)); // for dxQuickStepperStage5CallContext;
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(dReal) * m); // for lambda
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(dReal) * CFE__MAX * nb); // for cforce
+                    sub3_res1 += dOVERALIGNED_SIZE(sizeof(dReal) * IMJ__MAX * m, INVMJ_ALIGNMENT); // for iMJ
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(IndexError) * m); // for order
+#if CONSTRAINTS_REORDERING_METHOD == REORDERING_METHOD__BY_ERROR
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(dReal) * m); // for last_lambda
+#endif
+#if !dTHREADING_INTF_DISABLED
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(atomicord32) * dMAX(nb, m)); // for bi_links_or_mi_levels
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(atomicord32) * 2 * ((sizeint)m + 1)); // for mi_links
+#endif
+                    sub3_res1 += dEFFICIENT_SIZE(sizeof(dxQuickStepperStage4CallContext)); // for dxQuickStepperStage4CallContext;
+
+                    sizeint sub3_res2 = dEFFICIENT_SIZE(sizeof(dxQuickStepperStage6CallContext)); // for dxQuickStepperStage6CallContext;
+                    
+                    sub2_res2 += dMAX(sub3_res1, sub3_res2);
+                }
+
+                sub1_res2 += dMAX(sub2_res1, sub2_res2);
+            }
+        }
+        else {
+            // Without constraint rows only the stage 3 context is added.
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dxQuickStepperStage3CallContext)); // for dxQuickStepperStage3CallContext
+        }
+
+        // The three stage 0/1 contexts together form a further alternative.
+        sizeint sub1_res12_max = dMAX(sub1_res1, sub1_res2);
+        sizeint stage01_contexts = dEFFICIENT_SIZE(sizeof(dxQuickStepperStage0BodiesCallContext))
+            + dEFFICIENT_SIZE(sizeof(dxQuickStepperStage0JointsCallContext))
+            + dEFFICIENT_SIZE(sizeof(dxQuickStepperStage1CallContext));
+        res += dMAX(sub1_res12_max, stage01_contexts);
+    }
+
+    return res;
+}
+
+/*extern */
+// Upper estimate of the number of threaded calls one quick-step island run
+// may have posted: 8 + 3 * allowedThreadCount.
+//   activeThreadCount  - ignored
+//   allowedThreadCount - number of threads the stepper is allowed to use
+unsigned dxEstimateQuickStepMaxCallCount(unsigned activeThreadCount, unsigned allowedThreadCount)
+{
+    (void)activeThreadCount; // unused
+
+    const unsigned islandCall = 1; // dxQuickStepIsland itself
+    const unsigned stage4Calls = 5 + (2 * allowedThreadCount + 1); // for Stage4 related schedules
+    const unsigned stage5Call = 1; // dxQuickStepIsland_Stage5
+    const unsigned reserve = allowedThreadCount; // Reserve
+
+    return islandCall + stage4Calls + stage5Call + reserve;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/quickstep.h b/libs/ode-0.16.1/ode/src/quickstep.h
new file mode 100644
index 0000000..4433e9c
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/quickstep.h
@@ -0,0 +1,39 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_QUICK_STEP_H_
+#define _ODE_QUICK_STEP_H_
+
+#include <ode/common.h>
+
+// Only used through a pointer below, so a forward declaration is enough.
+struct dxStepperProcessingCallContext;
+
+
+// Estimates the number of bytes of working memory the quick-step solver needs
+// for an island with nb bodies and the _nj joints in _joint.
+sizeint dxEstimateQuickStepMemoryRequirements(
+    dxBody * const *body, unsigned int nb, dxJoint * const *_joint, unsigned int _nj);
+// Estimates the maximum number of threaded calls a quick-step island run may
+// post for the given allowed thread count.
+unsigned dxEstimateQuickStepMaxCallCount(
+    unsigned activeThreadCount, unsigned allowedThreadCount);
+
+// Quick-step solver entry point for one island described by callContext.
+// NOTE(review): presumably the memory/call-count estimates above have to be
+// honoured by the caller before this is invoked - confirm in the step scheduler.
+void dxQuickStepIsland(const dxStepperProcessingCallContext *callContext);
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/ray.cpp b/libs/ode-0.16.1/ode/src/ray.cpp
new file mode 100644
index 0000000..7709e8b
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/ray.cpp
@@ -0,0 +1,735 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+standard ODE geometry primitives: public API and pairwise collision functions.
+
+the rule is that only the low level primitive collision functions should set
+dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+//****************************************************************************
+// ray public API
+
+// Constructs a ray geom of the given length in `space` and tags it as
+// dRayClass. The base dxGeom is constructed with 1 as its second argument
+// (presumably the "is placeable" flag - confirm against dxGeom's constructor).
+dxRay::dxRay (dSpaceID space, dReal _length) : dxGeom (space,1)
+{
+    length = _length;
+    type = dRayClass;
+}
+
+
+// Recomputes aabb as the box spanned by the ray's two end points: the origin
+// (final_posr->pos) and the origin advanced by `length` along the third
+// column of final_posr->R. aabb is laid out as {minx,maxx,miny,maxy,minz,maxz}.
+void dxRay::computeAABB()
+{
+    const dReal *const origin = final_posr->pos;
+    const dReal *const rot = final_posr->R;
+
+    for (int axis = 0; axis < 3; ++axis) {
+        const dReal startCoord = origin[axis];
+        const dReal endCoord = startCoord + rot[axis*4+2]*length;
+
+        dReal *const bounds = aabb + 2*axis;
+        if (startCoord < endCoord) {
+            bounds[0] = startCoord;
+            bounds[1] = endCoord;
+        }
+        else {
+            bounds[0] = endCoord;
+            bounds[1] = startCoord;
+        }
+    }
+}
+
+
+// Allocates a new ray geom of the given length in `space` and returns it as a
+// generic geom handle.
+dGeomID dCreateRay (dSpaceID space, dReal length)
+{
+    dxRay *const ray = new dxRay (space,length);
+    return ray;
+}
+
+
+// Stores a new length in the ray geom `g` and reports the geom as moved.
+// `g` must be a ray (checked by assertion).
+void dGeomRaySetLength (dGeomID g, dReal length)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    static_cast<dxRay*>(g)->length = length;
+    dGeomMoved (g);
+}
+
+
+// Returns the length stored in the ray geom `g`.
+// `g` must be a ray (checked by assertion).
+dReal dGeomRayGetLength (dGeomID g)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    return static_cast<dxRay*>(g)->length;
+}
+
+
+// Places the ray geom `g`: its position becomes (px,py,pz) and the third
+// column of its rotation matrix becomes the normalized direction (dx,dy,dz).
+// The other rotation columns are left as they are. The geom is then reported
+// as moved. `g` must be a ray (checked by assertion).
+void dGeomRaySet (dGeomID g, dReal px, dReal py, dReal pz,
+                  dReal dx, dReal dy, dReal dz)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    g->recomputePosr();
+
+    dReal *const position = g->final_posr->pos;
+    dReal *const rotation = g->final_posr->R;
+
+    position[0] = px;
+    position[1] = py;
+    position[2] = pz;
+
+    dVector3 direction;
+    direction[0] = dx;
+    direction[1] = dy;
+    direction[2] = dz;
+    dNormalize3(direction);
+
+    // R is stored with a row stride of 4; column 2 carries the ray direction.
+    for (int row = 0; row < 3; ++row) {
+        rotation[row*4+2] = direction[row];
+    }
+    dGeomMoved (g);
+}
+
+
+// Reads back the ray geom's placement: `start` receives its position and
+// `dir` the third column of its rotation matrix. Only the first three
+// elements of each output are written. `g` must be a ray (checked by
+// assertion).
+void dGeomRayGet (dGeomID g, dVector3 start, dVector3 dir)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    g->recomputePosr();
+
+    const dReal *const position = g->final_posr->pos;
+    for (int i = 0; i < 3; ++i) {
+        start[i] = position[i];
+    }
+
+    const dReal *const rotation = g->final_posr->R;
+    for (int row = 0; row < 3; ++row) {
+        dir[row] = rotation[row*4+2];
+    }
+}
+
+
+// Convenience setter for two ray flags at once: a non-zero FirstContact /
+// BackfaceCull turns the respective flag on, zero turns it off.
+// `g` must be a ray (checked by assertion).
+void dGeomRaySetParams (dxGeom *g, int FirstContact, int BackfaceCull)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+
+    // Each flag is handled by its dedicated setter, first-contact first.
+    dGeomRaySetFirstContact(g, FirstContact);
+    dGeomRaySetBackfaceCull(g, BackfaceCull);
+}
+
+
+// Reports two ray flags at once: *FirstContact and *BackfaceCull are set to 1
+// when the respective flag bit is present in g->gflags and to 0 otherwise.
+// `g` must be a ray (checked by assertion); both output pointers are
+// dereferenced unconditionally.
+void dGeomRayGetParams (dxGeom *g, int *FirstContact, int *BackfaceCull)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+
+    const bool firstContactSet = (g->gflags & RAY_FIRSTCONTACT) != 0;
+    *FirstContact = firstContactSet;
+
+    const bool backfaceCullSet = (g->gflags & RAY_BACKFACECULL) != 0;
+    *BackfaceCull = backfaceCullSet;
+}
+
+
+// set/get backface culling flag
+
+// Sets (non-zero backfaceCull) or clears (zero) the RAY_BACKFACECULL bit in
+// g->gflags. `g` must be a ray (checked by assertion).
+void dGeomRaySetBackfaceCull (dxGeom *g, int backfaceCull) 
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+
+    if (!backfaceCull) {
+        g->gflags &= ~RAY_BACKFACECULL;
+        return;
+    }
+    g->gflags |= RAY_BACKFACECULL;
+}
+
+
+// Returns 1 when the RAY_BACKFACECULL bit is set in g->gflags, 0 otherwise.
+// `g` must be a ray (checked by assertion).
+int dGeomRayGetBackfaceCull (dxGeom *g)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    const bool flagSet = (g->gflags & RAY_BACKFACECULL) != 0;
+    return flagSet;
+}
+
+
+// set/get first contact flag
+
+// Sets (non-zero firstContact) or clears (zero) the RAY_FIRSTCONTACT bit in
+// g->gflags. `g` must be a ray (checked by assertion).
+void dGeomRaySetFirstContact (dxGeom *g, int firstContact)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+
+    if (!firstContact) {
+        g->gflags &= ~RAY_FIRSTCONTACT;
+        return;
+    }
+    g->gflags |= RAY_FIRSTCONTACT;
+}
+
+
+// Returns 1 when the RAY_FIRSTCONTACT bit is set in g->gflags, 0 otherwise.
+// `g` must be a ray (checked by assertion).
+int dGeomRayGetFirstContact (dxGeom *g)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    const bool flagSet = (g->gflags & RAY_FIRSTCONTACT) != 0;
+    return flagSet;
+}
+
+
+// Sets (non-zero closestHit) or clears (zero) the RAY_CLOSEST_HIT bit in
+// g->gflags. `g` must be a ray (checked by assertion).
+void dGeomRaySetClosestHit (dxGeom *g, int closestHit)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+
+    if (!closestHit) {
+        g->gflags &= ~RAY_CLOSEST_HIT;
+        return;
+    }
+    g->gflags |= RAY_CLOSEST_HIT;
+}
+
+
+// Returns 1 when the RAY_CLOSEST_HIT bit is set in g->gflags, 0 otherwise.
+// `g` must be a ray (checked by assertion).
+int dGeomRayGetClosestHit (dxGeom *g)
+{
+    dUASSERT (g && g->type == dRayClass,"argument not a ray");
+    const bool flagSet = (g->gflags & RAY_CLOSEST_HIT) != 0;
+    return flagSet;
+}
+
+
+
+// Intersects `ray` with the sphere of the given centre and radius.
+// if mode==1 then use the sphere exit contact, not the entry contact
+// (the exit point is also used whenever the entry point lies behind the ray
+// origin). Returns 1 and fills contact->pos, contact->normal and
+// contact->depth (distance along the ray) on a hit within the ray length;
+// returns 0 and leaves `contact` untouched otherwise.
+
+static int ray_sphere_helper (dxRay *ray, dVector3 sphere_pos, dReal radius,
+                              dContactGeom *contact, int mode)
+{
+    const dReal *const rayPos = ray->final_posr->pos;
+    const dReal *const rayRot = ray->final_posr->R;
+
+    // q: vector from the sphere centre to the ray origin
+    dVector3 q;
+    for (int i = 0; i < 3; ++i) {
+        q[i] = rayPos[i] - sphere_pos[i];
+    }
+
+    const dReal B = dCalcVectorDot3_14(q,rayRot+2);
+    const dReal C = dCalcVectorDot3(q,q) - radius*radius;
+    // note: if C <= 0 then the start of the ray is inside the sphere
+    const dReal discriminant = B*B - C;
+    if (discriminant < 0) return 0;
+    const dReal root = dSqrt(discriminant);
+
+    const dReal exitAlpha = -B + root;
+    dReal alpha;
+    if (mode && C >= 0) {
+        alpha = exitAlpha;
+    }
+    else {
+        alpha = -B - root;
+        // entry point behind the origin: fall back to the exit point
+        if (alpha < 0) alpha = exitAlpha;
+    }
+    if (alpha < 0) return 0;
+    if (alpha > ray->length) return 0;
+
+    for (int row = 0; row < 3; ++row) {
+        contact->pos[row] = rayPos[row] + alpha*rayRot[row*4+2];
+    }
+
+    // the normal is flipped when starting inside the sphere or in exit mode
+    dReal nsign;
+    if (C < 0 || mode) {
+        nsign = REAL(-1.0);
+    }
+    else {
+        nsign = REAL(1.0);
+    }
+    for (int i = 0; i < 3; ++i) {
+        contact->normal[i] = nsign*(contact->pos[i] - sphere_pos[i]);
+    }
+    dNormalize3 (contact->normal);
+
+    contact->depth = alpha;
+    return 1;
+}
+
+
+// Ray (o1) versus sphere (o2) collider. Records both geoms and side indices
+// of -1 in `contact` and delegates the intersection to ray_sphere_helper()
+// in entry-contact mode. Returns the number of contacts produced (0 or 1).
+// `flags` and `skip` are only inspected by the assertions.
+int dCollideRaySphere (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dRayClass);
+    dIASSERT (o2->type == dSphereClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxRay *const theRay = static_cast<dxRay*>(o1);
+    dxSphere *const theSphere = static_cast<dxSphere*>(o2);
+
+    contact->g1 = theRay;
+    contact->g2 = theSphere;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    const int entryMode = 0;
+    return ray_sphere_helper (theRay,theSphere->final_posr->pos,theSphere->radius,contact,entryMode);
+}
+
+
+// Ray (o1) versus box (o2) collider.
+// Always stores g1/g2 and side1/side2 (= -1) in `contact`, even when no hit
+// is found. On a hit within the ray length it also fills contact->pos (world
+// space), contact->normal (a signed column of the box rotation) and
+// contact->depth (distance along the ray), and returns 1; otherwise returns 0.
+// `flags` and `skip` are only inspected by the assertions.
+int dCollideRayBox (dxGeom *o1, dxGeom *o2, int flags,
+                    dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dRayClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxRay *ray = (dxRay*) o1;
+    dxBox *box = (dxBox*) o2;
+
+    contact->g1 = ray;
+    contact->g2 = box;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    int i;
+
+    // compute the start and delta of the ray relative to the box.
+    // we will do all subsequent computations in this box-relative coordinate
+    // system. we have to do a translation and rotation for each point.
+    // s: ray start, v: ray direction (third column of the ray's R)
+    dVector3 tmp,s,v;
+    tmp[0] = ray->final_posr->pos[0] - box->final_posr->pos[0];
+    tmp[1] = ray->final_posr->pos[1] - box->final_posr->pos[1];
+    tmp[2] = ray->final_posr->pos[2] - box->final_posr->pos[2];
+    dMultiply1_331 (s,box->final_posr->R,tmp);
+    tmp[0] = ray->final_posr->R[0*4+2];
+    tmp[1] = ray->final_posr->R[1*4+2];
+    tmp[2] = ray->final_posr->R[2*4+2];
+    dMultiply1_331 (v,box->final_posr->R,tmp);
+
+    // mirror the line so that v has all components >= 0
+    // sign[i] remembers the mirroring; it later orients the contact normal
+    dVector3 sign;
+    for (i=0; i<3; i++) {
+        if (v[i] < 0) {
+            s[i] = -s[i];
+            v[i] = -v[i];
+            sign[i] = 1;
+        }
+        else sign[i] = -1;
+    }
+
+    // compute the half-sides of the box
+    dReal h[3];
+    h[0] = REAL(0.5) * box->side[0];
+    h[1] = REAL(0.5) * box->side[1];
+    h[2] = REAL(0.5) * box->side[2];
+
+    // do a few early exit tests
+    // (v is non-negative after mirroring: a start beyond +h can never reach
+    // the box, a start below -h needs a non-zero v component to reach it, and
+    // a zero direction never hits)
+    if ((s[0] < -h[0] && v[0] <= 0) || s[0] >  h[0] ||
+        (s[1] < -h[1] && v[1] <= 0) || s[1] >  h[1] ||
+        (s[2] < -h[2] && v[2] <= 0) || s[2] >  h[2] ||
+        (v[0] == 0 && v[1] == 0 && v[2] == 0)) {
+            return 0;
+    }
+
+    // compute the t=[lo..hi] range for where s+v*t intersects the box
+    // nlo / nhi record which axis produced the current lo / hi bound
+    dReal lo = -dInfinity;
+    dReal hi = dInfinity;
+    int nlo = 0, nhi = 0;
+    for (i=0; i<3; i++) {
+        if (v[i] != 0) {
+            dReal k = (-h[i] - s[i])/v[i];
+            if (k > lo) {
+                lo = k;
+                nlo = i;
+            }
+            k = (h[i] - s[i])/v[i];
+            if (k < hi) {
+                hi = k;
+                nhi = i;
+            }
+        }
+    }
+
+    // check if the ray intersects
+    if (lo > hi) return 0;
+    // use the lower bound when it is not behind the ray start, otherwise the
+    // upper bound
+    dReal alpha;
+    int n;
+    if (lo >= 0) {
+        alpha = lo;
+        n = nlo;
+    }
+    else {
+        alpha = hi;
+        n = nhi;
+    }
+    if (alpha < 0 || alpha > ray->length) return 0;
+    contact->pos[0] = ray->final_posr->pos[0] + alpha*ray->final_posr->R[0*4+2];
+    contact->pos[1] = ray->final_posr->pos[1] + alpha*ray->final_posr->R[1*4+2];
+    contact->pos[2] = ray->final_posr->pos[2] + alpha*ray->final_posr->R[2*4+2];
+    // normal: column n of the box rotation, signed according to the mirroring
+    contact->normal[0] = box->final_posr->R[0*4+n] * sign[n];
+    contact->normal[1] = box->final_posr->R[1*4+n] * sign[n];
+    contact->normal[2] = box->final_posr->R[2*4+n] * sign[n];
+    contact->depth = alpha;
+    return 1;
+}
+
+// Ray (o1) vs. capsule (o2) collider: writes at most one contact to *contact; returns 1 on a hit and 0 on a miss.
+int dCollideRayCapsule (dxGeom *o1, dxGeom *o2,
+                        int flags, dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dRayClass);
+    dIASSERT (o2->type == dCapsuleClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+    // flags and skip are referenced only by the assertions above
+    dxRay *ray = (dxRay*) o1;
+    dxCapsule *ccyl = (dxCapsule*) o2;
+    // these four fields are assigned up front, so they are written even when 0 is returned
+    contact->g1 = ray;
+    contact->g2 = ccyl;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    dReal lz2 = ccyl->lz * REAL(0.5);  // half of lz; the two cap positions checked below are k = -lz2 and k = +lz2
+
+    // compute some useful info (the ccyl axis is column 2 of ccyl's R, the ray direction is column 2 of the ray's R)
+    dVector3 cs,q,r;
+    dReal C,k;
+    cs[0] = ray->final_posr->pos[0] - ccyl->final_posr->pos[0];  // cs: ray start relative to the ccyl position
+    cs[1] = ray->final_posr->pos[1] - ccyl->final_posr->pos[1];
+    cs[2] = ray->final_posr->pos[2] - ccyl->final_posr->pos[2];
+    k = dCalcVectorDot3_41(ccyl->final_posr->R+2,cs);	// position of ray start along ccyl axis
+    q[0] = k*ccyl->final_posr->R[0*4+2] - cs[0];  // q: from the ray start to its projection on the axis line (for a unit-length axis)
+    q[1] = k*ccyl->final_posr->R[1*4+2] - cs[1];
+    q[2] = k*ccyl->final_posr->R[2*4+2] - cs[2];
+    C = dCalcVectorDot3(q,q) - ccyl->radius*ccyl->radius;
+    // if C < 0 then ray start position within infinite extension of cylinder
+
+    // see if ray start position is inside the capped cylinder
+    int inside_ccyl = 0;
+    if (C < 0) {
+        if (k < -lz2) k = -lz2;  // clamp k to [-lz2, lz2]
+        else if (k > lz2) k = lz2;
+        r[0] = ccyl->final_posr->pos[0] + k*ccyl->final_posr->R[0*4+2];  // r: axis point at the clamped k
+        r[1] = ccyl->final_posr->pos[1] + k*ccyl->final_posr->R[1*4+2];
+        r[2] = ccyl->final_posr->pos[2] + k*ccyl->final_posr->R[2*4+2];
+        if ((ray->final_posr->pos[0]-r[0])*(ray->final_posr->pos[0]-r[0]) +
+            (ray->final_posr->pos[1]-r[1])*(ray->final_posr->pos[1]-r[1]) +
+            (ray->final_posr->pos[2]-r[2])*(ray->final_posr->pos[2]-r[2]) < ccyl->radius*ccyl->radius) {
+                inside_ccyl = 1;  // ray start is closer than radius to that axis point
+        }
+    }
+
+    // compute ray collision with infinite cylinder, except for the case where
+    // the ray is outside the capped cylinder but within the infinite cylinder
+    // (in that case the ray can only hit endcaps)
+    if (!inside_ccyl && C < 0) {
+        // set k to cap position to check (k still holds the clamped axial position from above)
+        if (k < 0) k = -lz2; else k = lz2;
+    }
+    else {
+        dReal uv = dCalcVectorDot3_44(ccyl->final_posr->R+2,ray->final_posr->R+2);  // presumably the dot product of the ccyl axis and the ray direction
+        r[0] = uv*ccyl->final_posr->R[0*4+2] - ray->final_posr->R[0*4+2];  // r is reused: uv*axis - ray direction
+        r[1] = uv*ccyl->final_posr->R[1*4+2] - ray->final_posr->R[1*4+2];
+        r[2] = uv*ccyl->final_posr->R[2*4+2] - ray->final_posr->R[2*4+2];
+        dReal A = dCalcVectorDot3(r,r);
+        // A == 0 means that the ray and ccylinder axes are parallel
+        if (A == 0) { // There is a division by A below...
+            // set k to cap position to check
+            if (uv < 0) k = -lz2; else k = lz2;
+        }
+        else {
+            dReal B = 2*dCalcVectorDot3(q,r);
+            k = B*B-4*A*C;  // k is reused as the discriminant of A*alpha^2 + B*alpha + C = 0
+            if (k < 0) {
+                // the ray does not intersect the infinite cylinder, but if the ray is
+                // inside and parallel to the cylinder axis it may intersect the end
+                // caps. set k to cap position to check.
+                if (!inside_ccyl) return 0;
+                if (uv < 0) k = -lz2; else k = lz2;
+            }
+            else {
+                k = dSqrt(k);
+                A = dRecip (2*A);  // A is reused; presumably it now holds 1/(2*A)
+                dReal alpha = (-B-k)*A;  // try the smaller root first
+                if (alpha < 0) {
+                    alpha = (-B+k)*A;  // fall back to the larger root
+                    if (alpha < 0) return 0;
+                }
+                if (alpha > ray->length) return 0;
+
+                // the ray intersects the infinite cylinder. check to see if the
+                // intersection point is between the caps
+                contact->pos[0] = ray->final_posr->pos[0] + alpha*ray->final_posr->R[0*4+2];
+                contact->pos[1] = ray->final_posr->pos[1] + alpha*ray->final_posr->R[1*4+2];
+                contact->pos[2] = ray->final_posr->pos[2] + alpha*ray->final_posr->R[2*4+2];
+                q[0] = contact->pos[0] - ccyl->final_posr->pos[0];  // q is reused: candidate hit point relative to the ccyl position
+                q[1] = contact->pos[1] - ccyl->final_posr->pos[1];
+                q[2] = contact->pos[2] - ccyl->final_posr->pos[2];
+                k = dCalcVectorDot3_14(q,ccyl->final_posr->R+2);  // k is reused: position of the hit point along the ccyl axis
+                dReal nsign = inside_ccyl ? REAL(-1.0) : REAL(1.0);  // the normal is negated when the ray starts inside
+                if (k >= -lz2 && k <= lz2) {
+                    contact->normal[0] = nsign * (contact->pos[0] -
+                        (ccyl->final_posr->pos[0] + k*ccyl->final_posr->R[0*4+2]));
+                    contact->normal[1] = nsign * (contact->pos[1] -
+                        (ccyl->final_posr->pos[1] + k*ccyl->final_posr->R[1*4+2]));
+                    contact->normal[2] = nsign * (contact->pos[2] -
+                        (ccyl->final_posr->pos[2] + k*ccyl->final_posr->R[2*4+2]));
+                    dNormalize3 (contact->normal);
+                    contact->depth = alpha;  // depth reports the ray parameter of the hit
+                    return 1;
+                }
+
+                // the infinite cylinder intersection point is not between the caps.
+                // set k to cap position to check.
+                if (k < 0) k = -lz2; else k = lz2;
+            }
+        }
+    }
+
+    // check for ray intersection with the caps. k must indicate the cap
+    // position to check. NOTE(review): ray_sphere_helper is not visible here; presumably it tests the ray against the sphere of ccyl->radius centered at q
+    q[0] = ccyl->final_posr->pos[0] + k*ccyl->final_posr->R[0*4+2];
+    q[1] = ccyl->final_posr->pos[1] + k*ccyl->final_posr->R[1*4+2];
+    q[2] = ccyl->final_posr->pos[2] + k*ccyl->final_posr->R[2*4+2];
+    return ray_sphere_helper (ray,q,ccyl->radius,contact, inside_ccyl);
+}
+
+
+// Ray (o1) vs. plane (o2) collider. When the ray crosses the plane at a parameter within
+// [0, ray->length] the contact is filled in and 1 is returned; otherwise 0 is returned and
+// *contact is left untouched. flags and skip are referenced only by the assertions.
+int dCollideRayPlane (dxGeom *o1, dxGeom *o2, int flags,
+                      dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dRayClass);
+    dIASSERT (o2->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxRay *ray = (dxRay*) o1;
+    dxPlane *plane = (dxPlane*) o2;
+
+    // planeOffset > 0 means the ray starts below the plane; the reported normal is negated then
+    const dReal planeOffset = plane->p[3] - dCalcVectorDot3 (plane->p,ray->final_posr->pos);
+    const dReal normalSign = (planeOffset > 0) ? REAL(-1.0) : REAL(1.0);
+
+    const dReal approachRate = dCalcVectorDot3_14(plane->p,ray->final_posr->R+2);
+    if (approachRate == 0) {
+        // ray parallel to plane
+        return 0;
+    }
+
+    // parameter along the ray at which the plane is crossed
+    const dReal hitParam = planeOffset / approachRate;
+    if (hitParam < 0 || hitParam > ray->length) {
+        return 0;
+    }
+
+    for (int axis = 0; axis < 3; ++axis) {
+        contact->pos[axis] = ray->final_posr->pos[axis] + hitParam*ray->final_posr->R[axis*4+2];
+        contact->normal[axis] = normalSign*plane->p[axis];
+    }
+    contact->depth = hitParam;
+    contact->g1 = ray;
+    contact->g2 = plane;
+    contact->side1 = -1;
+    contact->side2 = -1;
+    return 1;
+}
+
+// Ray-Cylinder collider by Joseph Cooper (2011). Tests ray o1 against cylinder o2: returns 1 and fills *contact on a hit, 0 otherwise.
+int dCollideRayCylinder( dxGeom *o1, dxGeom *o2, int flags, dContactGeom *contact, int skip )
+{
+    dIASSERT( skip >= (int)sizeof( dContactGeom ) );
+    dIASSERT( o1->type == dRayClass );
+    dIASSERT( o2->type == dCylinderClass );
+    dIASSERT( (flags & NUMC_MASK) >= 1 );
+    // flags and skip are referenced only by the assertions above
+    dxRay* ray = (dxRay*)( o1 );
+    dxCylinder* cyl = (dxCylinder*)( o2 );
+
+    // Fill in contact information (these four fields are written even when 0 is returned).
+    contact->g1 = ray;
+    contact->g2 = cyl;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    const dReal half_length = cyl->lz * REAL( 0.5 ); // the caps lie at z = +/-half_length in the cylinder frame
+
+
+    /* Possible collision cases:
+     *  Ray origin between/outside caps
+     *  Ray origin within/outside radius
+     *  Ray direction left/right/perpendicular
+     *  Ray direction parallel/perpendicular/other
+     * 
+     *  Ray origin cases (ignoring origin on surface)
+     *
+     *  A          B
+     *     /-\-----------\
+     *  C (   )    D      )
+     *     \_/___________/
+     *
+     *  Cases A and D can collide with caps or cylinder
+     *  Case C can only collide with the caps
+     *  Case B can only collide with the cylinder
+     *  Case D will produce inverted normals
+     *  If the ray is perpendicular, only check the cylinder
+     *  If the ray is parallel to cylinder axis,
+     *  we can only check caps
+     *  If the ray points right,
+     *    Case A,C Check left cap
+     *    Case  D  Check right cap
+     *  If the ray points left
+     *    Case A,C Check right cap
+     *    Case  D  Check left cap
+     *  Case B, check only first possible cylinder collision
+     *  Case D, check only second possible cylinder collision
+     */
+    // Find the ray in the cylinder coordinate frame:
+    dVector3 tmp;
+    dVector3 pos;  // Ray origin in cylinder frame
+    dVector3 dir;  // Ray direction in cylinder frame
+    // Translate ray start by inverse cyl
+    dSubtractVectors3(tmp,ray->final_posr->pos,cyl->final_posr->pos);
+    // Rotate ray start by inverse cyl
+    dMultiply1_331(pos,cyl->final_posr->R,tmp);
+
+    // Get the ray's direction
+    tmp[0] = ray->final_posr->R[2];  // elements 2, 6 and 10 are column 2 of the ray's R
+    tmp[1] = ray->final_posr->R[6];
+    tmp[2] = ray->final_posr->R[10];
+    // Rotate the ray direction by inverse cyl
+    dMultiply1_331(dir,cyl->final_posr->R,tmp); 
+
+    // Is the ray origin inside of the (extended) cylinder?
+    dReal r2 = cyl->radius*cyl->radius;
+    dReal C = pos[0]*pos[0] + pos[1]*pos[1] - r2;
+
+    // Find the different cases
+    // Is ray parallel to the cylinder length?
+    int parallel = (dir[0]==0 && dir[1]==0);
+    // Is ray perpendicular to the cylinder length?
+    int perpendicular = (dir[2]==0);
+    // Is ray origin within the radius of the caps?
+    int inRadius = (C<=0);
+    // Is ray origin between the top and bottom caps?
+    int inCaps   = (dFabs(pos[2])<=half_length);
+
+    int checkCaps = (!perpendicular && (!inCaps || inRadius));  // skipped for perpendicular rays and for case B
+    int checkCyl  = (!parallel && (!inRadius || inCaps));  // skipped for parallel rays and for case C
+    int flipNormals = (inCaps&&inRadius);  // case D: origin inside the cylinder
+
+    dReal tt=-dInfinity; // Depth to intersection
+    dVector3 tmpNorm = {dNaN, dNaN, dNaN}; // ensure we don't leak garbage
+
+    if (checkCaps) {
+        // Make it so we only need to check one cap
+        int flipDir = 0;
+        // Wish c had logical xor...
+        if ((dir[2]<0 && flipNormals) || (dir[2]>0 && !flipNormals)) {
+            flipDir = 1;
+            dir[2]=-dir[2];
+            pos[2]=-pos[2];
+        }
+        // The cap is half the cylinder's length
+        // from the cylinder's origin
+        // We only checkCaps if dir[2]!=0
+        tt = (half_length-pos[2])/dir[2];
+        if (tt>=0 && tt<=ray->length) {
+            tmp[0] = pos[0] + tt*dir[0];
+            tmp[1] = pos[1] + tt*dir[1];
+            // Ensure collision point is within cap circle
+            if (tmp[0]*tmp[0] + tmp[1]*tmp[1] <= r2) {
+                // Successful collision
+                tmp[2] = (flipDir)?-half_length:half_length;  // undo the mirroring for the reported z
+                tmpNorm[0]=0;
+                tmpNorm[1]=0;
+                tmpNorm[2]=(flipDir!=flipNormals)?-REAL(1.0):REAL(1.0);
+                checkCyl = 0;  // Short circuit cylinder check
+            } else {
+                // Ray hits cap plane outside of cap circle
+                tt=-dInfinity; // No collision yet
+            }
+        } else {
+            // The cap plane is beyond (or behind) the ray length
+            tt=-dInfinity; // No collision yet
+        }
+        if (flipDir) {
+            // Flip back
+            dir[2]=-dir[2];
+            pos[2]=-pos[2];
+        }
+    }
+    if (checkCyl) {
+        // Compute quadratic formula for parametric ray equation
+        dReal A =    dir[0]*dir[0] + dir[1]*dir[1];
+        dReal B = 2*(pos[0]*dir[0] + pos[1]*dir[1]);
+        // Already computed C
+
+        dReal k = B*B - 4*A*C;
+        // Check collision with infinite cylinder
+        // k<0 means the ray passes outside the cylinder
+        // k==0 means ray is tangent to cylinder (or parallel)
+        //
+        //  Our quadratic formula: tt = (-B +- sqrt(k))/(2*A)   
+        // 
+        // A must be positive (otherwise we wouldn't be checking
+        // cylinder because ray is parallel)
+        //    if (k<0) ray doesn't collide with cylinder
+        //    if (B > sqrt(k)) then both times are negative
+        //         -- don't calculate
+        //    if (B<-sqrt(k)) then both times are positive (Case A or B)
+        //         -- only calculate first, if first isn't valid
+        //         -- second can't be without first going through a cap
+        //    otherwise (fabs(B)<=sqrt(k)) then C<=0 (ray-origin inside/on cylinder)
+        //         -- only calculate second collision
+        if (k>=0 && (B<0 || B*B<=k)) {
+            k = dSqrt(k); 
+            A = dRecip(2*A);  // A is reused; presumably it now holds 1/(2*A)
+            if (dFabs(B)<=k) {
+                tt = (-B + k)*A; // Second solution
+                // If ray origin is on surface and pointed out, we
+                // can get a tt=0 solution...
+            } else {
+                tt = (-B - k)*A; // First solution
+            }
+            if (tt<=ray->length) {  // no lower bound here: a non-positive tt is rejected by the final tt>0 test
+                tmp[2] = pos[2] + tt*dir[2];
+                if (dFabs(tmp[2])<=half_length) {
+                    // Valid solution
+                    tmp[0] = pos[0] + tt*dir[0];
+                    tmp[1] = pos[1] + tt*dir[1];
+                    tmpNorm[0] = tmp[0]/cyl->radius;
+                    tmpNorm[1] = tmp[1]/cyl->radius;
+                    tmpNorm[2] = 0;
+                    if (flipNormals) {
+                        // Ray origin was inside cylinder
+                        tmpNorm[0] = -tmpNorm[0];
+                        tmpNorm[1] = -tmpNorm[1];
+                    }
+                } else {
+                    // Ray hits cylinder outside of caps
+                    tt=-dInfinity;
+                }
+            } else {
+                // Ray doesn't reach the cylinder
+                tt=-dInfinity;
+            }
+        }
+    }
+    // tt is still -dInfinity when neither test produced a hit; a hit at exactly tt==0 is not reported either
+    if (tt>0) {
+        contact->depth = tt;
+        // Transform the point back to world coordinates
+        tmpNorm[3]=0;
+        tmp[3] = 0;  // tmp[3] has not been assigned before this point
+        dMultiply0_331(contact->normal,cyl->final_posr->R,tmpNorm);
+        dMultiply0_331(contact->pos,cyl->final_posr->R,tmp);
+        contact->pos[0]+=cyl->final_posr->pos[0];
+        contact->pos[1]+=cyl->final_posr->pos[1];
+        contact->pos[2]+=cyl->final_posr->pos[2];
+
+        return 1;
+    }
+    // No contact with anything.
+    return 0;
+}
diff --git a/libs/ode-0.16.1/ode/src/resource_control.cpp b/libs/ode-0.16.1/ode/src/resource_control.cpp
new file mode 100644
index 0000000..29a3d83
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/resource_control.cpp
@@ -0,0 +1,259 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Resource accounting/preallocation class implementations
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#include <ode/common.h>
+#include <ode/cooperative.h>
+#include "config.h"
+#include "resource_control.h"
+#include "simple_cooperative.h"
+
+
+//////////////////////////////////////////////////////////////////////////
+// dxResourceRequirementDescriptor();
+
+dxResourceRequirementDescriptor::~dxResourceRequirementDescriptor()
+{
+    // Intentionally empty: the destructor body performs no work of its own
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// dxRequiredResourceContainer
+
+// Releases whatever the container still holds by delegating to freeResources().
+dxRequiredResourceContainer::~dxRequiredResourceContainer()
+{
+    this->freeResources();
+}
+
+
+// Acquires the resources described by requirementDescriptor: an aligned memory buffer (when a
+// non-zero size is requested), preallocation for simultaneous threaded calls (when a non-zero
+// count is requested) and the stock call wait (when it is flagged as required).
+// Returns true on success; m_relatedThreading and m_stockCallWait are assigned only then.
+// Returns false on failure, after freeing the memory allocation if that stage had been passed.
+bool dxRequiredResourceContainer::allocateResources(const dxResourceRequirementDescriptor &requirementDescriptor)
+{
+    bool result = false;
+
+    // Becomes true once the memory stage has been passed, also when no memory was requested
+    bool bufferAllocated = false;
+
+    do
+    {
+        sizeint memorySizeRequirement = requirementDescriptor.getMemorySizeRequirement();
+
+        if (memorySizeRequirement != 0)
+        {
+            unsigned memoryAlignmentRequirement = requirementDescriptor.getMemoryAlignmentRequirement();
+            // Deliberately not named bufferAllocated, so that the flag above is not shadowed
+            void *bufferPointer = m_memoryAllocation.allocAligned(memorySizeRequirement, memoryAlignmentRequirement);
+            if (bufferPointer == NULL)
+            {
+                break;
+            }
+        }
+        bufferAllocated = true;
+
+        dxThreadingBase *relatedThreading = requirementDescriptor.getrelatedThreading();
+        dIASSERT(relatedThreading != NULL);
+
+        unsigned simultaneousCallRequirement = requirementDescriptor.getSimultaneousCallRequirement();
+        if (simultaneousCallRequirement != 0)
+        {
+            if (!relatedThreading->PreallocateResourcesForThreadedCalls(simultaneousCallRequirement))
+            {
+                break;
+            }
+        }
+
+        dCallWaitID stockCallWait = NULL;
+
+        if (requirementDescriptor.getIsStockCallWaitRequired())
+        {
+            stockCallWait = relatedThreading->AllocateOrRetrieveStockCallWaitID();
+            if (stockCallWait == NULL)
+            {
+                break;
+            }
+        }
+
+        // Commit to the members only after every stage has succeeded
+        m_relatedThreading = relatedThreading;
+        m_stockCallWait = stockCallWait;
+
+        result = true;
+    }
+    while (false);
+
+    if (!result && bufferAllocated)
+    {
+        // A later stage failed: give the memory allocation back
+        m_memoryAllocation.freeAllocation();
+    }
+
+    return result;
+}
+
+// Drops the threading and stock call wait references and frees the memory allocation.
+// When no threading object is recorded, the other members are only asserted to be empty.
+void dxRequiredResourceContainer::freeResources()
+{
+    if (m_relatedThreading == NULL)
+    {
+        // Nothing was acquired, so nothing else should be set either
+        dIASSERT(m_stockCallWait == NULL);
+        dIASSERT(m_memoryAllocation.getUserAreaPointer() == NULL);
+        return;
+    }
+
+    m_relatedThreading = NULL;
+    m_stockCallWait = NULL;
+    m_memoryAllocation.freeAllocation();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+// Converts a descriptor pointer into the public dResourceRequirementsID handle (a plain cast).
+static inline
+dResourceRequirementsID encodeResourceRequirementsID(dxResourceRequirementDescriptor *requirementsDescriptor)
+{
+    dResourceRequirementsID encodedHandle = (dResourceRequirementsID)requirementsDescriptor;
+    return encodedHandle;
+}
+
+
+// Creates a new requirements descriptor bound to the threading object of the given cooperative
+// and returns its handle. The cooperative handle must not be NULL (asserted).
+/*extern ODE_API */
+dResourceRequirementsID dResourceRequirementsCreate(dCooperativeID cooperative)
+{
+    dAASSERT(cooperative != NULL);
+
+    // The descriptor records the threading object the cooperative relates to
+    dxSimpleCooperative *decodedCooperative = decodeCooperativeID(cooperative);
+    dxThreadingBase *cooperativeThreading = decodedCooperative->getRelatedThreading();
+
+    dxResourceRequirementDescriptor *freshDescriptor = new dxResourceRequirementDescriptor(cooperativeThreading);
+    return encodeResourceRequirementsID(freshDescriptor);
+}
+
+// Deletes the requirements descriptor behind the given handle; a NULL handle is ignored.
+/*extern ODE_API */
+void dResourceRequirementsDestroy(dResourceRequirementsID requirements)
+{
+    dxResourceRequirementDescriptor *doomedDescriptor = decodeResourceRequirementsID(requirements);
+    if (doomedDescriptor == NULL)
+    {
+        return;
+    }
+
+    delete doomedDescriptor;
+}
+
+
+// Returns the handle of a newly allocated copy of the given requirements descriptor.
+// The source handle must not be NULL (asserted).
+/*extern ODE_API */
+dResourceRequirementsID dResourceRequirementsClone(/*const */dResourceRequirementsID requirements)
+{
+    dAASSERT(requirements != NULL);
+
+    // Copy-construct the clone from the decoded source descriptor
+    dxResourceRequirementDescriptor *sourceDescriptor = decodeResourceRequirementsID(requirements);
+    dxResourceRequirementDescriptor *copiedDescriptor = new dxResourceRequirementDescriptor(*sourceDescriptor);
+
+    return encodeResourceRequirementsID(copiedDescriptor);
+}
+
+// Merges extraRequirements into summaryRequirements by calling mergeAnotherDescriptorIn() on the
+// summary descriptor. Both handles must not be NULL (asserted).
+/*extern ODE_API */
+void dResourceRequirementsMergeIn(dResourceRequirementsID summaryRequirements, /*const */dResourceRequirementsID extraRequirements)
+{
+    dAASSERT(summaryRequirements != NULL);
+    dAASSERT(extraRequirements != NULL);
+
+    dxResourceRequirementDescriptor *mergeSource = decodeResourceRequirementsID(extraRequirements);
+    dxResourceRequirementDescriptor *mergeTarget = decodeResourceRequirementsID(summaryRequirements);
+    mergeTarget->mergeAnotherDescriptorIn(*mergeSource);
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+// Converts a container pointer into the public dResourceContainerID handle (a plain cast).
+static inline
+dResourceContainerID encodeResourceContainerID(dxRequiredResourceContainer *containerInstance)
+{
+    dResourceContainerID encodedHandle = (dResourceContainerID)containerInstance;
+    return encodedHandle;
+}
+
+
+// Creates a resource container and lets it acquire everything the given requirements describe
+// (see dxRequiredResourceContainer::allocateResources). The requirements handle must not be
+// NULL (asserted).
+// Returns the handle of the new container on success and NULL on failure; on failure no
+// container object is left allocated.
+/*extern ODE_API */
+dResourceContainerID dResourceContainerAcquire(/*const */dResourceRequirementsID requirements)
+{
+    dAASSERT(requirements != NULL);
+
+    dxResourceRequirementDescriptor *requirementsInstance = decodeResourceRequirementsID(requirements);
+
+    // Step 1: create the (still empty) container object
+    dxRequiredResourceContainer *newContainer = new dxRequiredResourceContainer();
+    if (newContainer == NULL)
+    {
+        // Nothing exists yet, so there is nothing to roll back
+        return NULL;
+    }
+
+    // Step 2: let the container acquire its resources
+    if (!newContainer->allocateResources(*requirementsInstance))
+    {
+        // Roll back step 1
+        delete newContainer;
+        return NULL;
+    }
+
+    // Step 3: hand the container out as a handle
+    return encodeResourceContainerID(newContainer);
+}
+
+// Deletes the resource container behind the given handle; a NULL handle is ignored.
+/*extern ODE_API */
+void dResourceContainerDestroy(dResourceContainerID resources)
+{
+    dxRequiredResourceContainer *doomedContainer = decodeResourceContainerID(resources);
+    if (doomedContainer == NULL)
+    {
+        return;
+    }
+
+    delete doomedContainer;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/resource_control.h b/libs/ode-0.16.1/ode/src/resource_control.h
new file mode 100644
index 0000000..cadae0e
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/resource_control.h
@@ -0,0 +1,151 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Resource accounting/preallocation class declarations
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+#ifndef _ODE__PRIVATE_RESOURCE_CONTRIOL_H_
+#define _ODE__PRIVATE_RESOURCE_CONTRIOL_H_
+
+
+#include "objects.h"
+#include "threading_base.h"
+#include "odeou.h"
+#include "common.h"
+#include "error.h"
+
+
+using _OU_NAMESPACE::CSimpleFlags;
+
+
+// Accumulates the resources a client needs: a memory buffer size and alignment, a number of
+// simultaneous threaded calls and a set of feature flags, together with the threading object
+// the requirements relate to.
+class dxResourceRequirementDescriptor:
+    public dBase
+{
+public:
+    // Starts with all numeric requirements at zero and default-constructed feature flags.
+    explicit dxResourceRequirementDescriptor(dxThreadingBase *relatedThreading):
+        dBase(),
+        m_relatedThreading(relatedThreading),
+        m_memorySizeRequirement(0),
+        m_memoryAlignmentRequirement(0),
+        m_simultaneousCallRequirement(0),
+        m_featureRequirements()
+    {
+    }
+
+    ~dxResourceRequirementDescriptor();
+
+    // Public feature bits; the private FL_* enumeration below mirrors them.
+    enum
+    {
+        STOCK_CALLWAIT_REQUIRED = 0x00000001,
+    };
+
+    // Merges every requirement recorded in anotherDescriptor into this descriptor.
+    // Both descriptors are asserted to refer to the same threading object.
+    void mergeAnotherDescriptorIn(const dxResourceRequirementDescriptor &anotherDescriptor)
+    {
+        dIASSERT(getrelatedThreading() == anotherDescriptor.getrelatedThreading()); // m_simultaneousCallRequirement typically depends on threading used
+
+        mergeAnotherDescriptorIn(
+            anotherDescriptor.m_memorySizeRequirement,
+            anotherDescriptor.m_memoryAlignmentRequirement,
+            anotherDescriptor.m_simultaneousCallRequirement,
+            anotherDescriptor.queryAllFeatureFlags());
+    }
+
+    // Merges individually given requirements: each numeric value is combined with the stored
+    // one through dMACRO_MAX and the feature bits are handed to mergeFeatureFlags().
+    void mergeAnotherDescriptorIn(sizeint memorySizeRequirement/*=0*/, unsigned memoryAlignmentRequirement,
+        unsigned simultaneousCallRequirement/*=0*/, unsigned featureRequirement/*=0*/)
+    {
+        mergeFeatureFlags(featureRequirement);
+        m_simultaneousCallRequirement = dMACRO_MAX(m_simultaneousCallRequirement, simultaneousCallRequirement);
+        m_memoryAlignmentRequirement = dMACRO_MAX(m_memoryAlignmentRequirement, memoryAlignmentRequirement);
+        m_memorySizeRequirement = dMACRO_MAX(m_memorySizeRequirement, memorySizeRequirement);
+    }
+
+public:
+    // Threading object passed to the constructor
+    dxThreadingBase *getrelatedThreading() const { return m_relatedThreading; }
+
+    // Memory buffer requirement: size and alignment
+    sizeint getMemorySizeRequirement() const { return m_memorySizeRequirement; }
+    unsigned getMemoryAlignmentRequirement() const { return m_memoryAlignmentRequirement; }
+
+    // Number of simultaneous threaded calls required
+    unsigned getSimultaneousCallRequirement() const { return m_simultaneousCallRequirement; }
+
+    // Whether the STOCK_CALLWAIT_REQUIRED feature bit is recorded
+    bool getIsStockCallWaitRequired() const { return getStockCallWaitRequiredFlag(); }
+
+private:
+    // Internal names for the bits kept in m_featureRequirements
+    enum
+    {
+        FL_STOCK_CALLWAIT_REQUIRED  = STOCK_CALLWAIT_REQUIRED,
+    };
+
+    bool getStockCallWaitRequiredFlag() const { return m_featureRequirements.GetFlagsMaskValue(FL_STOCK_CALLWAIT_REQUIRED); }
+
+    // Bulk access to the feature flags, used when merging descriptors
+    CSimpleFlags::value_type queryAllFeatureFlags() const { return m_featureRequirements.QueryFlagsAllValues(); }
+    void mergeFeatureFlags(CSimpleFlags::value_type flagValues) { m_featureRequirements.SignalFlagsMaskValue(flagValues); }
+
+private:
+    dxThreadingBase     *m_relatedThreading;
+    sizeint             m_memorySizeRequirement;
+    unsigned            m_memoryAlignmentRequirement;
+    unsigned            m_simultaneousCallRequirement;
+    CSimpleFlags        m_featureRequirements;
+};
+
+// Converts a public dResourceRequirementsID handle back into the descriptor pointer (a plain cast).
+static inline
+dxResourceRequirementDescriptor *decodeResourceRequirementsID(dResourceRequirementsID requirements)
+{
+    dxResourceRequirementDescriptor *decodedDescriptor = (dxResourceRequirementDescriptor *)requirements;
+    return decodedDescriptor;
+}
+
+
+// Holds the resources acquired for a requirements descriptor: the related threading object,
+// the stock call wait and an aligned memory allocation.
+class dxRequiredResourceContainer:
+    public dBase
+{
+public:
+    // Starts empty: no threading object, no stock call wait, default-constructed allocation.
+    dxRequiredResourceContainer():
+        dBase(),
+        m_relatedThreading(NULL),
+        m_stockCallWait(NULL),
+        m_memoryAllocation()
+    {
+    }
+
+    ~dxRequiredResourceContainer();
+
+    // Acquire / release the resources; both are defined out of line.
+    bool allocateResources(const dxResourceRequirementDescriptor &requirementDescriptor);
+    void freeResources();
+
+public:
+    // Threading object recorded by allocateResources(); NULL in a freshly constructed container
+    dxThreadingBase *getThreadingInstance() const
+    {
+        return m_relatedThreading;
+    }
+
+    // Stock call wait recorded by allocateResources(); NULL in a freshly constructed container
+    dCallWaitID getStockCallWait() const
+    {
+        return m_stockCallWait;
+    }
+
+    // User area of the memory allocation, as reported by m_memoryAllocation
+    void *getMemoryBufferPointer() const
+    {
+        return m_memoryAllocation.getUserAreaPointer();
+    }
+
+    sizeint getMemoryBufferSize() const
+    {
+        return m_memoryAllocation.getUserAreaSize();
+    }
+
+private:
+    dxThreadingBase     *m_relatedThreading;
+    dCallWaitID         m_stockCallWait;
+    dxAlignedAllocation m_memoryAllocation;
+};
+
+// Converts a public dResourceContainerID handle back into the container pointer (a plain cast).
+static inline
+dxRequiredResourceContainer *decodeResourceContainerID(dResourceContainerID resources)
+{
+    dxRequiredResourceContainer *decodedContainer = (dxRequiredResourceContainer *)resources;
+    return decodedContainer;
+}
+
+
+#endif // #ifndef _ODE__PRIVATE_RESOURCE_CONTRIOL_H_
diff --git a/libs/ode-0.16.1/ode/src/rotation.cpp b/libs/ode-0.16.1/ode/src/rotation.cpp
new file mode 100644
index 0000000..e813ba3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/rotation.cpp
@@ -0,0 +1,317 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+quaternions have the format: (s,vx,vy,vz) where (vx,vy,vz) is the unit
+rotation axis scaled by sin(angle/2) and s is cos(angle/2).
+
+*/
+
+#include <ode/rotation.h>
+#include "config.h"
+#include "odemath.h"
+
+
+#define _R(i,j) R[(i)*4+(j)] // entry (i,j) of the array named R in the enclosing scope; row index i advances by 4 elements
+// SET_3x3_IDENTITY: stores 1 on the diagonal and 0 everywhere else, including the fourth entry of each row.
+#define SET_3x3_IDENTITY \
+    _R(0,0) = REAL(1.0); \
+    _R(0,1) = REAL(0.0); \
+    _R(0,2) = REAL(0.0); \
+    _R(0,3) = REAL(0.0); \
+    _R(1,0) = REAL(0.0); \
+    _R(1,1) = REAL(1.0); \
+    _R(1,2) = REAL(0.0); \
+    _R(1,3) = REAL(0.0); \
+    _R(2,0) = REAL(0.0); \
+    _R(2,1) = REAL(0.0); \
+    _R(2,2) = REAL(1.0); \
+    _R(2,3) = REAL(0.0);
+
+
+void dRSetIdentity (dMatrix3 R)
+{
+    dAASSERT (R);
+    SET_3x3_IDENTITY // expands to the twelve assignments into R, each already ';'-terminated
+}
+
+
+void dRFromAxisAndAngle (dMatrix3 R, dReal ax, dReal ay, dReal az, dReal angle)
+{
+    // Goes through a quaternion: build it from axis/angle, then convert it into R.
+    dAASSERT (R);
+    dQuaternion axisAngleQ;
+    dQFromAxisAndAngle (axisAngleQ,ax,ay,az,angle);
+    dQtoR (axisAngleQ,R);
+}
+
+
+void dRFromEulerAngles (dMatrix3 R, dReal phi, dReal theta, dReal psi)
+{
+    dAASSERT (R);
+    // sine/cosine of each angle, evaluated once and reused in the rows below
+    const dReal sinPhi = dSin(phi), cosPhi = dCos(phi);
+    const dReal sinTheta = dSin(theta), cosTheta = dCos(theta);
+    const dReal sinPsi = dSin(psi), cosPsi = dCos(psi);
+    // row 0
+    _R(0,0) = cosPsi*cosTheta;
+    _R(0,1) = sinPsi*cosTheta;
+    _R(0,2) =-sinTheta;
+    _R(0,3) = REAL(0.0);
+    // row 1
+    _R(1,0) = cosPsi*sinTheta*sinPhi - sinPsi*cosPhi;
+    _R(1,1) = sinPsi*sinTheta*sinPhi + cosPsi*cosPhi;
+    _R(1,2) = cosTheta*sinPhi;
+    _R(1,3) = REAL(0.0);
+    // row 2
+    _R(2,0) = cosPsi*sinTheta*cosPhi + sinPsi*sinPhi;
+    _R(2,1) = sinPsi*sinTheta*cosPhi - cosPsi*sinPhi;
+    _R(2,2) = cosTheta*cosPhi;
+    _R(2,3) = REAL(0.0);
+}
+
+
+void dRFrom2Axes (dMatrix3 R, dReal ax, dReal ay, dReal az,
+                  dReal bx, dReal by, dReal bz)
+{
+    // Rotation with a as first column, the part of b perpendicular to a as second
+    // column and their cross product as third column. R is left untouched when
+    // either vector turns out to have zero length.
+    dAASSERT (R);
+
+    const dReal aLength = dSqrt (ax*ax + ay*ay + az*az);
+    if (aLength <= REAL(0.0)) {
+        dDEBUGMSG ("zero length vector");
+        return;
+    }
+    const dReal aScale = dRecip(aLength);
+    ax *= aScale;
+    ay *= aScale;
+    az *= aScale;
+
+    // strip from b its component along the (now normalized) a
+    const dReal bAlongA = ax*bx + ay*by + az*bz;
+    bx -= bAlongA*ax;
+    by -= bAlongA*ay;
+    bz -= bAlongA*az;
+    const dReal bLength = dSqrt (bx*bx + by*by + bz*bz);
+    if (bLength <= REAL(0.0)) {
+        dDEBUGMSG ("zero length vector");
+        return;
+    }
+    const dReal bScale = dRecip(bLength);
+    bx *= bScale;
+    by *= bScale;
+    bz *= bScale;
+
+    _R(0,0) = ax;  _R(0,1) = bx;  _R(0,2) = - by*az + ay*bz;
+    _R(1,0) = ay;  _R(1,1) = by;  _R(1,2) = - bz*ax + az*bx;
+    _R(2,0) = az;  _R(2,1) = bz;  _R(2,2) = - bx*ay + ax*by;
+    _R(0,3) = REAL(0.0);
+    _R(1,3) = REAL(0.0);
+    _R(2,3) = REAL(0.0);
+}
+
+
+void dRFromZAxis (dMatrix3 R, dReal ax, dReal ay, dReal az)
+{
+    // Builds R with the normalized (ax,ay,az) as its third column; the first two
+    // columns are the vectors p and q that dPlaneSpace(n,p,q) fills in for that
+    // axis. The fourth entry of every row is zeroed.
+    dAASSERT (R); // same argument check as the other dRFrom* builders in this file
+    dVector3 n,p,q;
+    n[0] = ax;
+    n[1] = ay;
+    n[2] = az;
+    dNormalize3 (n);
+    dPlaneSpace (n,p,q);
+
+    // column 0 = p, column 1 = q, column 2 = n
+    _R(0,0) = p[0];  _R(0,1) = q[0];  _R(0,2) = n[0];
+    _R(1,0) = p[1];  _R(1,1) = q[1];  _R(1,2) = n[1];
+    _R(2,0) = p[2];  _R(2,1) = q[2];  _R(2,2) = n[2];
+    _R(0,3) = REAL(0.0);
+    _R(1,3) = REAL(0.0);
+    _R(2,3) = REAL(0.0);
+}
+
+
+void dQSetIdentity (dQuaternion q)
+{
+    dAASSERT (q);
+    // scalar part
+    q[0] = 1;
+    // vector part
+    q[1] = q[2] = q[3] = 0;
+}
+
+
+void dQFromAxisAndAngle (dQuaternion q, dReal ax, dReal ay, dReal az, dReal angle)
+{
+    dAASSERT (q);
+    const dReal axisLengthSq = ax*ax + ay*ay + az*az;
+    if (!(axisLengthSq > REAL(0.0))) {
+        // no usable axis (e.g. the zero vector): produce the identity quaternion
+        q[0] = 1;
+        q[1] = 0;
+        q[2] = 0;
+        q[3] = 0;
+        return;
+    }
+    // q = (cos(angle/2), sin(angle/2) * axis / |axis|)
+    const dReal halfAngle = angle * REAL(0.5);
+    const dReal vectorScale = dSin(halfAngle) * dRecipSqrt(axisLengthSq);
+    q[0] = dCos (halfAngle);
+    q[1] = ax*vectorScale;
+    q[2] = ay*vectorScale;
+    q[3] = az*vectorScale;
+}
+
+
+void dQMultiply0 (dQuaternion qa, const dQuaternion qb, const dQuaternion qc) // qa = qb * qc (quaternion product)
+{
+    dAASSERT (qa && qb && qc); // qa[0] is stored before later rows re-read qb[0]/qc[0], so qa aliasing an input alters the result
+    qa[0] = qb[0]*qc[0] - qb[1]*qc[1] - qb[2]*qc[2] - qb[3]*qc[3]; // scalar part: sb*sc - vb.vc
+    qa[1] = qb[0]*qc[1] + qb[1]*qc[0] + qb[2]*qc[3] - qb[3]*qc[2]; // vector part: sb*vc + sc*vb + vb x vc
+    qa[2] = qb[0]*qc[2] + qb[2]*qc[0] + qb[3]*qc[1] - qb[1]*qc[3];
+    qa[3] = qb[0]*qc[3] + qb[3]*qc[0] + qb[1]*qc[2] - qb[2]*qc[1];
+}
+
+
+void dQMultiply1 (dQuaternion qa, const dQuaternion qb, const dQuaternion qc) // qa = conj(qb) * qc
+{
+    dAASSERT (qa && qb && qc); // qa[0] is stored before later rows re-read qb[0]/qc[0], so qa aliasing an input alters the result
+    qa[0] = qb[0]*qc[0] + qb[1]*qc[1] + qb[2]*qc[2] + qb[3]*qc[3]; // same as dQMultiply0 with the vector part of qb negated
+    qa[1] = qb[0]*qc[1] - qb[1]*qc[0] - qb[2]*qc[3] + qb[3]*qc[2];
+    qa[2] = qb[0]*qc[2] - qb[2]*qc[0] - qb[3]*qc[1] + qb[1]*qc[3];
+    qa[3] = qb[0]*qc[3] - qb[3]*qc[0] - qb[1]*qc[2] + qb[2]*qc[1];
+}
+
+
+void dQMultiply2 (dQuaternion qa, const dQuaternion qb, const dQuaternion qc) // qa = qb * conj(qc)
+{
+    dAASSERT (qa && qb && qc); // qa[0] is stored before later rows re-read qb[0]/qc[0], so qa aliasing an input alters the result
+    qa[0] =  qb[0]*qc[0] + qb[1]*qc[1] + qb[2]*qc[2] + qb[3]*qc[3]; // same as dQMultiply0 with the vector part of qc negated
+    qa[1] = -qb[0]*qc[1] + qb[1]*qc[0] - qb[2]*qc[3] + qb[3]*qc[2];
+    qa[2] = -qb[0]*qc[2] + qb[2]*qc[0] - qb[3]*qc[1] + qb[1]*qc[3];
+    qa[3] = -qb[0]*qc[3] + qb[3]*qc[0] - qb[1]*qc[2] + qb[2]*qc[1];
+}
+
+
+void dQMultiply3 (dQuaternion qa, const dQuaternion qb, const dQuaternion qc) // qa = conj(qb) * conj(qc)
+{
+    dAASSERT (qa && qb && qc); // qa[0] is stored before later rows re-read qb[0]/qc[0], so qa aliasing an input alters the result
+    qa[0] =  qb[0]*qc[0] - qb[1]*qc[1] - qb[2]*qc[2] - qb[3]*qc[3]; // same as dQMultiply0 with both vector parts negated
+    qa[1] = -qb[0]*qc[1] - qb[1]*qc[0] + qb[2]*qc[3] - qb[3]*qc[2];
+    qa[2] = -qb[0]*qc[2] - qb[2]*qc[0] + qb[3]*qc[1] - qb[1]*qc[3];
+    qa[3] = -qb[0]*qc[3] - qb[3]*qc[0] + qb[1]*qc[2] - qb[2]*qc[1];
+}
+
+
+// dRfromQ(), dQfromR() and dDQfromW() are derived from equations in "An Introduction
+// to Physically Based Modeling: Rigid Body Simulation - 1: Unconstrained
+// Rigid Body Dynamics" by David Baraff, Robotics Institute, Carnegie Mellon
+// University, 1997.
+
+void dRfromQ (dMatrix3 R, const dQuaternion q)
+{
+    dAASSERT (q && R);
+    // q = (s,vx,vy,vz); the doubled squares of vx,vy,vz feed the diagonal of R
+    const dReal twoVxVx = 2*q[1]*q[1];
+    const dReal twoVyVy = 2*q[2]*q[2];
+    const dReal twoVzVz = 2*q[3]*q[3];
+    _R(0,0) = 1 - twoVyVy - twoVzVz; // diagonal
+    _R(1,1) = 1 - twoVxVx - twoVzVz;
+    _R(2,2) = 1 - twoVxVx - twoVyVy;
+    _R(0,1) = 2*(q[1]*q[2] - q[0]*q[3]); // off-diagonal entries, in mirrored pairs
+    _R(1,0) = 2*(q[1]*q[2] + q[0]*q[3]);
+    _R(0,2) = 2*(q[1]*q[3] + q[0]*q[2]);
+    _R(2,0) = 2*(q[1]*q[3] - q[0]*q[2]);
+    _R(1,2) = 2*(q[2]*q[3] - q[0]*q[1]);
+    _R(2,1) = 2*(q[2]*q[3] + q[0]*q[1]);
+    _R(0,3) = REAL(0.0); // fourth entry of each row
+    _R(1,3) = REAL(0.0);
+    _R(2,3) = REAL(0.0);
+}
+
+
+void dQfromR (dQuaternion q, const dMatrix3 R)
+{
+    // Four formulas are available, one per quaternion component taken as the
+    // pivot (the one obtained from a square root); the trace and the diagonal
+    // entries of R decide which one is used.
+    dAASSERT (q && R);
+    const dReal trace = _R(0,0) + _R(1,1) + _R(2,2);
+    if (trace >= 0) {
+        // pivot on the scalar part q[0]
+        dReal scale = dSqrt (trace + 1);
+        q[0] = REAL(0.5) * scale;
+        scale = REAL(0.5) * dRecip(scale);
+        q[1] = (_R(2,1) - _R(1,2)) * scale;
+        q[2] = (_R(0,2) - _R(2,0)) * scale;
+        q[3] = (_R(1,0) - _R(0,1)) * scale;
+        return;
+    }
+
+    // negative trace: pivot on the component matching the largest diagonal entry
+    const bool yBeatsX = _R(1,1) > _R(0,0);
+    const bool zIsLargest = yBeatsX ? (_R(2,2) > _R(1,1)) : (_R(2,2) > _R(0,0));
+
+    if (zIsLargest) {
+        // pivot on q[3]
+        dReal scale = dSqrt((_R(2,2) - (_R(0,0) + _R(1,1))) + 1);
+        q[3] = REAL(0.5) * scale;
+        scale = REAL(0.5) * dRecip(scale);
+        q[1] = (_R(2,0) + _R(0,2)) * scale;
+        q[2] = (_R(1,2) + _R(2,1)) * scale;
+        q[0] = (_R(1,0) - _R(0,1)) * scale;
+    }
+    else if (yBeatsX) {
+        // pivot on q[2]
+        dReal scale = dSqrt((_R(1,1) - (_R(2,2) + _R(0,0))) + 1);
+        q[2] = REAL(0.5) * scale;
+        scale = REAL(0.5) * dRecip(scale);
+        q[3] = (_R(1,2) + _R(2,1)) * scale;
+        q[1] = (_R(0,1) + _R(1,0)) * scale;
+        q[0] = (_R(0,2) - _R(2,0)) * scale;
+    }
+    else {
+        // pivot on q[1]
+        dReal scale = dSqrt((_R(0,0) - (_R(1,1) + _R(2,2))) + 1);
+        q[1] = REAL(0.5) * scale;
+        scale = REAL(0.5) * dRecip(scale);
+        q[2] = (_R(0,1) + _R(1,0)) * scale;
+        q[3] = (_R(2,0) + _R(0,2)) * scale;
+        q[0] = (_R(2,1) - _R(1,2)) * scale;
+    }
+}
+
+
+void dDQfromW (dReal dq[4], const dVector3 w, const dQuaternion q) // dq = 0.5 * (0,w) * q (quaternion product)
+{
+    dAASSERT (w && q && dq); // dq is written row by row while q is still being read, so dq aliasing q alters the result
+    dq[0] = REAL(0.5)*(- w[0]*q[1] - w[1]*q[2] - w[2]*q[3]); // scalar part: -w . (q[1],q[2],q[3])
+    dq[1] = REAL(0.5)*(  w[0]*q[0] + w[1]*q[3] - w[2]*q[2]); // vector part: q[0]*w + w x (q[1],q[2],q[3])
+    dq[2] = REAL(0.5)*(- w[0]*q[3] + w[1]*q[0] + w[2]*q[1]);
+    dq[3] = REAL(0.5)*(  w[0]*q[2] - w[1]*q[1] + w[2]*q[0]);
+}
diff --git a/libs/ode-0.16.1/ode/src/simple_cooperative.cpp b/libs/ode-0.16.1/ode/src/simple_cooperative.cpp
new file mode 100644
index 0000000..f8f6f7d
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/simple_cooperative.cpp
@@ -0,0 +1,84 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class header file.                             *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * The simple cooperative class implementation
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#include <ode/common.h>
+#include <ode/cooperative.h>
+#include "config.h"
+#include "simple_cooperative.h"
+#include "default_threading.h"
+
+
+/*virtual */
+dxSimpleCooperative::~dxSimpleCooperative()
+{
+    // Intentionally empty: the virtual destructor is defined out of line and has nothing of its own to release
+}
+
+
+/*virtual */ const dxThreadingFunctionsInfo *dxSimpleCooperative::retrieveThreadingDefaultImpl(dThreadingImplementationID &out_defaultImpl)
+{
+    // Both results come from DefaultThreadingHolder: the implementation goes out by reference, the function table is returned.
+    out_defaultImpl = DefaultThreadingHolder::getDefaultThreadingImpl();
+    return DefaultThreadingHolder::getDefaultThreadingFunctions();
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// Public interface functions
+
+// Presents a dxSimpleCooperative pointer as a dCooperativeID handle (plain cast, no checks).
+static inline dCooperativeID encodeCooperativeID(dxSimpleCooperative *cooperativeInstance)
+{
+    return (dCooperativeID)cooperativeInstance;
+}
+
+
+/*extern ODE_API */
+dCooperativeID dCooperativeCreate(const dThreadingFunctionsInfo *functionInfo/*=NULL*/, dThreadingImplementationID threadingImpl/*=NULL*/)
+{
+    // Allocates a dxSimpleCooperative with the given threading settings and returns
+    // it as an ID; dCooperativeDestroy() is the call that deletes it again.
+    dxSimpleCooperative *const newCooperative = new dxSimpleCooperative(functionInfo, threadingImpl);
+    return encodeCooperativeID(newCooperative);
+}
+
+/*extern ODE_API */
+void dCooperativeDestroy(dCooperativeID cooperative)
+{
+    // Deletes the cooperative object behind the ID; a NULL ID is silently ignored.
+    dxSimpleCooperative *const doomedInstance = decodeCooperativeID(cooperative);
+
+    if (doomedInstance == NULL) return;
+
+    delete doomedInstance;
+}
+
diff --git a/libs/ode-0.16.1/ode/src/simple_cooperative.h b/libs/ode-0.16.1/ode/src/simple_cooperative.h
new file mode 100644
index 0000000..8fcbc99
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/simple_cooperative.h
@@ -0,0 +1,73 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class header file.                             *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * A simple cooperative class definition
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+#ifndef _ODE__PRIVATE_SIMPLE_COOPERATIVE_H_
+#define _ODE__PRIVATE_SIMPLE_COOPERATIVE_H_
+
+
+#include "objects.h"
+#include "threading_base.h"
+
+
+typedef dxThreadingBase dxSimpleCooperative_ThreadingParent; // alias for the threading base class used below
+class dxSimpleCooperative: // a dxThreadingBase that also acts as its own default-implementation provider
+    public dBase,
+    public dxSimpleCooperative_ThreadingParent,
+    private dxIThreadingDefaultImplProvider
+{
+public:
+    dxSimpleCooperative(const dxThreadingFunctionsInfo *functionInfo, dThreadingImplementationID threadingImpl):
+        dBase(),
+        dxSimpleCooperative_ThreadingParent()
+    {
+        dxSimpleCooperative_ThreadingParent::setThreadingDefaultImplProvider(this); // first register this object as the provider
+        dxSimpleCooperative_ThreadingParent::assignThreadingImpl(functionInfo, threadingImpl); // then install the caller-supplied implementation
+    }
+
+    virtual ~dxSimpleCooperative();
+
+public:
+    dxThreadingBase *getRelatedThreading() const { return const_cast<dxSimpleCooperative *>(this); } // the object itself, seen through its threading base
+
+private: // dxIThreadingDefaultImplProvider
+    virtual const dxThreadingFunctionsInfo *retrieveThreadingDefaultImpl(dThreadingImplementationID &out_defaultImpl);
+};
+
+
+// Reinterprets a dCooperativeID handle as a dxSimpleCooperative pointer (plain cast, no checks).
+static inline dxSimpleCooperative *decodeCooperativeID(dCooperativeID cooperative)
+{
+    return (dxSimpleCooperative *)cooperative;
+}
+
+
+#endif // #ifndef _ODE__PRIVATE_SIMPLE_COOPERATIVE_H_
diff --git a/libs/ode-0.16.1/ode/src/sphere.cpp b/libs/ode-0.16.1/ode/src/sphere.cpp
new file mode 100644
index 0000000..e894bac
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/sphere.cpp
@@ -0,0 +1,251 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+standard ODE geometry primitives: public API and pairwise collision functions.
+
+the rule is that only the low level primitive collision functions should set
+dContactGeom::g1 and dContactGeom::g2.
+
+*/
+
+#include <ode/common.h>
+#include <ode/collision.h>
+#include <ode/rotation.h>
+#include "config.h"
+#include "matrix.h"
+#include "odemath.h"
+#include "collision_kernel.h"
+#include "collision_std.h"
+#include "collision_util.h"
+
+#ifdef _MSC_VER
+#pragma warning(disable:4291)  // for VC++, no complaints about "no matching operator delete found"
+#endif
+
+
+//****************************************************************************
+// sphere public API
+
+dxSphere::dxSphere (dSpaceID space, dReal _radius) : dxGeom (space,1)
+{
+    dAASSERT (_radius >= 0); // negative radii are rejected by assertion
+    type = dSphereClass;
+    radius = _radius;
+    updateZeroSizedFlag(_radius == REAL(0.0)); // the flag is raised only for a radius of exactly zero
+}
+
+
+void dxSphere::computeAABB()
+{
+    // Per axis, aabb holds the pair [center - radius, center + radius].
+    const dReal *center = final_posr->pos;
+    for (int axis = 0; axis < 3; ++axis) {
+        aabb[2*axis] = center[axis] - radius;
+        aabb[2*axis + 1] = center[axis] + radius;
+    }
+}
+
+
+dGeomID dCreateSphere (dSpaceID space, dReal radius) // allocates a dxSphere and returns it as a geom ID
+{
+    return new dxSphere (space,radius); // the dxSphere constructor asserts radius >= 0
+}
+
+
+void dGeomSphereSetRadius (dGeomID g, dReal radius)
+{
+    dUASSERT (g && g->type == dSphereClass,"argument not a sphere");
+    dAASSERT (radius >= 0);
+    dxSphere *sphere = (dxSphere*) g;
+    sphere->radius = radius;
+    sphere->updateZeroSizedFlag(radius == REAL(0.0)); // flag raised only for a radius of exactly zero
+    dGeomMoved (g); // invoked after every radius change
+}
+
+
+dReal dGeomSphereGetRadius (dGeomID g)
+{
+    dUASSERT (g && g->type == dSphereClass,"argument not a sphere");
+    // the class tag was checked just above, so read the radius straight through the cast
+    return ((dxSphere*) g)->radius;
+}
+
+
+dReal dGeomSpherePointDepth (dGeomID g, dReal x, dReal y, dReal z)
+{
+    dUASSERT (g && g->type == dSphereClass,"argument not a sphere");
+    g->recomputePosr();
+
+    const dxSphere *sphere = (dxSphere*) g;
+    const dReal *center = sphere->final_posr->pos;
+    const dReal dx = x - center[0], dy = y - center[1], dz = z - center[2];
+    // radius minus distance to the center: > 0 inside, 0 on the surface, < 0 outside
+    return sphere->radius - dSqrt (dx*dx + dy*dy + dz*dz);
+}
+
+//****************************************************************************
+// pairwise collision functions for standard geom types
+
+int dCollideSphereSphere (dxGeom *o1, dxGeom *o2, int flags,
+                          dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dSphereClass);
+    dIASSERT (o2->type == dSphereClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // Tag the contact with both geoms; side1/side2 are set to -1.
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    // The test itself only needs the two centers and radii.
+    const dReal radius1 = ((dxSphere*) o1)->radius;
+    const dReal radius2 = ((dxSphere*) o2)->radius;
+    return dCollideSpheres (o1->final_posr->pos,radius1, o2->final_posr->pos,radius2,contact);
+}
+
+
+int dCollideSphereBox (dxGeom *o1, dxGeom *o2, int flags,
+                       dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dSphereClass);
+    dIASSERT (o2->type == dBoxClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    // Sphere (o1) against box (o2); returns 1 with *contact filled in, or 0.
+    // Take the sphere center `p' relative to the box and clip it to the box
+    // extents (call the clipped point `q'). If clipping moved the point, the two
+    // touch when |p-q| <= sphere radius. If nothing was clipped, the sphere
+    // center is inside the box and the normal points to the closest box face.
+
+    dVector3 l,t,p,q,r; // l: box half sides, t: clamped per-axis coordinates of p, q: R*t, r: p-q
+    dReal depth;
+    int onborder = 0; // becomes 1 as soon as any coordinate of t had to be clamped
+
+    dxSphere *sphere = (dxSphere*) o1;
+    dxBox *box = (dxBox*) o2;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    p[0] = o1->final_posr->pos[0] - o2->final_posr->pos[0];
+    p[1] = o1->final_posr->pos[1] - o2->final_posr->pos[1];
+    p[2] = o1->final_posr->pos[2] - o2->final_posr->pos[2];
+
+    l[0] = box->side[0]*REAL(0.5);
+    t[0] = dCalcVectorDot3_14(p,o2->final_posr->R); // NOTE(review): presumably p dotted with column 0 of the box rotation -- confirm
+    if (t[0] < -l[0]) { t[0] = -l[0]; onborder = 1; }
+    if (t[0] >  l[0]) { t[0] =  l[0]; onborder = 1; }
+
+    l[1] = box->side[1]*REAL(0.5);
+    t[1] = dCalcVectorDot3_14(p,o2->final_posr->R+1);
+    if (t[1] < -l[1]) { t[1] = -l[1]; onborder = 1; }
+    if (t[1] >  l[1]) { t[1] =  l[1]; onborder = 1; }
+
+    t[2] = dCalcVectorDot3_14(p,o2->final_posr->R+2);
+    l[2] = box->side[2]*REAL(0.5);
+    if (t[2] < -l[2]) { t[2] = -l[2]; onborder = 1; }
+    if (t[2] >  l[2]) { t[2] =  l[2]; onborder = 1; }
+
+    if (!onborder) {
+        // sphere center inside box. find closest face to `t'
+        dReal min_distance = l[0] - dFabs(t[0]);
+        int mini = 0; // axis whose face is nearest; on a tie the lower axis index wins (strict '<' below)
+        for (int i=1; i<3; i++) {
+            dReal face_distance = l[i] - dFabs(t[i]);
+            if (face_distance < min_distance) {
+                min_distance = face_distance;
+                mini = i;
+            }
+        }
+        // contact position = sphere center
+        contact->pos[0] = o1->final_posr->pos[0];
+        contact->pos[1] = o1->final_posr->pos[1];
+        contact->pos[2] = o1->final_posr->pos[2];
+        // contact normal points to closest face
+        dVector3 tmp; // unit vector along axis `mini', signed by the side of the box the center is on
+        tmp[0] = 0;
+        tmp[1] = 0;
+        tmp[2] = 0;
+        tmp[mini] = (t[mini] > 0) ? REAL(1.0) : REAL(-1.0);
+        dMultiply0_331 (contact->normal,o2->final_posr->R,tmp);
+        // contact depth = distance to wall along normal plus radius
+        contact->depth = min_distance + sphere->radius;
+        return 1;
+    }
+
+    t[3] = 0;			// fourth slot of the dVector3 is zeroed before the multiply (original note: "@@@ hmmm")
+    dMultiply0_331 (q,o2->final_posr->R,t);
+    r[0] = p[0] - q[0];
+    r[1] = p[1] - q[1];
+    r[2] = p[2] - q[2];
+    depth = sphere->radius - dSqrt(dCalcVectorDot3(r,r));
+    if (depth < 0) return 0; // clipped point is farther from the center than the radius: no contact
+    contact->pos[0] = q[0] + o2->final_posr->pos[0];
+    contact->pos[1] = q[1] + o2->final_posr->pos[1];
+    contact->pos[2] = q[2] + o2->final_posr->pos[2];
+    contact->normal[0] = r[0];
+    contact->normal[1] = r[1];
+    contact->normal[2] = r[2];
+    dNormalize3 (contact->normal);
+    contact->depth = depth;
+    return 1;
+}
+
+
+int dCollideSpherePlane (dxGeom *o1, dxGeom *o2, int flags,
+                         dContactGeom *contact, int skip)
+{
+    dIASSERT (skip >= (int)sizeof(dContactGeom));
+    dIASSERT (o1->type == dSphereClass);
+    dIASSERT (o2->type == dPlaneClass);
+    dIASSERT ((flags & NUMC_MASK) >= 1);
+
+    dxSphere *ball = (dxSphere*) o1;
+    dxPlane *wall = (dxPlane*) o2;
+    dReal *center = o1->final_posr->pos;
+
+    contact->g1 = o1;
+    contact->g2 = o2;
+    contact->side1 = -1;
+    contact->side2 = -1;
+
+    // depth = plane offset p[3] minus the projection of the sphere center onto
+    // the plane normal p[0..2], plus the radius
+    const dReal centerAlongNormal = dCalcVectorDot3 (center,wall->p);
+    const dReal depth = wall->p[3] - centerAlongNormal + ball->radius;
+    if (!(depth >= 0)) return 0;
+
+    for (int axis = 0; axis < 3; ++axis) {
+        contact->normal[axis] = wall->p[axis];
+        contact->pos[axis] = center[axis] - wall->p[axis] * ball->radius;
+    }
+    contact->depth = depth;
+    return 1;
+}
diff --git a/libs/ode-0.16.1/ode/src/step.cpp b/libs/ode-0.16.1/ode/src/step.cpp
new file mode 100644
index 0000000..033e879
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/step.cpp
@@ -0,0 +1,1672 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/odeconfig.h>
+#include <ode/rotation.h>
+#include <ode/timer.h>
+#include <ode/error.h>
+#include "config.h"
+#include "odemath.h"
+#include "matrix.h"
+#include "objects.h"
+#include "joints/joint.h"
+#include "lcp.h"
+#include "util.h"
+#include "threadingutils.h"
+
+#include <new>
+
+
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A)) // smaller of A and B; each argument can be evaluated twice
+#define dMAX(A,B)  ((B)>(A) ? (B) : (A)) // larger of A and B; each argument can be evaluated twice
+
+//****************************************************************************
+// misc defines
+
+//#define TIMING
+
+
+#ifdef TIMING
+#define IFTIMING(x) x // TIMING defined: the wrapped code is emitted unchanged
+#else
+#define IFTIMING(x) ((void)0) // TIMING not defined: the wrapped code is replaced by a no-op expression
+#endif
+
+
+struct dJointWithInfo1 // pairs a joint pointer with a dxJoint::Info1 record
+{
+    dxJoint *joint;
+    dxJoint::Info1 info; // NOTE(review): presumably the Info1 queried from `joint' -- confirm where it is filled
+};
+
+enum dxRHSCFMElement // indices mirroring dxJoint::GI2_RHS / GI2_CFM, plus aliases for reusing the same array slots
+{
+    RCE_RHS = dxJoint::GI2_RHS,
+    RCE_CFM = dxJoint::GI2_CFM,
+    
+    // Elements for array reuse
+    RLE_RHS = RCE_RHS,
+    RLE_LAMBDA = RCE_CFM, // the CFM slot doubles as the lambda slot
+
+    RCE__RHS_CFM_MAX = dxJoint::GI2__RHS_CFM_MAX,
+    RLE__RHS_LAMBDA_MAX = RCE__RHS_CFM_MAX,
+};
+
+enum dxLoHiElement // indices mirroring dxJoint::GI2_LO / GI2_HI
+{
+    LHE_LO = dxJoint::GI2_LO,
+    LHE_HI = dxJoint::GI2_HI,
+
+    LHE__LO_HI_MAX = dxJoint::GI2__LO_HI_MAX, // one-past-the-end marker taken from dxJoint
+};
+
+enum dxJacobiVectorElement // element indices of one Jacobian vector: a linear (L) and an angular (A) X/Y/Z group
+{
+    JVE__MIN,
+
+    JVE__L_MIN = JVE__MIN + dDA__L_MIN, // start of the linear group, offset taken from dDA__L_MIN
+
+    JVE_LX = JVE__L_MIN + dSA_X,
+    JVE_LY = JVE__L_MIN + dSA_Y,
+    JVE_LZ = JVE__L_MIN + dSA_Z,
+
+    JVE__L_MAX = JVE__L_MIN + dSA__MAX,
+
+    JVE__A_MIN = JVE__MIN + dDA__A_MIN, // start of the angular group, offset taken from dDA__A_MIN
+
+    JVE_AX = JVE__A_MIN + dSA_X,
+    JVE_AY = JVE__A_MIN + dSA_Y,
+    JVE_AZ = JVE__A_MIN + dSA_Z,
+
+    JVE__A_MAX = JVE__A_MIN + dSA__MAX,
+
+    JVE__MAX = JVE__MIN + dDA__MAX,
+
+    JVE__L_COUNT = JVE__L_MAX - JVE__L_MIN, // number of linear elements
+    JVE__A_COUNT = JVE__A_MAX - JVE__A_MIN, // number of angular elements
+};
+
+
+enum dxJacobiMatrixElement // element indices of one Jacobian matrix row; here it consists of the J vector only
+{
+    JME__MIN,
+
+    JME__J_MIN = JME__MIN, // the J part starts at the very beginning of the row
+    JME__JL_MIN = JME__J_MIN + JVE__L_MIN,
+
+    JME_JLX = JME__J_MIN + JVE_LX,
+    JME_JLY = JME__J_MIN + JVE_LY,
+    JME_JLZ = JME__J_MIN + JVE_LZ,
+
+    JME__JL_MAX = JME__J_MIN + JVE__L_MAX,
+
+    JME__JA_MIN = JME__J_MIN + JVE__A_MIN,
+
+    JME_JAX = JME__J_MIN + JVE_AX,
+    JME_JAY = JME__J_MIN + JVE_AY,
+    JME_JAZ = JME__J_MIN + JVE_AZ,
+
+    JME__JA_MAX = JME__J_MIN + JVE__A_MAX,
+    JME__J_MAX = JME__J_MIN + JVE__MAX,
+
+    JME__MAX = JME__J_MAX, // row length equals the end of the J part
+
+    JME__J_COUNT = JME__J_MAX - JME__J_MIN,
+};
+
+enum dxJInvMElement // element indices of a J*invM row: dV3E__MAX-sized linear and angular groups, each with a PAD slot
+{
+    JIM__MIN,
+
+    JIM__L_MIN = JIM__MIN + dMD_LINEAR * dV3E__MAX, // linear group position is derived from dMD_LINEAR
+
+    JIM__L_AXES_MIN = JIM__L_MIN + dV3E__AXES_MIN,
+
+    JIM_LX = JIM__L_MIN + dV3E_X,
+    JIM_LY = JIM__L_MIN + dV3E_Y,
+    JIM_LZ = JIM__L_MIN + dV3E_Z,
+
+    JIM__L_AXES_MAX = JIM__L_MIN + dV3E__AXES_MAX,
+
+    JIM_LPAD = JIM__L_MIN + dV3E_PAD,
+
+    JIM__L_MAX = JIM__L_MIN + dV3E__MAX,
+
+    JIM__A_MIN = JIM__MIN + dMD_ANGULAR * dV3E__MAX, // angular group position is derived from dMD_ANGULAR
+
+    JIM__A_AXES_MIN = JIM__A_MIN + dV3E__AXES_MIN,
+
+    JIM_AX = JIM__A_MIN + dV3E_X,
+    JIM_AY = JIM__A_MIN + dV3E_Y,
+    JIM_AZ = JIM__A_MIN + dV3E_Z,
+
+    JIM__A_AXES_MAX = JIM__A_MIN + dV3E__AXES_MAX,
+
+    JIM_APAD = JIM__A_MIN + dV3E_PAD,
+
+    JIM__A_MAX = JIM__A_MIN + dV3E__MAX,
+
+    JIM__MAX = JIM__MIN + dMD__MAX * dV3E__MAX, // total row length: dMD__MAX groups of dV3E__MAX elements
+};
+
+enum dxContactForceElement // element indices of a constraint force record, laid out by the dDA_* dynamics axes
+{
+    CFE__MIN,
+
+    CFE__DYNAMICS_MIN = CFE__MIN, // the dynamics part starts at the very beginning of the record
+
+    CFE__L_MIN = CFE__DYNAMICS_MIN + dDA__L_MIN,
+
+    CFE_LX = CFE__DYNAMICS_MIN + dDA_LX,
+    CFE_LY = CFE__DYNAMICS_MIN + dDA_LY,
+    CFE_LZ = CFE__DYNAMICS_MIN + dDA_LZ,
+
+    CFE__L_MAX = CFE__DYNAMICS_MIN + dDA__L_MAX,
+
+    CFE__A_MIN = CFE__DYNAMICS_MIN + dDA__A_MIN,
+
+    CFE_AX = CFE__DYNAMICS_MIN + dDA_AX,
+    CFE_AY = CFE__DYNAMICS_MIN + dDA_AY,
+    CFE_AZ = CFE__DYNAMICS_MIN + dDA_AZ,
+
+    CFE__A_MAX = CFE__DYNAMICS_MIN + dDA__A_MAX,
+
+    CFE__DYNAMICS_MAX = CFE__DYNAMICS_MIN + dDA__MAX,
+
+    CFE__MAX = CFE__DYNAMICS_MAX, // record length equals the end of the dynamics part
+};
+
+
+#define AMATRIX_ALIGNMENT   dMAX(64, EFFICIENT_ALIGNMENT) // the larger of 64 and EFFICIENT_ALIGNMENT
+#define INVI_ALIGNMENT      dMAX(32, EFFICIENT_ALIGNMENT) // the larger of 32 and EFFICIENT_ALIGNMENT
+#define JINVM_ALIGNMENT     dMAX(64, EFFICIENT_ALIGNMENT) // the larger of 64 and EFFICIENT_ALIGNMENT
+
+struct dxStepperStage0Outputs // plain result record of stage 0; embedded in dxStepperStage1CallContext
+{
+    sizeint                         ji_start; // NOTE(review): presumably first used index into the jointinfos array -- confirm
+    sizeint                         ji_end; // NOTE(review): presumably one past the last used jointinfos index -- confirm
+    unsigned int                    m; // NOTE(review): presumably the total constraint row count -- confirm
+    unsigned int                    nub; // NOTE(review): presumably the count of unbounded rows -- confirm
+};
+
+struct dxStepperStage1CallContext // argument bundle for stage 1, including storage for the stage 0 outputs
+{
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, void *stageMemArenaState, dReal *invI, dJointWithInfo1 *jointinfos)
+    {
+        m_stepperCallContext = stepperCallContext; // pointers are stored as given; nothing is copied or owned here
+        m_stageMemArenaState = stageMemArenaState;
+        m_invI = invI;
+        m_jointinfos = jointinfos;
+    }
+
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    void                            *m_stageMemArenaState;
+    dReal                           *m_invI;
+    dJointWithInfo1                 *m_jointinfos;
+    dxStepperStage0Outputs          m_stage0Outputs; // not touched by Initialize()
+};
+
+// Context shared by all dxStepIsland_Stage0_Bodies calls made for one island.
+struct dxStepperStage0BodiesCallContext
+{
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    dReal                           *m_invI;
+    atomicord32                     m_tagsTaken; // exchanged to 1 by the caller that numbers the bodies
+    atomicord32                     m_gravityTaken; // exchanged to 1 by the caller that applies gravity
+    volatile atomicord32            m_inertiaBodyIndex; // counter used to hand out body indices for the inertia work
+
+    // Stores both pointers and resets the two flags and the body counter to zero.
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dReal *invI)
+    {
+        this->m_stepperCallContext = stepperCallContext;
+        this->m_invI = invI;
+
+        this->m_tagsTaken = 0;
+        this->m_gravityTaken = 0;
+        this->m_inertiaBodyIndex = 0;
+    }
+};
+
+// Call context for dxStepIsland_Stage0_Joints.
+struct dxStepperStage0JointsCallContext
+{
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    dJointWithInfo1                 *m_jointinfos; // array the joint infos are collected into
+    dxStepperStage0Outputs          *m_stage0Outputs; // where the resulting m, nub, ji_start and ji_end are stored
+
+    // Records the caller-supplied pointers.
+    void Initialize(const dxStepperProcessingCallContext *stepperCallContext, dJointWithInfo1 *jointinfos, dxStepperStage0Outputs *stage0Outputs)
+    {
+        this->m_stage0Outputs = stage0Outputs;
+        this->m_jointinfos = jointinfos;
+        this->m_stepperCallContext = stepperCallContext;
+    }
+};
+
+// Stage 0 and stage 1 entry points. The *_Callback variants have the
+// threaded-call signature and forward to the plain functions declared below
+// them. The joints callback is disabled: dxStepIsland calls
+// dxStepIsland_Stage0_Joints directly.
+static int dxStepIsland_Stage0_Bodies_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+// static int dxStepIsland_Stage0_Joints_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage1_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext);
+static void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext);
+static void dxStepIsland_Stage1(dxStepperStage1CallContext *callContext);
+
+
+// Bundle of the per-island arrays and counts set up by dxStepIsland_Stage1 and
+// passed on (by const pointer) to the stage 2, 3 and 4 call contexts.
+struct dxStepperLocalContext
+{
+    dReal                           *m_invI;
+    dJointWithInfo1                 *m_jointinfos;
+    unsigned int                    m_nj;
+    unsigned int                    m_m;
+    unsigned int                    m_nub;
+    const unsigned int              *m_mindex;
+    int                             *m_findex;
+    dReal                           *m_J;
+    dReal                           *m_A;
+    dReal                           *m_pairsRhsCfm;
+    dReal                           *m_pairsLoHi;
+    atomicord32                     *m_bodyStartJoints;
+    atomicord32                     *m_bodyJointLinks;
+
+    // Copies every argument into the member of the same name.
+    void Initialize(dReal *invI, dJointWithInfo1 *jointinfos, unsigned int nj,
+        unsigned int m, unsigned int nub, const unsigned int *mindex, int *findex,
+        dReal *J, dReal *A, dReal *pairsRhsCfm, dReal *pairsLoHi,
+        atomicord32 *bodyStartJoints, atomicord32 *bodyJointLinks)
+    {
+        // counts
+        this->m_nj = nj;
+        this->m_m = m;
+        this->m_nub = nub;
+
+        // joint and body data
+        this->m_invI = invI;
+        this->m_jointinfos = jointinfos;
+        this->m_mindex = mindex;
+        this->m_findex = findex;
+        this->m_bodyStartJoints = bodyStartJoints;
+        this->m_bodyJointLinks = bodyJointLinks;
+
+        // numeric work arrays
+        this->m_J = J;
+        this->m_A = A;
+        this->m_pairsRhsCfm = pairsRhsCfm;
+        this->m_pairsLoHi = pairsLoHi;
+    }
+};
+
+// Call context shared by the stage 2a/2b/2c functions.
+struct dxStepperStage2CallContext
+{
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxStepperLocalContext     *m_localContext;
+    dReal                           *m_JinvM;
+    dReal                           *m_rhs_tmp;
+    // Progress counters, all reset to zero by Initialize().
+    volatile atomicord32            m_ji_J;
+    volatile atomicord32            m_ji_Ainit;
+    volatile atomicord32            m_ji_JinvM;
+    volatile atomicord32            m_ji_Aaddjb;
+    volatile atomicord32            m_bi_rhs_tmp;
+    volatile atomicord32            m_ji_rhs;
+
+    // Stores the pointers and zeroes every progress counter.
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
+        dReal *JinvM, dReal *rhs_tmp)
+    {
+        this->m_stepperCallContext = callContext;
+        this->m_localContext = localContext;
+        this->m_JinvM = JinvM;
+        this->m_rhs_tmp = rhs_tmp;
+
+        this->m_ji_J = 0;
+        this->m_ji_Ainit = 0;
+        this->m_ji_JinvM = 0;
+        this->m_ji_Aaddjb = 0;
+        this->m_bi_rhs_tmp = 0;
+        this->m_ji_rhs = 0;
+    }
+};
+
+// Call context for dxStepIsland_Stage3.
+struct dxStepperStage3CallContext
+{
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxStepperLocalContext     *m_localContext;
+    void                            *m_stage1MemArenaState; // arena state saved by stage 1 right after the local context was set up
+
+    // Records the caller-supplied pointers.
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext,
+        void *stage1MemArenaState)
+    {
+        this->m_stage1MemArenaState = stage1MemArenaState;
+        this->m_localContext = localContext;
+        this->m_stepperCallContext = callContext;
+    }
+};
+
+// Call context for dxStepIsland_Stage4.
+// A stage 3 arena state pointer (m_stage3MemarenaState) used to be planned
+// here; it is currently disabled and therefore neither stored nor accepted.
+struct dxStepperStage4CallContext
+{
+    const dxStepperProcessingCallContext *m_stepperCallContext;
+    const dxStepperLocalContext     *m_localContext;
+    volatile atomicord32            m_bi_constrForce; // progress counter, reset to zero by Initialize()
+
+    // Stores both pointers and zeroes the progress counter.
+    void Initialize(const dxStepperProcessingCallContext *callContext, const dxStepperLocalContext *localContext)
+    {
+        this->m_stepperCallContext = callContext;
+        this->m_localContext = localContext;
+        this->m_bi_constrForce = 0;
+    }
+};
+
+// Stage 2 (a, b, c and the two sync points), stage 3 and stage 4 entry
+// points, declared here ahead of their use. As with the earlier stages, the
+// *_Callback functions carry the threaded-call signature while the plain
+// functions take the typed call context.
+static int dxStepIsland_Stage2a_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage2aSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage2b_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage2bSync_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage2c_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static int dxStepIsland_Stage3_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+static void dxStepIsland_Stage2a(dxStepperStage2CallContext *callContext);
+static void dxStepIsland_Stage2b(dxStepperStage2CallContext *callContext);
+static void dxStepIsland_Stage2c(dxStepperStage2CallContext *callContext);
+static void dxStepIsland_Stage3(dxStepperStage3CallContext *callContext);
+
+static int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+static void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext);
+
+
+//****************************************************************************
+// special matrix multipliers
+
+
+// Accumulates the product of a block of JinvM rows with a block of J rows
+// into A: for every i < infomJinvM and j < infomJ, the dot product of the
+// LX/LY/LZ/AX/AY/AZ elements of JinvM row i with the matching JL*/JA*
+// elements of J row j is added to Arow[i * mskip + j]. Only those six
+// elements of each row are read. JinvM rows are JIM__MAX elements apart,
+// J rows JME__MAX elements apart and A rows mskip elements apart.
+// Both row counts must be non-zero.
+
+static inline
+void MultiplyAddJinvMxJToA (dReal *Arow, const dReal *JinvMRow, const dReal *JRow,
+    unsigned int infomJinvM, unsigned int infomJ, unsigned int mskip)
+{
+    dIASSERT (infomJinvM > 0 && infomJ > 0 && Arow && JinvMRow && JRow);
+    dIASSERT(mskip >= infomJ);
+
+    unsigned int row = 0;
+    do {
+        // Cache the six JinvM elements used for the whole A row
+        const dReal *const jinvm = JinvMRow + (sizeint)row * JIM__MAX;
+        dReal mLX = jinvm[JIM_LX];
+        dReal mLY = jinvm[JIM_LY];
+        dReal mLZ = jinvm[JIM_LZ];
+        dReal mAX = jinvm[JIM_AX];
+        dReal mAY = jinvm[JIM_AY];
+        dReal mAZ = jinvm[JIM_AZ];
+
+        dReal *const destRow = Arow + (sizeint)row * mskip;
+
+        unsigned int col = 0;
+        do {
+            const dReal *const jrow = JRow + (sizeint)col * JME__MAX;
+
+            dReal dot;
+            dot  = mLX * jrow[JME_JLX];
+            dot += mLY * jrow[JME_JLY];
+            dot += mLZ * jrow[JME_JLZ];
+            dot += mAX * jrow[JME_JAX];
+            dot += mAY * jrow[JME_JAY];
+            dot += mAZ * jrow[JME_JAZ];
+
+            destRow[col] += dot;
+        }
+        while (++col != infomJ);
+    }
+    while (++row != infomJinvM);
+}
+
+
+// Subtracts J * rhs_tmp from the right hand side values: for each of the
+// infom rows of J starting at JRow, the dot product of the row's JL*/JA*
+// elements with the dDA_L*/dDA_A* elements of rowRhsTmp is subtracted from
+// that row's RCE_RHS element in rowRhsCfm. Consecutive rows are
+// RCE__RHS_CFM_MAX elements apart in rowRhsCfm and JME__MAX elements apart
+// in JRow. infom must be non-zero.
+
+static inline
+void MultiplySubJxRhsTmpFromRHS (dReal *rowRhsCfm, const dReal *JRow, const dReal *rowRhsTmp, unsigned int infom)
+{
+    dIASSERT (infom > 0 && rowRhsCfm && JRow && rowRhsTmp);
+
+    // The rhs_tmp vector is the same for every row, so read it once up front
+    const dReal tmpLX = rowRhsTmp[dDA_LX];
+    const dReal tmpLY = rowRhsTmp[dDA_LY];
+    const dReal tmpLZ = rowRhsTmp[dDA_LZ];
+    const dReal tmpAX = rowRhsTmp[dDA_AX];
+    const dReal tmpAY = rowRhsTmp[dDA_AY];
+    const dReal tmpAZ = rowRhsTmp[dDA_AZ];
+
+    unsigned int row = 0;
+    do {
+        const dReal *const jrow = JRow + (sizeint)row * JME__MAX;
+
+        dReal dot;
+        dot  = jrow[JME_JLX] * tmpLX;
+        dot += jrow[JME_JLY] * tmpLY;
+        dot += jrow[JME_JLZ] * tmpLZ;
+        dot += jrow[JME_JAX] * tmpAX;
+        dot += jrow[JME_JAY] * tmpAY;
+        dot += jrow[JME_JAZ] * tmpAZ;
+
+        rowRhsCfm[(sizeint)row * RCE__RHS_CFM_MAX + RCE_RHS] -= dot;
+    }
+    while (++row != infom);
+}
+
+
+// Computes the sum over infom rows of (J row * lambda) for the six JL*/JA*
+// elements and adds the result to the CFE_L*/CFE_A* elements of cforce.
+// Consecutive rows are JME__MAX elements apart in JRow and
+// RLE__RHS_LAMBDA_MAX elements apart in rowRhsLambda; the RLE_LAMBDA element
+// of each row is used. When fb is not NULL the same six sums are also stored
+// (not added) into fb->f1/fb->t1 if jointBodyIndex equals dJCB__MIN, or into
+// fb->f2/fb->t2 otherwise. infom must be non-zero.
+static inline
+void MultiplyAddJxLambdaToCForce(dReal cforce[CFE__MAX],
+    const dReal *JRow, const dReal *rowRhsLambda, unsigned int infom,
+    dJointFeedback *fb/*=NULL*/, unsigned jointBodyIndex)
+{
+    dIASSERT (infom > 0 && cforce && JRow && rowRhsLambda);
+
+    dReal accLX = 0, accLY = 0, accLZ = 0;
+    dReal accAX = 0, accAY = 0, accAZ = 0;
+
+    unsigned int row = 0;
+    do {
+        const dReal *const jrow = JRow + (sizeint)row * JME__MAX;
+        const dReal lambda = rowRhsLambda[(sizeint)row * RLE__RHS_LAMBDA_MAX + RLE_LAMBDA];
+
+        accLX += jrow[JME_JLX] * lambda;
+        accLY += jrow[JME_JLY] * lambda;
+        accLZ += jrow[JME_JLZ] * lambda;
+        accAX += jrow[JME_JAX] * lambda;
+        accAY += jrow[JME_JAY] * lambda;
+        accAZ += jrow[JME_JAZ] * lambda;
+    }
+    while (++row != infom);
+
+    if (fb != NULL) {
+        if (jointBodyIndex != dJCB__MIN) {
+            // Only two bodies per joint are possible, so this must be the second one
+            dIASSERT(jointBodyIndex == dJCB__MIN + 1);
+            dSASSERT(dJCB__MAX == 2);
+
+            fb->f2[dV3E_X] = accLX;
+            fb->f2[dV3E_Y] = accLY;
+            fb->f2[dV3E_Z] = accLZ;
+            fb->t2[dV3E_X] = accAX;
+            fb->t2[dV3E_Y] = accAY;
+            fb->t2[dV3E_Z] = accAZ;
+        }
+        else {
+            fb->f1[dV3E_X] = accLX;
+            fb->f1[dV3E_Y] = accLY;
+            fb->f1[dV3E_Z] = accLZ;
+            fb->t1[dV3E_X] = accAX;
+            fb->t1[dV3E_Y] = accAY;
+            fb->t1[dV3E_Z] = accAZ;
+        }
+    }
+
+    cforce[CFE_LX] += accLX;
+    cforce[CFE_LY] += accLY;
+    cforce[CFE_LZ] += accLZ;
+    cforce[CFE_AX] += accAX;
+    cforce[CFE_AY] += accAY;
+    cforce[CFE_AZ] += accAZ;
+}
+
+
+//****************************************************************************
+
+// Entry point of the island stepper.
+// Allocates the invI array (dM3E__MAX elements per body) and the jointinfos
+// array from the stepper arena, builds the stage 0 and stage 1 call contexts
+// and then runs stage 0 (bodies and joints) followed by stage 1: inline when
+// only one thread is allowed, through posted threaded calls otherwise.
+/*extern */
+void dxStepIsland(const dxStepperProcessingCallContext *callContext)
+{
+    IFTIMING(dTimerStart("preprocessing"));
+
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    dxWorld *world = callContext->m_world;
+    unsigned int nb = callContext->m_islandBodiesCount;
+    unsigned int _nj = callContext->m_islandJointsCount;
+
+    dReal *invI = memarena->AllocateOveralignedArray<dReal>(dM3E__MAX * (sizeint)nb, INVI_ALIGNMENT);
+    // Reserve twice as much memory and start from the middle so that regardless of 
+    // what direction the array grows to there would be sufficient room available.
+    const sizeint ji_reserve_count = 2 * (sizeint)_nj;
+    dJointWithInfo1 *const jointinfos = memarena->AllocateArray<dJointWithInfo1>(ji_reserve_count);
+
+    const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+    dIASSERT(allowedThreads != 0);
+
+    // The call contexts below are allocated after this saved state;
+    // dxStepIsland_Stage1 restores the state once it has copied what it needs.
+    void *stagesMemArenaState = memarena->SaveState();
+
+    dxStepperStage1CallContext *stage1CallContext = (dxStepperStage1CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage1CallContext));
+    stage1CallContext->Initialize(callContext, stagesMemArenaState, invI, jointinfos);
+
+    dxStepperStage0BodiesCallContext *stage0BodiesCallContext = (dxStepperStage0BodiesCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0BodiesCallContext));
+    stage0BodiesCallContext->Initialize(callContext, invI);
+    
+    // The joints pass writes its outputs straight into the stage 1 context
+    dxStepperStage0JointsCallContext *stage0JointsCallContext = (dxStepperStage0JointsCallContext *)memarena->AllocateBlock(sizeof(dxStepperStage0JointsCallContext));
+    stage0JointsCallContext->Initialize(callContext, jointinfos, &stage1CallContext->m_stage0Outputs);
+
+    if (allowedThreads == 1)
+    {
+        dxStepIsland_Stage0_Bodies(stage0BodiesCallContext);
+        dxStepIsland_Stage0_Joints(stage0JointsCallContext);
+        dxStepIsland_Stage1(stage1CallContext);
+    }
+    else
+    {
+        unsigned bodyThreads = allowedThreads;
+        unsigned jointThreads = 1;
+
+        // Stage 1 is posted first with a dependency count of
+        // bodyThreads + jointThreads
+        // (presumably it starts only after that many releases -- confirm
+        // against the threading implementation).
+        dCallReleaseeID stage1CallReleasee;
+        world->PostThreadedCallForUnawareReleasee(NULL, &stage1CallReleasee, bodyThreads + jointThreads, callContext->m_finalReleasee, 
+            NULL, &dxStepIsland_Stage1_Callback, stage1CallContext, 0, "StepIsland Stage1");
+
+        world->PostThreadedCallsGroup(NULL, bodyThreads, stage1CallReleasee, &dxStepIsland_Stage0_Bodies_Callback, stage0BodiesCallContext, "StepIsland Stage0-Bodies");
+
+        // The joints pass runs on the calling thread; its single dependency
+        // on stage 1 is dropped by hand afterwards.
+        dxStepIsland_Stage0_Joints(stage0JointsCallContext);
+        world->AlterThreadedCallDependenciesCount(stage1CallReleasee, -1);
+        dIASSERT(jointThreads == 1);
+    }
+}    
+
+// Threaded-call adapter for dxStepIsland_Stage0_Bodies: the opaque context
+// pointer is treated as a dxStepperStage0BodiesCallContext and the two
+// remaining arguments are ignored. Always returns 1.
+static
+int dxStepIsland_Stage0_Bodies_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callThisReleasee; // unused
+    (void)callInstanceIndex; // unused
+
+    dxStepIsland_Stage0_Bodies(static_cast<dxStepperStage0BodiesCallContext *>(_callContext));
+    return 1;
+}
+
+// Stage 0 body pass. dxStepIsland either calls it once or posts it as a group
+// of calls that all share one context, so the work is split as follows:
+//  - numbering the bodies (tag = index) and adding gravity to facc are each
+//    done by the single caller that flips the corresponding flag from 0 to 1;
+//  - the per-body global-frame inverse inertia (written to m_invI) and the
+//    gyroscopic torque (added to tacc) are done for body indices obtained
+//    from the shared m_inertiaBodyIndex counter.
+static 
+void dxStepIsland_Stage0_Bodies(dxStepperStage0BodiesCallContext *callContext)
+{
+    dxBody * const *body = callContext->m_stepperCallContext->m_islandBodiesStart;
+    unsigned int nb = callContext->m_stepperCallContext->m_islandBodiesCount;
+
+    // Only the caller that reads back the previous value 0 performs the job
+    if (ThrsafeExchange(&callContext->m_tagsTaken, 1) == 0)
+    {
+        // number all bodies in the body list - set their tag values
+        for (unsigned int i=0; i<nb; i++) body[i]->tag = i;
+    }
+
+    if (ThrsafeExchange(&callContext->m_gravityTaken, 1) == 0)
+    {
+        dxWorld *world = callContext->m_stepperCallContext->m_world;
+
+        // add the gravity force to all bodies
+        // since gravity does normally have only one component it's more efficient
+        // to run three loops for each individual component
+        dxBody *const *const bodyend = body + nb;
+        dReal gravity_x = world->gravity[0];
+        if (gravity_x) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[dV3E_X] += b->mass.mass * gravity_x;
+                }
+            }
+        }
+        dReal gravity_y = world->gravity[1];
+        if (gravity_y) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[dV3E_Y] += b->mass.mass * gravity_y;
+                }
+            }
+        }
+        dReal gravity_z = world->gravity[2];
+        if (gravity_z) {
+            for (dxBody *const *bodycurr = body; bodycurr != bodyend; ++bodycurr) {
+                dxBody *b = *bodycurr;
+                if ((b->flags & dxBodyNoGravity) == 0) {
+                    b->facc[dV3E_Z] += b->mass.mass * gravity_z;
+                }
+            }
+        }
+    }
+
+    // for all bodies, compute the inertia tensor and its inverse in the global
+    // frame, and compute the rotational force and add it to the torque
+    // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
+    {
+        dReal *invIrow = callContext->m_invI;
+        // NOTE(review): this assumes ThrsafeIncrementIntUpToLimit hands out each
+        // index below nb exactly once across all callers and never an index
+        // smaller than one it returned before -- confirm against its definition.
+        unsigned int bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);
+
+        // invIrow is advanced for every body; work is done only for claimed indices
+        for (unsigned int i = 0; i != nb; invIrow += dM3E__MAX, ++i) {
+            if (i == bodyIndex) {
+                dMatrix3 tmp;
+                dxBody *b = body[i];
+
+                // compute inverse inertia tensor in global frame
+                dMultiply2_333 (tmp, b->invI, b->posr.R);
+                dMultiply0_333 (invIrow, b->posr.R, tmp);
+
+                // Don't apply gyroscopic torques to bodies
+                // if not flagged or the body is kinematic
+                if ((b->flags & dxBodyGyroscopic) && (b->invMass > 0)) {
+                    dMatrix3 I;
+                    // compute inertia tensor in global frame
+                    dMultiply2_333 (tmp,b->mass.I,b->posr.R);
+                    dMultiply0_333 (I,b->posr.R,tmp);
+                    // compute rotational force
+#if 0
+                    // Explicit computation
+                    dMultiply0_331 (tmp,I,b->avel);
+                    dSubtractVectorCross3(b->tacc,b->avel,tmp);
+#else
+                    // Do the implicit computation based on 
+                    //"Stabilizing Gyroscopic Forces in Rigid Multibody Simulations"
+                    // (Lacoursière 2006)
+                    dReal h = callContext->m_stepperCallContext->m_stepSize; // Step size
+                    dVector3 L; // Compute angular momentum
+                    dMultiply0_331(L, I, b->avel);
+                    
+                    // Compute a new effective 'inertia tensor'
+                    // for the implicit step: the cross-product 
+                    // matrix of the angular momentum plus the
+                    // old tensor scaled by the timestep.  
+                    // Itild may not be symmetric pos-definite, 
+                    // but we can still use it to compute implicit
+                    // gyroscopic torques.
+                    dMatrix3 Itild = { 0 };  
+                    dSetCrossMatrixMinus(Itild, L, dV3E__MAX);
+                    for (int ii = dM3E__MIN; ii != dM3E__MAX; ++ii) {
+                      Itild[ii] = Itild[ii] * h + I[ii];
+                    }
+
+                    // Scale momentum by inverse time to get 
+                    // a sort of "torque"
+                    dScaleVector3(L, dRecip(h)); 
+                    // Invert the pseudo-tensor
+                    dMatrix3 itInv;
+                    // This is a closed-form inversion.
+                    // It's probably not numerically stable
+                    // when dealing with small masses with
+                    // a large asymmetry.
+                    // An LU decomposition might be better.
+                    // (a zero return value skips the gyroscopic torque for this body)
+                    if (dInvertMatrix3(itInv, Itild) != 0) {
+                        // "Divide" the original tensor
+                        // by the pseudo-tensor (on the right)
+                        dMultiply0_333(Itild, I, itInv);
+                        // Subtract an identity matrix
+                        Itild[dM3E_XX] -= 1; Itild[dM3E_YY] -= 1; Itild[dM3E_ZZ] -= 1;
+
+                        // This new inertia matrix rotates the 
+                        // momentum to get a new set of torques
+                        // that will work correctly when applied
+                        // to the old inertia matrix as explicit
+                        // torques with a semi-implicit update
+                        // step.
+                        dVector3 tau0;
+                        dMultiply0_331(tau0,Itild,L);
+                        
+                        // Add the gyro torques to the torque 
+                        // accumulator
+                        for (int ii = dSA__MIN; ii != dSA__MAX; ++ii) {
+                          b->tacc[dV3E__AXES_MIN + ii] += tau0[dV3E__AXES_MIN + ii];
+                        }
+                    }
+#endif
+                }
+
+                // claim the next body index for this caller
+                bodyIndex = ThrsafeIncrementIntUpToLimit(&callContext->m_inertiaBodyIndex, nb);
+            }
+        }
+    }
+}
+
+// static 
+// int dxStepIsland_Stage0_Joints_Callback(void *_callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+// {
+//     (void)callInstanceIndex; // unused
+//     (void)callThisReleasee; // unused
+//     dxStepperStage0JointsCallContext *callContext = (dxStepperStage0JointsCallContext *)_callContext;
+//     dxStepIsland_Stage0_Joints(callContext);
+//     return 1;
+// }
+
+// Stage 0 joint pass.
+// Calls getInfo1 on every joint of the island, gives joints that report
+// info.m == 0 the tag -1 and leaves them out, and arranges the remaining
+// entries in the jointinfos array as
+//     [ purely unbounded | mixed | purely LCP ].
+// The array has room for 2 * _nj entries (reserved by dxStepIsland) and
+// filling starts at index _nj, so entries can be added at either end without
+// shifting the whole array. getInfo1 writes directly into the slot at the
+// currently growing end; the entry is then kept there, swapped, or copied to
+// the opposite end depending on its kind.
+// On return m_stage0Outputs holds m, nub, ji_start and ji_end, and every kept
+// joint's tag is its index within [ji_start, ji_end).
+static 
+void dxStepIsland_Stage0_Joints(dxStepperStage0JointsCallContext *callContext)
+{
+    dxJoint * const *_joint = callContext->m_stepperCallContext->m_islandJointsStart;
+    dJointWithInfo1 *jointinfos = callContext->m_jointinfos;
+    unsigned int _nj = callContext->m_stepperCallContext->m_islandJointsCount;
+
+    // get m = total constraint dimension, nub = number of unbounded variables.
+    // create constraint offset array and number-of-rows array for all joints.
+    // the constraints are re-ordered as follows: the purely unbounded
+    // constraints, the mixed unbounded + LCP constraints, and last the purely
+    // LCP constraints. this assists the LCP solver to put all unbounded
+    // variables at the start for a quick factorization.
+    //
+    // joints with m=0 are inactive and are removed from the joints array
+    // entirely, so that the code that follows does not consider them.
+    // also number all active joints in the joint list (set their tag values).
+    // inactive joints receive a tag value of -1.
+
+    sizeint ji_start, ji_end;
+    {
+        unsigned int mcurr = 0;
+        // Section boundaries as indices into jointinfos:
+        // [unb_start, mix_start) unbounded, [mix_start, mix_end) mixed,
+        // [mix_end, lcp_end) LCP. All sections start out empty at the middle.
+        sizeint unb_start, mix_start, mix_end, lcp_end;
+        unb_start = mix_start = mix_end = lcp_end = _nj;
+
+        dJointWithInfo1 *jicurr = jointinfos + lcp_end;
+        dxJoint *const *const _jend = _joint + _nj;
+        dxJoint *const *_jcurr = _joint;
+        while (true) {
+            // -------------------------------------------------------------------------
+            // Switch to growing array forward
+            {
+                bool fwd_end_reached = false;
+                dJointWithInfo1 *jimixend = jointinfos + mix_end;
+                while (true) {	// jicurr=dest, _jcurr=src
+                    if (_jcurr == _jend) {
+                        lcp_end = jicurr - jointinfos;
+                        fwd_end_reached = true;
+                        break;
+                    }
+                    dxJoint *j = *_jcurr++;
+                    j->getInfo1 (&jicurr->info);
+                    dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
+                    if (jicurr->info.m != 0) {
+                        mcurr += jicurr->info.m;
+                        if (jicurr->info.nub == 0) { // A lcp info - a correct guess!!!
+                            jicurr->joint = j;
+                            ++jicurr;
+                        } else if (jicurr->info.nub < jicurr->info.m) { // A mixed case
+                            if (unb_start == mix_start) { // no unbounded infos yet - just move to opposite side of mixed-s
+                                unb_start = mix_start = mix_start - 1;
+                                dJointWithInfo1 *jimixstart = jointinfos + mix_start;
+                                jimixstart->info = jicurr->info;
+                                jimixstart->joint = j;
+                            } else if (jimixend != jicurr) { // have to swap to the tail of mixed-s
+                                dxJoint::Info1 tmp_info = jicurr->info;
+                                *jicurr = *jimixend;
+                                jimixend->info = tmp_info;
+                                jimixend->joint = j;
+                                ++jimixend; ++jicurr;
+                            } else { // no need to swap as there are no LCP info-s yet
+                                jicurr->joint = j;
+                                jimixend = jicurr = jicurr + 1;
+                            }
+                        } else { // A purely unbounded case -- break out and proceed growing in opposite direction
+                            unb_start = unb_start - 1;
+                            dJointWithInfo1 *jiunbstart = jointinfos + unb_start;
+                            jiunbstart->info = jicurr->info;
+                            jiunbstart->joint = j;
+                            lcp_end = jicurr - jointinfos;
+                            mix_end = jimixend - jointinfos;
+                            jicurr = jiunbstart - 1;
+                            break;
+                        }
+                    } else {
+                        j->tag = -1;
+                    }
+                }
+                if (fwd_end_reached) {
+                    break;
+                }
+            }
+            // -------------------------------------------------------------------------
+            // Switch to growing array backward
+            {
+                bool bkw_end_reached = false;
+                dJointWithInfo1 *jimixstart = jointinfos + mix_start - 1;
+                while (true) {	// jicurr=dest, _jcurr=src
+                    if (_jcurr == _jend) {
+                        unb_start = (jicurr + 1) - jointinfos;
+                        mix_start = (jimixstart + 1) - jointinfos;
+                        bkw_end_reached = true;
+                        break;
+                    }
+                    dxJoint *j = *_jcurr++;
+                    j->getInfo1 (&jicurr->info);
+                    dIASSERT (/*jicurr->info.m >= 0 && */jicurr->info.m <= 6 && /*jicurr->info.nub >= 0 && */jicurr->info.nub <= jicurr->info.m);
+                    if (jicurr->info.m != 0) {
+                        mcurr += jicurr->info.m;
+                        if (jicurr->info.nub == jicurr->info.m) { // An unbounded info - a correct guess!!!
+                            jicurr->joint = j;
+                            --jicurr;
+                        } else if (jicurr->info.nub != 0) { // A mixed case
+                            if (mix_end == lcp_end) { // no lcp infos yet - just move to opposite side of mixed-s
+                                dJointWithInfo1 *jimixend = jointinfos + mix_end;
+                                lcp_end = mix_end = mix_end + 1;
+                                jimixend->info = jicurr->info;
+                                jimixend->joint = j;
+                            } else if (jimixstart != jicurr) { // have to swap to the head of mixed-s
+                                dxJoint::Info1 tmp_info = jicurr->info;
+                                *jicurr = *jimixstart;
+                                jimixstart->info = tmp_info;
+                                jimixstart->joint = j;
+                                --jimixstart; --jicurr;
+                            } else { // no need to swap as there are no unbounded info-s yet
+                                jicurr->joint = j;
+                                jimixstart = jicurr = jicurr - 1;
+                            }
+                        } else { // A purely lcp case -- break out and proceed growing in opposite direction
+                            dJointWithInfo1 *jilcpend = jointinfos + lcp_end;
+                            lcp_end = lcp_end + 1;
+                            jilcpend->info = jicurr->info;
+                            jilcpend->joint = j;
+                            unb_start = (jicurr + 1) - jointinfos;
+                            mix_start = (jimixstart + 1) - jointinfos;
+                            jicurr = jilcpend + 1;
+                            break;
+                        }
+                    } else {
+                        j->tag = -1;
+                    }
+                }
+                if (bkw_end_reached) {
+                    break;
+                }
+            }
+        }
+
+        callContext->m_stage0Outputs->m = mcurr;
+        // Note: the value stored is the number of entries in the purely
+        // unbounded section, not a sum of their info.nub values.
+        callContext->m_stage0Outputs->nub = (unsigned)(mix_start - unb_start);
+        dIASSERT((sizeint)(mix_start - unb_start) <= (sizeint)UINT_MAX);
+        ji_start = unb_start;
+        ji_end = lcp_end;
+    }
+
+    // Tag every kept joint with its position in the final ordering
+    {
+        const dJointWithInfo1 *jicurr = jointinfos + ji_start;
+        const dJointWithInfo1 *const jiend = jointinfos + ji_end;
+        for (unsigned int i = 0; jicurr != jiend; i++, ++jicurr) {
+            jicurr->joint->tag = i;
+        }
+    }
+
+    callContext->m_stage0Outputs->ji_start = ji_start;
+    callContext->m_stage0Outputs->ji_end = ji_end;
+}
+
+// Threaded-call adapter for dxStepIsland_Stage1: the opaque context pointer
+// is treated as a dxStepperStage1CallContext and the two remaining arguments
+// are ignored. Always returns 1.
+static
+int dxStepIsland_Stage1_Callback(void *_stage1CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callThisReleasee; // unused
+    (void)callInstanceIndex; // unused
+
+    dxStepIsland_Stage1(static_cast<dxStepperStage1CallContext *>(_stage1CallContext));
+    return 1;
+}
+
+static 
+void dxStepIsland_Stage1(dxStepperStage1CallContext *stage1CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage1CallContext->m_stepperCallContext;
+    dJointWithInfo1 *_jointinfos = stage1CallContext->m_jointinfos;
+    dReal *invI = stage1CallContext->m_invI;
+    sizeint ji_start = stage1CallContext->m_stage0Outputs.ji_start;
+    sizeint ji_end = stage1CallContext->m_stage0Outputs.ji_end;
+    unsigned int m = stage1CallContext->m_stage0Outputs.m;
+    unsigned int nub = stage1CallContext->m_stage0Outputs.nub;
+
+    dxWorldProcessMemArena *memarena = callContext->m_stepperArena;
+    {
+        memarena->RestoreState(stage1CallContext->m_stageMemArenaState);
+        stage1CallContext = NULL; // WARNING! _stage1CallContext is not valid after this point!
+        dIVERIFY(stage1CallContext == NULL); // To suppress compiler warnings about unused variable assignment
+
+        unsigned int _nj = callContext->m_islandJointsCount;
+        const sizeint ji_reserve_count = 2 * (sizeint)_nj;
+        memarena->ShrinkArray<dJointWithInfo1>(_jointinfos, ji_reserve_count, ji_end);
+    }
+
+    dJointWithInfo1 *jointinfos = _jointinfos + ji_start;
+    unsigned int nj = (unsigned int)(ji_end - ji_start);
+    dIASSERT((sizeint)(ji_end - ji_start) <= (sizeint)UINT_MAX);
+
+    unsigned int *mindex = NULL;
+    dReal *J = NULL, *A = NULL, *pairsRhsCfm = NULL, *pairsLoHi = NULL;
+    int *findex = NULL;
+    atomicord32 *bodyStartJoints = NULL, *bodyJointLinks = NULL;
+
+    // if there are constraints, compute constrForce
+    if (m > 0) {
+        mindex = memarena->AllocateArray<unsigned int>((sizeint)(nj + 1));
+        {
+            unsigned int *mcurr = mindex;
+            unsigned int moffs = 0;
+            mcurr[0] = moffs;
+            mcurr += 1;
+
+            const dJointWithInfo1 *const jiend = jointinfos + nj;
+            for (const dJointWithInfo1 *jicurr = jointinfos; jicurr != jiend; ++jicurr) {
+                //dxJoint *joint = jicurr->joint;
+                moffs += jicurr->info.m;
+                mcurr[0] = moffs;
+                mcurr += 1;
+            }
+        }
+
+        // create a constraint equation right hand side vector `c', a constraint
+        // force mixing vector `cfm', and LCP low and high bound vectors, and an
+        // 'findex' vector.
+        findex = memarena->AllocateArray<int>(m);
+        J = memarena->AllocateArray<dReal>((sizeint)m * (2 * JME__MAX));
+        A = memarena->AllocateOveralignedArray<dReal>((sizeint)m * dPAD(m), AMATRIX_ALIGNMENT);
+        pairsRhsCfm = memarena->AllocateArray<dReal>((sizeint)m * RCE__RHS_CFM_MAX);
+        pairsLoHi = memarena->AllocateArray<dReal>((sizeint)m * LHE__LO_HI_MAX);
+        const unsigned int nb = callContext->m_islandBodiesCount;
+        bodyStartJoints = memarena->AllocateArray<atomicord32>(nb);
+        bodyJointLinks = memarena->AllocateArray<atomicord32>((sizeint)nj * dJCB__MAX);
+        dICHECK(nj < ~((atomicord32)0) / dJCB__MAX); // If larger joint counts are to be used, pointers (or sizeint) need to be stored rather than atomicord32 indices
+    }
+
+    dxStepperLocalContext *localContext = (dxStepperLocalContext *)memarena->AllocateBlock(sizeof(dxStepperLocalContext));
+    localContext->Initialize(invI, jointinfos, nj, m, nub, mindex, findex, J, A, pairsRhsCfm, pairsLoHi, bodyStartJoints, bodyJointLinks);
+
+    void *stage1MemarenaState = memarena->SaveState();
+    dxStepperStage3CallContext *stage3CallContext = (dxStepperStage3CallContext*)memarena->AllocateBlock(sizeof(dxStepperStage3CallContext));
+    stage3CallContext->Initialize(callContext, localContext, stage1MemarenaState);
+
+    if (m > 0) {
+        dReal *JinvM = memarena->AllocateOveralignedArray<dReal>((sizeint)m * (2 * JIM__MAX), JINVM_ALIGNMENT);
+        const unsigned int nb = callContext->m_islandBodiesCount;
+        dReal *rhs_tmp = memarena->AllocateArray<dReal>((sizeint)nb * dDA__MAX);
+
+        dxStepperStage2CallContext *stage2CallContext = (dxStepperStage2CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage2CallContext));
+        stage2CallContext->Initialize(callContext, localContext, JinvM, rhs_tmp);
+
+        const unsigned allowedThreads = callContext->m_stepperAllowedThreads;
+        dIASSERT(allowedThreads != 0);
+
+        if (allowedThreads == 1) {
+            IFTIMING(dTimerNow("create J"));
+            dxStepIsland_Stage2a(stage2CallContext);
+            IFTIMING(dTimerNow("compute Adiag, JinvM and rhs_tmp"));
+            dxStepIsland_Stage2b(stage2CallContext);
+            IFTIMING(dTimerNow("compute A and rhs"));
+            dxStepIsland_Stage2c(stage2CallContext);
+            dxStepIsland_Stage3(stage3CallContext);
+        }
+        else {
+            dxWorld *world = callContext->m_world;
+            dCallReleaseeID stage3CallReleasee;
+            world->PostThreadedCallForUnawareReleasee(NULL, &stage3CallReleasee, 1, callContext->m_finalReleasee, 
+                NULL, &dxStepIsland_Stage3_Callback, stage3CallContext, 0, "StepIsland Stage3");
+
+            dCallReleaseeID stage2bSyncReleasee;
+            world->PostThreadedCall(NULL, &stage2bSyncReleasee, 1, stage3CallReleasee, 
+                NULL, &dxStepIsland_Stage2bSync_Callback, stage2CallContext, 0, "StepIsland Stage2b Sync");
+
+            dCallReleaseeID stage2aSyncReleasee;
+            world->PostThreadedCall(NULL, &stage2aSyncReleasee, allowedThreads, stage2bSyncReleasee, 
+                NULL, &dxStepIsland_Stage2aSync_Callback, stage2CallContext, 0, "StepIsland Stage2a Sync");
+
+            dIASSERT(allowedThreads > 1); /*if (allowedThreads > 1) */{
+                world->PostThreadedCallsGroup(NULL, allowedThreads - 1, stage2aSyncReleasee, &dxStepIsland_Stage2a_Callback, stage2CallContext, "StepIsland Stage2a");
+            }
+            dxStepIsland_Stage2a(stage2CallContext);
+            world->AlterThreadedCallDependenciesCount(stage2aSyncReleasee, -1);
+        }
+    }
+    else {
+        dxStepIsland_Stage3(stage3CallContext);
+    }
+}
+
+
+// Threaded-call adapter for Stage 2a: recovers the typed stage-2 context from
+// the opaque pointer and runs dxStepIsland_Stage2a() on it. Always returns 1.
+static 
+int dxStepIsland_Stage2a_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by this stage
+    (void)callThisReleasee;
+    (void)callInstanceIndex;
+
+    dxStepIsland_Stage2a(static_cast<dxStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+// Stage 2a worker: asks every joint of the island for its constraint rows.
+//
+// Joints are claimed one at a time through the shared m_ji_J counter, so the
+// function can be run with the same context from several calls. For each
+// claimed joint it
+//   1. resets the joint's slice of J, rhs/cfm, lo/hi and findex to defaults,
+//   2. lets the joint fill these in via dxJoint::getInfo2(),
+//   3. converts the joint-local findex values to island-wide row numbers,
+//   4. multiplies the rhs and cfm values by 1/stepsize.
+//
+// Layout of J: a joint with `rowCount` rows starting at row `firstRow` owns
+// 2 * rowCount consecutive records of JME__MAX elements beginning at record
+// 2 * firstRow. The first rowCount records are the body 1 block, the next
+// rowCount records are the body 2 block.
+static 
+void dxStepIsland_Stage2a(dxStepperStage2CallContext *stage2CallContext)
+{
+    const dxStepperLocalContext *const lc = stage2CallContext->m_localContext;
+    const dxStepperProcessingCallContext *const stepperContext = stage2CallContext->m_stepperCallContext;
+
+    dJointWithInfo1 *const jointInfoArray = lc->m_jointinfos;
+    const unsigned int jointCount = lc->m_nj;
+    const unsigned int *const rowOffsets = lc->m_mindex;
+
+    int *const findexAll = lc->m_findex;
+    dReal *const jacobianAll = lc->m_J;
+    dReal *const rhsCfmAll = lc->m_pairsRhsCfm;
+    dReal *const loHiAll = lc->m_pairsLoHi;
+
+    dxWorld *const world = stepperContext->m_world;
+    const dReal erp = world->global_erp;
+    const dReal cfm = world->global_cfm;
+    const dReal invStep = dRecip(stepperContext->m_stepSize);
+
+    for (;;) {
+        // Claim the next unprocessed joint; the limit value means nothing is left
+        const unsigned ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_J, jointCount);
+        if (ji == jointCount) {
+            break;
+        }
+
+        const unsigned firstRow = rowOffsets[ji];
+        const unsigned int rowCount = rowOffsets[ji + 1] - firstRow;
+
+        dReal *const jointJ = jacobianAll + (sizeint)firstRow * (2 * JME__MAX);
+        dReal *jointRhsCfm = rhsCfmAll + (sizeint)firstRow * RCE__RHS_CFM_MAX;
+        dReal *jointLoHi = loHiAll + (sizeint)firstRow * LHE__LO_HI_MAX;
+        int *jointFindex = findexAll + firstRow;
+
+        // Defaults: zero Jacobian, zero rhs, world CFM, unbounded limits, no friction index
+        dSetZero (jointJ, rowCount * (2 * JME__MAX));
+        dSetValue(jointFindex, rowCount, -1);
+        for (unsigned int row = 0; row != rowCount; ++row) {
+            dReal *const rhsCfmPair = jointRhsCfm + row * RCE__RHS_CFM_MAX;
+            rhsCfmPair[RCE_RHS] = REAL(0.0);
+            rhsCfmPair[RCE_CFM] = cfm;
+
+            dReal *const loHiPair = jointLoHi + row * LHE__LO_HI_MAX;
+            loHiPair[LHE_LO] = -dInfinity;
+            loHiPair[LHE_HI] = dInfinity;
+        }
+
+        // Let the joint overwrite the defaults with its actual constraint data
+        dxJoint *const joint = jointInfoArray[ji].joint;
+        joint->getInfo2(invStep, erp, JME__MAX, jointJ + JME__J_MIN, jointJ + rowCount * JME__MAX + JME__J_MIN, RCE__RHS_CFM_MAX, jointRhsCfm, jointLoHi, jointFindex);
+        dSASSERT((int)LHE__LO_HI_MAX == RCE__RHS_CFM_MAX); // A single step value is passed to getInfo2() for both pair arrays
+
+        // The findex slice is small, so it is handled before the wider rhs/cfm pass.
+        // Values other than -1 are joint-local and get rebased to island-wide rows.
+        for (unsigned int row = 0; row != rowCount; ++row) {
+            const int localIndex = jointFindex[row];
+            if (localIndex != -1) {
+                jointFindex[row] = localIndex + firstRow;
+            }
+        }
+
+        // Scale both rhs and cfm by the reciprocal of the step size
+        for (unsigned int row = 0; row != rowCount; ++row) {
+            dReal *const rhsCfmPair = jointRhsCfm + row * RCE__RHS_CFM_MAX;
+            rhsCfmPair[RCE_RHS] *= invStep;
+            rhsCfmPair[RCE_CFM] *= invStep;
+        }
+    }
+}
+
+// Synchronization point between Stage 2a and Stage 2b.
+// Registers (allowedThreads - 1) extra dependencies on this call's own releasee,
+// posts that many Stage 2b calls against it, and then runs Stage 2b itself.
+// Always returns 1.
+static 
+int dxStepIsland_Stage2aSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+
+    dxStepperStage2CallContext *const stage2Context = static_cast<dxStepperStage2CallContext *>(_stage2CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = stage2Context->m_stepperCallContext;
+
+    // This callback is only scheduled on the multi-threaded path of the previous stage
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    dIASSERT(threadLimit > 1);
+
+    const unsigned helperCalls = threadLimit - 1;
+    dxWorld *const world = stepperContext->m_world;
+    world->AlterThreadedCallDependenciesCount(callThisReleasee, helperCalls);
+    world->PostThreadedCallsGroup(NULL, helperCalls, callThisReleasee, &dxStepIsland_Stage2b_Callback, stage2Context, "StepIsland Stage2b");
+
+    // The current call takes its own share of the Stage 2b work
+    dxStepIsland_Stage2b(stage2Context);
+
+    return 1;
+}
+
+// Threaded-call adapter for Stage 2b: recovers the typed stage-2 context from
+// the opaque pointer and runs dxStepIsland_Stage2b() on it. Always returns 1.
+static 
+int dxStepIsland_Stage2b_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by this stage
+    (void)callThisReleasee;
+    (void)callInstanceIndex;
+
+    dxStepIsland_Stage2b(static_cast<dxStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+// Stage 2b worker: prepares the intermediate data that Stage 2c combines into
+// the final system. It consists of three independent passes, each of which
+// hands out its work items through its own shared counter in the stage-2
+// context (m_ji_Ainit, m_ji_JinvM, m_bi_rhs_tmp):
+//   1. per joint: clear the joint's rows of A and put the CFM values on the diagonal;
+//   2. per joint: compute JinvM = J * invM for the joint's body 1 and body 2 blocks;
+//   3. per body:  compute rhs_tmp = v/h + invM*fe and reset the body's joint list head.
+// All three passes read data produced by Stage 2a and therefore must not start
+// before Stage 2a has completed.
+static 
+void dxStepIsland_Stage2b(dxStepperStage2CallContext *stage2CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
+    const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
+    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
+    unsigned int nj = localContext->m_nj;
+    // mindex[ji] is the first constraint row of joint ji; mindex[ji + 1] - mindex[ji] is its row count
+    const unsigned int *mindex = localContext->m_mindex;
+
+    {
+        // Warning!!!
+        // This code depends on cfm elements and therefore must be in different sub-stage
+        // from Jacobian construction in Stage2a to ensure proper synchronization
+        // and avoid accessing numbers being modified.
+        // Warning!!!
+        dReal *A = localContext->m_A;
+        const dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
+        const unsigned m = localContext->m_m;
+
+        // Row stride of A (m padded by dPAD)
+        const unsigned int mskip = dPAD(m);
+
+        unsigned ji;
+        // Claim joints until the shared counter reaches the limit value nj
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Ainit, nj)) != nj) {
+            const unsigned ofsi = mindex[ji];
+            const unsigned int infom = mindex[ji + 1] - ofsi;
+
+            // Clear all rows of A that belong to this joint
+            dReal *Arow = A + (sizeint)mskip * ofsi;
+            dSetZero(Arow, (sizeint)mskip * infom);
+            // Adiag starts at column ofsi of the joint's first row. It moves down one row
+            // per iteration while the index i moves one column right, so Adiag[i] walks
+            // along the diagonal of the joint's block.
+            dReal *Adiag = Arow + ofsi;
+            const dReal *rowRfsCrm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
+            for (unsigned int i = 0; i != infom; Adiag += mskip, ++i) {
+                Adiag[i] = (rowRfsCrm + i * RCE__RHS_CFM_MAX)[RCE_CFM];
+            }
+        }
+    }
+
+    {
+        // Warning!!!
+        // This code depends on J elements and therefore must be in different sub-stage
+        // from Jacobian construction in Stage2a to ensure proper synchronization
+        // and avoid accessing numbers being modified.
+        // Warning!!!
+        const dReal *invI = localContext->m_invI;
+        const dReal *J = localContext->m_J;
+        dReal *JinvM = stage2CallContext->m_JinvM;
+
+        // compute A = J*invM*J'. first compute JinvM = J*invM. this has the same
+        // format as J so we just go through the constraints in J multiplying by
+        // the appropriate scalars and matrices.
+        unsigned ji;
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_JinvM, nj)) != nj) {
+            const unsigned ofsi = mindex[ji];
+            const unsigned int infom = mindex[ji + 1] - ofsi;
+
+            // Zero both body blocks up front: the body 2 block stays zero when the joint has no second body
+            dReal *Jdst = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
+            dSetZero(Jdst, infom * (2 * JIM__MAX));
+
+            const dReal *Jsrc = J + (sizeint)ofsi * (2 * JME__MAX);
+            dxJoint *joint = jointinfos[ji].joint;
+
+            // Body 1 block: linear part scaled by the inverse mass, angular part multiplied by
+            // the body's inverse inertia. The body's tag is used as its index into invI.
+            dxBody *jb0 = joint->node[0].body;
+            if (true || jb0 != NULL) { // -- always true
+                dReal body_invMass0 = jb0->invMass;
+                const dReal *body_invI0 = invI + (sizeint)(unsigned int)jb0->tag * dM3E__MAX;
+                for (unsigned int j = infom; j != 0; --j) {
+                    for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass0;
+                    dMultiply0_133(Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI0);
+                    Jsrc += JME__MAX;
+                    Jdst += JIM__MAX;
+                }
+            }
+
+            // Body 2 block: Jsrc and Jdst have been advanced by infom rows in the loop above
+            // and therefore already point at the start of the second body's block.
+            dxBody *jb1 = joint->node[1].body;
+            if (jb1 != NULL) {
+                dReal body_invMass1 = jb1->invMass;
+                const dReal *body_invI1 = invI + (sizeint)(unsigned int)jb1->tag * dM3E__MAX;
+                for (unsigned int j = infom; j != 0; --j) {
+                    for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) Jdst[JIM__L_AXES_MIN + k] = Jsrc[JME__JL_MIN + k] * body_invMass1;
+                    dMultiply0_133 (Jdst + JIM__A_AXES_MIN, Jsrc + JME__JA_MIN, body_invI1);
+                    Jsrc += JME__MAX;
+                    Jdst += JIM__MAX;
+                }
+            }
+        }
+    }
+
+    {
+        // Warning!!!
+        // This code reads facc/tacc fields of body objects which (the fields)
+        // may be modified by dxJoint::getInfo2(). Therefore the code must be
+        // in different sub-stage from Jacobian construction in Stage2a
+        // to ensure proper synchronization and avoid accessing numbers being modified.
+        // Warning!!!
+        dxBody * const *const body = callContext->m_islandBodiesStart;
+        const unsigned int nb = callContext->m_islandBodiesCount;
+        const dReal *invI = localContext->m_invI;
+        atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
+        dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;
+
+        // compute the right hand side `rhs'
+        const dReal stepsizeRecip = dRecip(callContext->m_stepSize);
+
+        // put v/h + invM*fe into rhs_tmp
+        unsigned bi;
+        // Claim bodies until the shared counter reaches the limit value nb
+        while ((bi = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_bi_rhs_tmp, nb)) != nb) {
+            dReal *tmp1curr = rhs_tmp + (sizeint)bi * dDA__MAX;
+            const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
+            dxBody *b = body[bi];
+            // dSetZero(tmp1curr, 8); -- not needed
+            // Linear part: facc * invMass + lvel / h
+            for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) tmp1curr[dDA__L_MIN + j] = b->facc[dV3E__AXES_MIN + j] * b->invMass + b->lvel[dV3E__AXES_MIN + j] * stepsizeRecip;
+            // Angular part: invI * tacc, then add avel / h
+            dMultiply0_331 (tmp1curr + dDA__A_MIN, invIrow, b->tacc);
+            for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) tmp1curr[dDA__A_MIN + k] += b->avel[dV3E__AXES_MIN + k] * stepsizeRecip;
+            // Initialize body start joint indices -- this will be needed later for building body related joint list in dxStepIsland_Stage2c
+            bodyStartJoints[bi] = 0;
+        }
+    }
+}
+
+// Synchronization point between Stage 2b and Stage 2c.
+// Registers (allowedThreads - 1) extra dependencies on this call's own releasee,
+// posts that many Stage 2c calls against it, and then runs Stage 2c itself.
+// Always returns 1.
+static 
+int dxStepIsland_Stage2bSync_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; // unused
+
+    dxStepperStage2CallContext *const stage2Context = static_cast<dxStepperStage2CallContext *>(_stage2CallContext);
+    const dxStepperProcessingCallContext *const stepperContext = stage2Context->m_stepperCallContext;
+
+    // This callback is only scheduled on the multi-threaded path of the previous stage
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    dIASSERT(threadLimit > 1);
+
+    const unsigned helperCalls = threadLimit - 1;
+    dxWorld *const world = stepperContext->m_world;
+    world->AlterThreadedCallDependenciesCount(callThisReleasee, helperCalls);
+    world->PostThreadedCallsGroup(NULL, helperCalls, callThisReleasee, &dxStepIsland_Stage2c_Callback, stage2Context, "StepIsland Stage2c");
+
+    // The current call takes its own share of the Stage 2c work
+    dxStepIsland_Stage2c(stage2Context);
+
+    return 1;
+}
+
+
+// Threaded-call adapter for Stage 2c: recovers the typed stage-2 context from
+// the opaque pointer and runs dxStepIsland_Stage2c() on it. Always returns 1.
+static 
+int dxStepIsland_Stage2c_Callback(void *_stage2CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by this stage
+    (void)callThisReleasee;
+    (void)callInstanceIndex;
+
+    dxStepIsland_Stage2c(static_cast<dxStepperStage2CallContext *>(_stage2CallContext));
+    return 1;
+}
+
+// Stage 2c worker: finishes building the system from the Stage 2b results.
+// Two independent passes, each distributing joints through its own shared
+// counter in the stage-2 context (m_ji_Aaddjb, m_ji_rhs):
+//   1. per joint: add the joint's contributions to A (diagonal block plus the
+//      blocks shared with lower-indexed joints attached to the same bodies);
+//   2. per joint: subtract J*rhs_tmp from the right hand side and link the joint
+//      into the per-body joint lists that dxStepIsland_Stage4 walks later.
+static 
+void dxStepIsland_Stage2c(dxStepperStage2CallContext *stage2CallContext)
+{
+    //const dxStepperProcessingCallContext *callContext = stage2CallContext->m_stepperCallContext;
+    const dxStepperLocalContext *localContext = stage2CallContext->m_localContext;
+    dJointWithInfo1 *jointinfos = localContext->m_jointinfos;
+    unsigned int nj = localContext->m_nj;
+    // mindex[ji] is the first constraint row of joint ji; mindex[ji + 1] - mindex[ji] is its row count
+    const unsigned int *mindex = localContext->m_mindex;
+
+    {
+        // Warning!!!
+        // This code depends on A elements and JinvM elements and therefore
+        // must be in different sub-stage from A initialization and JinvM calculation in Stage2b
+        // to ensure proper synchronization and avoid accessing numbers being modified.
+        // Warning!!!
+        dReal *A = localContext->m_A;
+        const dReal *JinvM = stage2CallContext->m_JinvM;
+        const dReal *J = localContext->m_J;
+        const unsigned m = localContext->m_m;
+
+        // now compute A = JinvM * J'. A's rows and columns are grouped by joint,
+        // i.e. in the same way as the rows of J. block (i,j) of A is only nonzero
+        // if joints i and j have at least one body in common.
+        // Row stride of A (m padded by dPAD)
+        const unsigned int mskip = dPAD(m);
+
+        unsigned ji;
+        // Claim joints until the shared counter reaches the limit value nj.
+        // Each joint only writes to its own rows of A.
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_Aaddjb, nj)) != nj) {
+            const unsigned ofsi = mindex[ji];
+            const unsigned int infom = mindex[ji + 1] - ofsi;
+
+            dReal *Arow = A + (sizeint)mskip * ofsi;
+            const dReal *JinvMRow = JinvM + (sizeint)ofsi * (2 * JIM__MAX);
+            dxJoint *joint = jointinfos[ji].joint;
+
+            dxBody *jb0 = joint->node[0].body;
+            if (true || jb0 != NULL) { // -- always true
+                // compute diagonal block of A
+                const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
+                MultiplyAddJinvMxJToA (Arow + ofsi, JinvMRow, JRow, infom, infom, mskip);
+
+                // Joint 0 has no lower-indexed neighbours, so the list walk is skipped for it
+                for (dxJointNode *n0 = (ji != 0 ? jb0->firstjoint : NULL); n0; n0 = n0->next) {
+                    // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
+                    // joint that should not be considered
+                    int j0 = n0->joint->tag;
+                    // Only joints with a smaller index are handled, i.e. only blocks to the left of the diagonal are filled
+                    if (j0 != -1 && (unsigned)j0 < ji) {
+                        const unsigned int jiother_ofsi = mindex[j0];
+                        const unsigned int jiother_infom = mindex[j0 + 1] - jiother_ofsi;
+                        const dJointWithInfo1 *jiother = jointinfos + j0;
+                        // Pick the other joint's body 2 Jacobian block if the shared body is its second body, else its body 1 block
+                        unsigned int smart_infom = (jiother->joint->node[1].body == jb0) ? jiother_infom : 0;
+                        // set block of A
+                        const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
+                        MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMRow, JOther, infom, jiother_infom, mskip);
+                    }
+                }
+            }
+
+            dxBody *jb1 = joint->node[1].body;
+            dIASSERT(jb1 != jb0);
+            if (jb1 != NULL) {
+                // The body 2 block of JinvM follows the infom rows of the body 1 block
+                const dReal *JinvMOther = JinvMRow + infom * JIM__MAX;
+                // compute diagonal block of A
+                const dReal *JRow = J + ((sizeint)ofsi * 2 + infom) * JME__MAX;
+                MultiplyAddJinvMxJToA (Arow + ofsi, JinvMOther, JRow, infom, infom, mskip);
+
+                // Same neighbour walk as for the first body, now over the second body's joint list
+                for (dxJointNode *n1 = (ji != 0 ? jb1->firstjoint : NULL); n1; n1 = n1->next) {
+                    // if joint was tagged as -1 then it is an inactive (m=0 or disabled)
+                    // joint that should not be considered
+                    int j1 = n1->joint->tag;
+                    if (j1 != -1 && (unsigned)j1 < ji) {
+                        const unsigned int jiother_ofsi = mindex[j1];
+                        const unsigned int jiother_infom = mindex[j1 + 1] - jiother_ofsi;
+                        const dJointWithInfo1 *jiother = jointinfos + j1;
+                        unsigned int smart_infom = (jiother->joint->node[1].body == jb1) ? jiother_infom : 0;
+                        // set block of A
+                        const dReal *JOther = J + ((sizeint)jiother_ofsi * 2 + smart_infom) * JME__MAX;
+                        MultiplyAddJinvMxJToA (Arow + jiother_ofsi, JinvMOther, JOther, infom, jiother_infom, mskip);
+                    }
+                }
+            }
+        }
+    }
+
+    {
+        // Warning!!!
+        // This code depends on rhs_tmp elements and therefore must be in
+        // different sub-stage from rhs_tmp calculation in Stage2b to ensure
+        // proper synchronization and avoid accessing numbers being modified.
+        // Warning!!!
+        const dReal *J = localContext->m_J;
+        const dReal *rhs_tmp = stage2CallContext->m_rhs_tmp;
+        dReal *pairsRhsCfm = localContext->m_pairsRhsCfm;
+        atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
+        atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;
+
+        // compute the right hand side `rhs'
+        // put J*rhs_tmp into rhs
+        unsigned ji;
+        while ((ji = ThrsafeIncrementIntUpToLimit(&stage2CallContext->m_ji_rhs, nj)) != nj) {
+            const unsigned ofsi = mindex[ji];
+            const unsigned int infom = mindex[ji + 1] - ofsi;
+
+            dReal *currRhsCfm = pairsRhsCfm + (sizeint)ofsi * RCE__RHS_CFM_MAX;
+            const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
+            
+            dxJoint *joint = jointinfos[ji].joint;
+
+            dxBody *jb0 = joint->node[0].body;
+            if (true || jb0 != NULL) { // -- always true
+                // The body's tag is used as its index into rhs_tmp and bodyStartJoints
+                unsigned bodyIndex = (unsigned)jb0->tag;
+                MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);
+
+                // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b.
+                const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_FIRST_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
+                // Push onto the head of the body's list: store the current head as this link's successor,
+                // then try to install this link as the new head; repeat if the head was changed meanwhile.
+                for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
+                    bodyJointLinks[linkIndex] = oldStartIndex;
+                    if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
+                        break;
+                    }
+                }
+            }
+
+            dxBody *jb1 = joint->node[1].body;
+            if (jb1 != NULL) {
+                unsigned bodyIndex = (unsigned)jb1->tag;
+                // The body 2 Jacobian block follows the infom rows of the body 1 block
+                MultiplySubJxRhsTmpFromRHS (currRhsCfm, JRow + infom * JME__MAX, rhs_tmp + (sizeint)bodyIndex * dDA__MAX, infom);
+
+                // Link joints connected to each body into a list to be used on results incorporation. The bodyStartJoints have been initialized in dxStepIsland_Stage2b
+                const atomicord32 linkIndex = (atomicord32)((sizeint)ji * dJCB__MAX + dJCB_SECOND_BODY); // It is asserted at links buffer allocation that the indices can't overflow atomicord32
+                // Same head-push retry loop as for the first body
+                for (atomicord32 oldStartIndex = bodyStartJoints[bodyIndex]; ; oldStartIndex = bodyStartJoints[bodyIndex]) {
+                    bodyJointLinks[linkIndex] = oldStartIndex;
+                    if (ThrsafeCompareExchange(&bodyStartJoints[bodyIndex], oldStartIndex, linkIndex + 1)) { // The link index is stored incremented to allow 0 as end indicator
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+// Threaded-call adapter for Stage 3: recovers the typed stage-3 context from
+// the opaque pointer and runs dxStepIsland_Stage3() on it. Always returns 1.
+static 
+int dxStepIsland_Stage3_Callback(void *_stage3CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by this stage
+    (void)callThisReleasee;
+    (void)callInstanceIndex;
+
+    dxStepIsland_Stage3(static_cast<dxStepperStage3CallContext *>(_stage3CallContext));
+    return 1;
+}
+
+// Stage 3: solves the LCP for the island (when it has constraint rows) and
+// launches Stage 4.
+//
+// The arena is first rolled back to the state recorded in the stage-3 context;
+// the context pointer is not used after that and is cleared. With a single
+// allowed thread Stage 4 is run inline; otherwise (allowedThreads - 1) Stage 4
+// calls are posted against the final releasee and the current call runs Stage 4
+// as well.
+static 
+void dxStepIsland_Stage3(dxStepperStage3CallContext *stage3CallContext)
+{
+    const dxStepperLocalContext *const lc = stage3CallContext->m_localContext;
+    const dxStepperProcessingCallContext *const stepperContext = stage3CallContext->m_stepperCallContext;
+
+    dxWorldProcessMemArena *memarena = stepperContext->m_stepperArena;
+    memarena->RestoreState(stage3CallContext->m_stage1MemArenaState);
+    stage3CallContext = NULL; // WARNING! The stage-3 context must not be touched from here on!
+    dIVERIFY(stage3CallContext == NULL); // Keeps compilers quiet about the otherwise unused assignment
+
+    unsigned int m = lc->m_m;
+
+    if (m > 0) {
+        unsigned int nub = lc->m_nub;
+        int *findex = lc->m_findex;
+        dReal *A = lc->m_A;
+        dReal *pairsLoHi = lc->m_pairsLoHi;
+        // The cfm values are no longer needed, so their slots receive the lambdas
+        dReal *pairsRhsLambda = lc->m_pairsRhsCfm;
+
+        BEGIN_STATE_SAVE(memarena, lcpstate) {
+            IFTIMING(dTimerNow ("solve LCP problem"));
+
+            // Produces lambda; A is destroyed in the process, which is acceptable
+            dxSolveLCP (memarena, m, A, pairsRhsLambda, NULL, nub, pairsLoHi, findex);
+            dSASSERT((int)RLE__RHS_LAMBDA_MAX == PBX__MAX && (int)RLE_RHS == PBX_B && (int)RLE_LAMBDA == PBX_X);
+            dSASSERT((int)LHE__LO_HI_MAX == PLH__MAX && (int)LHE_LO == PLH_LO && (int)LHE_HI == PLH_HI);
+
+        } END_STATE_SAVE(memarena, lcpstate);
+    }
+
+    dxStepperStage4CallContext *stage4CallContext = (dxStepperStage4CallContext *)memarena->AllocateBlock(sizeof(dxStepperStage4CallContext));
+    stage4CallContext->Initialize(stepperContext, lc);
+
+    const unsigned threadLimit = stepperContext->m_stepperAllowedThreads;
+    dIASSERT(threadLimit != 0);
+
+    if (threadLimit != 1) {
+        dxWorld *const world = stepperContext->m_world;
+        dCallReleaseeID finalReleasee = stepperContext->m_finalReleasee;
+        const unsigned helperCalls = threadLimit - 1;
+
+        world->AlterThreadedCallDependenciesCount(finalReleasee, helperCalls);
+        world->PostThreadedCallsGroup(NULL, helperCalls, finalReleasee, &dxStepIsland_Stage4_Callback, stage4CallContext, "StepIsland Stage4");
+        // No extra dependency is added for the inline call below: the final releasee already depends on the current call
+        dxStepIsland_Stage4(stage4CallContext);
+    }
+    else {
+        IFTIMING(dTimerNow ("compute and apply constraint force"));
+        dxStepIsland_Stage4(stage4CallContext);
+        IFTIMING(dTimerEnd());
+
+        if (m > 0) {
+            IFTIMING(dTimerReport(stdout,1));
+        }
+    }
+}
+
+// Threaded-call adapter for Stage 4: recovers the typed stage-4 context from
+// the opaque pointer and runs dxStepIsland_Stage4() on it. Always returns 1.
+static 
+int dxStepIsland_Stage4_Callback(void *_stage4CallContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    // Neither the instance index nor the releasee is needed by this stage
+    (void)callThisReleasee;
+    (void)callInstanceIndex;
+
+    dxStepIsland_Stage4(static_cast<dxStepperStage4CallContext *>(_stage4CallContext));
+    return 1;
+}
+
+// Stage 4 worker: converts the LCP solution into per-body constraint forces,
+// updates the body velocities and integrates the bodies over the step.
+//
+// Bodies are claimed one at a time through the shared m_bi_constrForce counter,
+// so the function can be run with the same context from several calls.
+// For each claimed body it
+//   1. walks the body's joint link list (built in dxStepIsland_Stage2c) and
+//      accumulates constrForce = J' * lambda over all attached joints;
+//   2. adds the external force/torque accumulators, scales by the step size
+//      and applies the result to lvel/avel;
+//   3. calls dxStepBody() and zeroes facc/tacc.
+// If the island has no constraint rows the link arrays are NULL and only
+// the external forces are applied.
+static 
+void dxStepIsland_Stage4(dxStepperStage4CallContext *stage4CallContext)
+{
+    const dxStepperProcessingCallContext *callContext = stage4CallContext->m_stepperCallContext;
+    const dxStepperLocalContext *localContext = stage4CallContext->m_localContext;
+
+    const dReal stepSize = callContext->m_stepSize;
+    dxBody *const *bodies = callContext->m_islandBodiesStart;
+    dReal *invI = localContext->m_invI;
+    dJointWithInfo1 *jointInfos = localContext->m_jointinfos;
+    dReal *J = localContext->m_J;
+    dReal *pairsRhsLambda = localContext->m_pairsRhsCfm; // The cfm slots hold the lambdas after the LCP solve
+    const unsigned int *mIndex = localContext->m_mindex;
+    atomicord32 *bodyStartJoints = localContext->m_bodyStartJoints;
+    atomicord32 *bodyJointLinks = localContext->m_bodyJointLinks;
+    const unsigned int nb = callContext->m_islandBodiesCount;
+
+    unsigned bi;
+    // Claim bodies until the shared counter reaches the limit value nb
+    while ((bi = ThrsafeIncrementIntUpToLimit(&stage4CallContext->m_bi_constrForce, nb)) != nb) {
+        dVector3 angularForceAccumulator;
+        dxBody *b = bodies[bi];
+        const dReal *invIrow = invI + (sizeint)bi * dM3E__MAX;
+        dReal body_invMass_mul_stepSize = stepSize * b->invMass;
+
+        dReal bodyConstrForce[CFE__MAX];
+        bool constrForceAvailable = false;
+
+        // Link indices are stored incremented by one; 0 terminates the list
+        unsigned linkIndex = bodyStartJoints != NULL ? bodyStartJoints[bi] : 0;
+        if (linkIndex != 0) {
+            dSetZero(bodyConstrForce, dARRAY_SIZE(bodyConstrForce));
+        }
+
+        // compute the constraint force as constrForce = J'*lambda
+        for (; linkIndex != 0; constrForceAvailable = true, linkIndex = bodyJointLinks[linkIndex - 1]) {
+            // A link index encodes both the joint and which of its bodies this link belongs to
+            unsigned jointIndex = (linkIndex - 1) / dJCB__MAX;
+            unsigned jointBodyIndex = (linkIndex - 1) % dJCB__MAX;
+            // Validate the decoded joint index before it is used to address any array
+            dIASSERT(dIN_RANGE(jointIndex, 0, localContext->m_nj));
+
+            const dJointWithInfo1 *currJointInfo = jointInfos + jointIndex;
+            unsigned ofsi = mIndex[jointIndex];
+
+            const dReal *JRow = J + (sizeint)ofsi * (2 * JME__MAX);
+            const dReal *rowRhsLambda = pairsRhsLambda + (sizeint)ofsi * RLE__RHS_LAMBDA_MAX;
+
+            dxJoint *joint = currJointInfo->joint;
+            const unsigned int infom = currJointInfo->info.m;
+
+            // The second body's Jacobian block follows the infom rows of the first body's block
+            // unsigned jRowExtraOffset = jointBodyIndex * infom * JME__MAX;
+            unsigned jRowExtraOffset = jointBodyIndex != dJCB__MIN ? infom * JME__MAX : 0;
+            dSASSERT(dJCB__MAX == 2);
+
+            dJointFeedback *fb = joint->feedback;
+            MultiplyAddJxLambdaToCForce(bodyConstrForce, JRow + jRowExtraOffset, rowRhsLambda, infom, fb, jointBodyIndex);
+        }
+
+        // compute the velocity update
+        if (constrForceAvailable) {
+            // add fe to cforce and multiply cforce by stepSize
+            for (unsigned int j = dSA__MIN; j != dSA__MAX; ++j) {
+                b->lvel[dV3E__AXES_MIN + j] += (bodyConstrForce[CFE__L_MIN + j] + b->facc[dV3E__AXES_MIN + j]) * body_invMass_mul_stepSize;
+            }
+            for (unsigned int k = dSA__MIN; k != dSA__MAX; ++k) {
+                angularForceAccumulator[dV3E__AXES_MIN + k] = (bodyConstrForce[CFE__A_MIN + k] + b->tacc[dV3E__AXES_MIN + k]) * stepSize;
+            }
+        }
+        else {
+            // No joints are attached to the body: only the external force and torque contribute
+            dAddVectorScaledVector3(b->lvel, b->lvel, b->facc, body_invMass_mul_stepSize);
+            dCopyScaledVector3(angularForceAccumulator, b->tacc, stepSize);
+        }
+
+        // avel += invI * (accumulated torque * stepSize)
+        dMultiplyAdd0_331 (b->avel, invIrow, angularForceAccumulator + dV3E__AXES_MIN);
+
+        // update the position and orientation from the new linear/angular velocity
+        // (over the given time step)
+        dxStepBody (b, stepSize);
+
+        // zero all force accumulators
+        dZeroVector3(b->facc);
+        dZeroVector3(b->tacc);
+    }
+}
+
+
+//****************************************************************************
+
+/*extern */
+sizeint dxEstimateStepMemoryRequirements (dxBody * const *body, unsigned int nb, dxJoint * const *_joint, unsigned int _nj)
+{   // Returns the combined byte size of the buffers and call contexts itemized below, for nb bodies and _nj joints
+    (void)body; // unused
+    unsigned int nj, m; // nj: number of joints reporting max_m > 0; m: sum of max_m over those joints
+
+    {
+        unsigned int njcurr = 0, mcurr = 0;
+        dxJoint::SureMaxInfo info;
+        dxJoint *const *const _jend = _joint + _nj;
+        for (dxJoint *const *_jcurr = _joint; _jcurr != _jend; ++_jcurr) {	
+            dxJoint *j = *_jcurr;
+            j->getSureMaxInfo (&info);
+
+            unsigned int jm = info.max_m;
+            if (jm > 0) { // joints whose max_m is zero are left out of both counts
+                njcurr++;
+
+                mcurr += jm;
+            }
+        }
+        nj = njcurr; m = mcurr;
+    }
+
+    sizeint res = 0;
+    // This term depends on the body count only, so it is added even when no joint contributes
+    res += dOVERALIGNED_SIZE(sizeof(dReal) * dM3E__MAX * nb, INVI_ALIGNMENT); // for invI
+
+    {
+        sizeint sub1_res1 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * 2 * _nj); // for initial jointinfos
+
+        // The array can't grow right more than by nj
+        sizeint sub1_res2 = dEFFICIENT_SIZE(sizeof(dJointWithInfo1) * ((sizeint)_nj + (sizeint)nj)); // for shrunk jointinfos
+        sub1_res2 += dEFFICIENT_SIZE(sizeof(dxStepperLocalContext)); //for dxStepperLocalContext
+        if (m > 0) { // everything in this branch scales with m, nj or nb and is skipped when m is zero
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(unsigned int) * (nj + 1)); // for mindex
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(int) * m); // for findex
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * 2 * JME__MAX * m); // for J
+            unsigned int mskip = dPAD(m);
+            sub1_res2 += dOVERALIGNED_SIZE(sizeof(dReal) * mskip * m, AMATRIX_ALIGNMENT); // for A
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * RCE__RHS_CFM_MAX * m); // for pairsRhsCfm
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(dReal) * LHE__LO_HI_MAX * m); // for pairsLoHi
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32) * nb); // for bodyStartJoints
+            sub1_res2 += dEFFICIENT_SIZE(sizeof(atomicord32)* dJCB__MAX * nj); // for bodyJointLinks
+        }
+        // Of the three alternatives sized in the next scope only the largest one is added (see dMAX below)
+        {
+            sizeint sub2_res1 = dEFFICIENT_SIZE(sizeof(dxStepperStage3CallContext)); // for dxStepperStage3CallContext
+
+            sizeint sub2_res2 = 0;
+
+            sizeint sub2_res3 = dEFFICIENT_SIZE(sizeof(dxStepperStage4CallContext)); // for dxStepperStage4CallContext
+
+            if (m > 0) {
+                sub2_res1 += dOVERALIGNED_SIZE(sizeof(dReal) * 2 * JIM__MAX * m, JINVM_ALIGNMENT); // for JinvM
+                sub2_res1 += dEFFICIENT_SIZE(sizeof(dReal) * dDA__MAX * nb); // for rhs_tmp
+                sub2_res1 += dEFFICIENT_SIZE(sizeof(dxStepperStage2CallContext)); // for dxStepperStage2CallContext
+
+                sub2_res2 += dxEstimateSolveLCPMemoryReq(m, false);
+            }
+
+            sub1_res2 += dMAX(sub2_res1, dMAX(sub2_res2, sub2_res3));
+        }
+        // sub1_res1, sub1_res2 and the stage 0/1 contexts are combined by maximum too; presumably they never coexist (confirm against the allocation code)
+        sizeint sub1_res12_max = dMAX(sub1_res1, sub1_res2);
+        sizeint stage01_contexts = dEFFICIENT_SIZE(sizeof(dxStepperStage0BodiesCallContext))
+            + dEFFICIENT_SIZE(sizeof(dxStepperStage0JointsCallContext))
+            + dEFFICIENT_SIZE(sizeof(dxStepperStage1CallContext));
+        res += dMAX(sub1_res12_max, stage01_contexts);
+    }
+
+    return res;
+}
+
+
+/*extern */
+unsigned dxEstimateStepMaxCallCount(
+    unsigned /*activeThreadCount*/, unsigned allowedThreadCount)
+{
+    // Call budget: dxStepIsland itself, one dxStepIsland_Stage2a and one dxStepIsland_Stage2b call
+    // per allowed thread, two dxStepIsland_Stage2?_Sync calls and a single dxStepIsland_Stage3 call
+    const unsigned stage2CallCount = 2 * allowedThreadCount + 2;
+    return 1 + stage2CallCount + 1;
+}
diff --git a/libs/ode-0.16.1/ode/src/step.h b/libs/ode-0.16.1/ode/src/step.h
new file mode 100644
index 0000000..dc8331a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/step.h
@@ -0,0 +1,40 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_STEP_H_
+#define _ODE_STEP_H_
+
+#include <ode/common.h>
+
+struct dxStepperProcessingCallContext;
+
+
+sizeint dxEstimateStepMemoryRequirements( // returns a byte count for nb bodies and _nj joints
+    dxBody * const *body, unsigned int nb, dxJoint * const *_joint, unsigned int _nj);
+unsigned dxEstimateStepMaxCallCount( // returns a call-count bound derived from the thread counts
+    unsigned activeThreadCount, unsigned allowedThreadCount);
+// Island stepping routine (per its name); every input is passed through the call context structure
+void dxStepIsland(const dxStepperProcessingCallContext *callContext);
+
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/threaded_solver_ldlt.h b/libs/ode-0.16.1/ode/src/threaded_solver_ldlt.h
new file mode 100644
index 0000000..c791508
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threaded_solver_ldlt.h
@@ -0,0 +1,809 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Equation System Threaded Solver
+ * Copyright (c) 2017-2019 Oleh Derevenko, odar@eleks.com (change all "a" to "e")
+ */
+
+
+
+#ifndef _ODE_THREADED_SOLVER_LDLT_H_
+#define _ODE_THREADED_SOLVER_LDLT_H_
+
+
+#include "coop_matrix_types.h"
+#include <ode/threading.h>
+
+
+class dxThreadingBase;
+class dxResourceRequirementDescriptor;
+class dxRequiredResourceContainer;
+
+
+class ThreadedEquationSolverLDLT
+{
+public:
+    static void estimateCooperativeFactoringLDLTResourceRequirements(dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount); // each estimate*() in this list is paired with the cooperatively*() that follows it
+    static void cooperativelyFactorLDLT(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip);
+    // Pair for the "L1 straight" solve: L is read-only and b is the only writable array
+    static void estimateCooperativeSolvingL1StraightResourceRequirements(dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount);
+    static void cooperativelySolveL1Straight(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip);
+    // Pair for the "L1 transposed" solve; same parameter list as the straight variant
+    static void estimateCooperativeSolvingL1TransposedResourceRequirements(dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount);
+    static void cooperativelySolveL1Transposed(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip);
+    // Pair for vector scaling: vectorData is writable, scaleData is read-only (presumably both hold elementCount values; confirm)
+    static void estimateCooperativeScalingVectorResourceRequirements(dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned elementCount);
+    static void cooperativelyScaleVector(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        dReal *vectorData, const dReal *scaleData, unsigned elementCount);
+    // Pair for the LDLT solve: L and d are read-only inputs and b is the only writable array
+    static void estimateCooperativeSolvingLDLTResourceRequirements(dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount);
+    static void cooperativelySolveLDLT(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip);
+
+public:
+    enum
+    {   // Public alias of the alignment constant this class passes to dOVERALIGNED_SIZE when sizing its own buffers
+        ALLOCATION_DEFAULT_ALIGNMENT = COOP_THREAD_DATA_ALIGNMENT_SIZE,
+    };
+
+private:
+    struct FactorizationSolveL1StripeCellContext;
+    struct FactorizationFactorizeL1StripeThreadContext;
+
+    enum
+    {
+        FLDLT_D_STRIDE          = 1, // NOTE(review): not referenced in this part of the header; presumably the element stride of d (confirm)
+        FLDLT_COOPERATIVE_BLOCK_COUNT_MINIMUM = 5, // NOTE(review): its use is outside this part of the header
+
+        FSL1S_BLOCK_SIZE        = 2, // rows advanced per solving block (see FactorLDLTWorkerContext::incrementForNextBlock)
+
+        FSL1S_REGULAR_B_ROWS    = FSL1S_BLOCK_SIZE, // second dimension of FactorizationSolveL1StripeCellContext::m_c
+        FSL1S_FINAL_B_ROWS      = 1,
+
+        FFL1S_REGULAR_A_ROWS    = FSL1S_BLOCK_SIZE, // length of FactorizationFactorizeL1StripeThreadContext::m_sameZ
+        FFL1S_FINAL_A_ROWS      = 1,
+        FFL1S_REGULAR_BLOCK_SIZE = 16,  // Block size for two-row stripes: a power of 2 of suitable magnitude (and so, naturally, not divisible by 6)
+        FFL1S_FINAL_BLOCK_SIZE  = 32, // Block size for single-row stripes: a power of 2 of suitable magnitude (and so, naturally, not divisible by 6)
+    };
+
+    static unsigned restrictFactoringLDLTAllowedThreadCount(
+        dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount); // returns a thread count; no body in this part of the file
+    static void doEstimateCooperativeFactoringLDLTResourceRequirementsValidated(
+        dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount); // "Validated": presumably expects an already restricted thread count (confirm)
+    static void doCooperativelyFactorLDLTValidated(
+        dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        dReal *A, dReal *d, unsigned rowCount, unsigned rowSkip); // same parameter list as the public cooperativelyFactorLDLT
+
+
+    static unsigned deriveSolvingL1StripeBlockCount(unsigned rowCount, unsigned blockStep)
+    {   // Blocks of blockStep rows needed to cover rowCount rows; a partial last block counts as one
+        return (rowCount + blockStep - 1) / blockStep;
+    }
+
+    struct FactorizationSolvingL1StripeMemoryEstimates
+    {
+        void assignData(sizeint descriptorSizeRequired, sizeint contextSizeRequired)
+        {   // Records both byte sizes exactly as passed in
+            m_contextSizeRequired = contextSizeRequired;
+            m_descriptorSizeRequired = descriptorSizeRequired;
+        }
+        // The marking routine advances through the buffer by the descriptor size first, then by the context size
+        sizeint  m_descriptorSizeRequired;
+        sizeint  m_contextSizeRequired;
+    };
+
+    static unsigned deriveSolvingL1StripeThreadCount(unsigned blockCount, unsigned allowedThreadCount)
+    {   // One thread per two blocks, never fewer than one and never more than allowedThreadCount
+        dIASSERT(allowedThreadCount >= 1);
+        const unsigned halfBlockCount = blockCount / 2;
+        if (halfBlockCount >= allowedThreadCount) { return allowedThreadCount; }
+        return dMACRO_MAX(halfBlockCount, 1U);
+    }
+
+    static sizeint estimateCooperativelySolvingL1Stripe_XMemoryRequirement(unsigned blockCount, 
+        FactorizationSolvingL1StripeMemoryEstimates &ref_memoryEstimates)
+    {   // Sizes both regions with dOVERALIGNED_SIZE, records them in ref_memoryEstimates and returns their sum
+        // One progress descriptor per block
+        const sizeint descriptorBytes = dOVERALIGNED_SIZE(sizeof(cellindexint) * blockCount, COOP_THREAD_DATA_ALIGNMENT_SIZE);
+        // CCI__MAX contexts per block plus one more context per block (the latter addressed by buildResultContextRef)
+        const sizeint contextBytes = dOVERALIGNED_SIZE(sizeof(FactorizationSolveL1StripeCellContext) * (CCI__MAX + 1) * blockCount, COOP_THREAD_DATA_ALIGNMENT_SIZE);
+        ref_memoryEstimates.assignData(descriptorBytes, contextBytes);
+        return descriptorBytes + contextBytes;
+    }
+
+    static void *markCooperativelySolvingL1Stripe_XMemoryStructuresOut(void *buffer, 
+        const FactorizationSolvingL1StripeMemoryEstimates &memoryEstimates, 
+        cellindexint *&out_blockProgressDescriptors, FactorizationSolveL1StripeCellContext *&out_cellContexts)
+    {   // Splits buffer into the descriptor region followed by the context region
+        void *const contextsStart = (uint8 *)buffer + memoryEstimates.m_descriptorSizeRequired;
+        void *const regionsEnd = (uint8 *)contextsStart + memoryEstimates.m_contextSizeRequired;
+        out_blockProgressDescriptors = (cellindexint *)buffer;
+        out_cellContexts = (FactorizationSolveL1StripeCellContext *)contextsStart;
+        // The caller receives the first byte located past both regions
+        return regionsEnd;
+    }
+
+    static void initializeCooperativelySolvingL1Stripe_XMemoryStructures(unsigned blockCount, 
+        atomicord32 &out_blockCompletionProgress, cellindexint *blockProgressDescriptors, FactorizationSolveL1StripeCellContext *dUNUSED(cellContexts))
+    {   // Zero-fills all blockCount progress descriptors and resets the completion counter; the cell contexts are not touched
+        memset(blockProgressDescriptors, 0, sizeof(blockProgressDescriptors[0]) * blockCount);
+        out_blockCompletionProgress = 0;
+    }
+
+    template<unsigned int block_step, unsigned int b_rows> // declaration only; the body is not in this part of the file
+    static void participateSolvingL1Stripe_X(const dReal *L, dReal *B, unsigned blockCount, unsigned rowSkip, 
+        volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+        FactorizationSolveL1StripeCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex); // the /*=...*/ notes give the expected initial value or element count of each buffer
+
+    static unsigned deriveScalingAndFactorizingL1StripeBlockCountFromSolvingBlockIndex(unsigned solvingBlockIndex, unsigned solvingBlockStep, unsigned blockARows)
+    {   // Turns the solving block index into a row index, then counts the factorizing blocks covering that many rows
+        const unsigned factorizationRowIndex = solvingBlockIndex * solvingBlockStep;
+        return deriveScalingAndFactorizingL1StripeBlockCountFromFactorizationRow(factorizationRowIndex, deriveScalingAndFactorizingL1StripeBlockSize(blockARows));
+    }
+
+    static unsigned deriveScalingAndFactorizingL1StripeBlockCountFromFactorizationRow(unsigned factorizationRowIndex, unsigned factorizationBlockSize)
+    {   // Ceiling of factorizationRowIndex / factorizationBlockSize
+        return (factorizationRowIndex + factorizationBlockSize - 1) / factorizationBlockSize;
+    }
+
+    static unsigned deriveScalingAndFactorizingL1StripeBlockSize(unsigned blockARows)
+    {   // Single-row stripes use FFL1S_FINAL_BLOCK_SIZE, two-row stripes use FFL1S_REGULAR_BLOCK_SIZE
+        dIASSERT(blockARows >= 1 && blockARows <= 2);
+        if (blockARows == 1) { return FFL1S_FINAL_BLOCK_SIZE; }
+
+        return FFL1S_REGULAR_BLOCK_SIZE;
+    }
+
+
+    static unsigned deriveScalingAndFactorizingL1StripeThreadCount(unsigned blockCount, unsigned allowedThreadCount)
+    {   // The smaller of the block count and the allowed thread count
+        dIASSERT(blockCount != 0);
+        dIASSERT(allowedThreadCount >= 1);
+        const unsigned threadCount = dMACRO_MIN(blockCount, allowedThreadCount);
+        return threadCount;
+    }
+
+    struct FactorizationFactorizeL1StripeContext;
+
+    struct FactorizationScalingAndFactorizingL1StripeMemoryEstimates
+    {   // Carries the single byte size computed by the estimate routine over to the marking routine
+        void assignData(sizeint contextSizeRequired)
+        {
+            m_contextSizeRequired = contextSizeRequired;
+        }
+
+        sizeint  m_contextSizeRequired; // byte size of the factorization context region
+    };
+
+    static sizeint estimateCooperativelyScalingAndFactorizingL1Stripe_XMemoryRequirement(unsigned factorizingMaximumThreads, 
+        FactorizationScalingAndFactorizingL1StripeMemoryEstimates &ref_memoryEstimates)
+    {   // Sizes a FactorizationFactorizeL1StripeContext with one thread context per factorizing thread
+        dIASSERT(factorizingMaximumThreads != 0);
+        // The context structure already declares one thread context, so only (threads - 1) more are added
+        const sizeint extraThreadContextBytes = sizeof(FactorizationFactorizeL1StripeThreadContext) * (factorizingMaximumThreads - 1);
+        const sizeint contextBytes = dOVERALIGNED_SIZE(sizeof(FactorizationFactorizeL1StripeContext) + extraThreadContextBytes, COOP_THREAD_DATA_ALIGNMENT_SIZE);
+        ref_memoryEstimates.assignData(contextBytes);
+
+        return contextBytes;
+    }
+
+    static void *markCooperativelyScalingAndFactorizingL1Stripe_XMemoryStructuresOut(void *buffer, 
+        const FactorizationScalingAndFactorizingL1StripeMemoryEstimates &memoryEstimates, FactorizationFactorizeL1StripeContext *&out_factorizationContext)
+    {   // The factorization context sits at the start of buffer; the result is the first byte located past it
+        out_factorizationContext = (FactorizationFactorizeL1StripeContext *)buffer;
+
+        void *const contextEnd = (uint8 *)buffer + memoryEstimates.m_contextSizeRequired;
+
+        return contextEnd;
+    }
+
+    static void initializeCooperativelyScalingAndFactorizingL1Stripe_XMemoryStructures( 
+        FactorizationFactorizeL1StripeContext *factorizationContext, unsigned threadCount)
+    {   // Thin forwarder: FactorizationFactorizeL1StripeContext::initialize() stores threadCount and zeroes both indices
+        factorizationContext->initialize(threadCount);
+    }
+
+
+    template<unsigned int a_rows, unsigned int d_stride> // declaration only; presumably instantiated with FFL1S_*_A_ROWS and FLDLT_D_STRIDE (confirm)
+    static void participateScalingAndFactorizingL1Stripe_X(dReal *ARow, dReal *d, unsigned factorizationRow, unsigned rowSkip,
+        FactorizationFactorizeL1StripeContext *factorizationContext, unsigned ownThreadIndex);
+
+private:
+    struct FactorLDLTWorkerContext
+    {   // Argument bundle handed by reference to the factotLDLT_* routines declared after this structure
+        FactorLDLTWorkerContext(dxThreadingBase *threading, unsigned allowedThreadCount, 
+            dReal *A, dReal *d, unsigned totalBlockCount, unsigned rowCount, unsigned rowSkip, 
+            atomicord32 &ref_solvingBlockCompletionProgress, cellindexint *solvingBlockProgressDescriptors, 
+            FactorizationSolveL1StripeCellContext *solvingCellContexts, 
+            FactorizationFactorizeL1StripeContext *factorizingFactorizationContext,
+            dCallReleaseeID calculationFinishReleasee):
+            // m_A and m_ARow both start at A; only m_ARow is moved by incrementForNextBlock()
+            m_threading(threading), m_allowedThreadCount(allowedThreadCount),
+            m_A(A), m_ARow(A), m_d(d),
+            // Block iteration starts from block zero
+            m_solvingBlockIndex(0), m_totalBlockCount(totalBlockCount),
+            m_rowCount(rowCount), m_rowSkip(rowSkip),
+            // Solving-stage buffers are kept as a reference and pointers; this structure has no destructor releasing them
+            m_refSolvingBlockCompletionProgress(ref_solvingBlockCompletionProgress),
+            m_solvingBlockProgressDescriptors(solvingBlockProgressDescriptors),
+            m_solvingCellContexts(solvingCellContexts),
+            // Factorizing-stage context, likewise kept as a pointer
+            m_factorizingFactorizationContext(factorizingFactorizationContext),
+            // Releasee handle; its use is outside this part of the header
+            m_calculationFinishReleasee(calculationFinishReleasee)
+        {
+        }
+
+        void incrementForNextBlock()
+        {
+            // Moves on to the next solving block, FSL1S_BLOCK_SIZE rows further down the matrix
+            const unsigned rowsPerBlock = FSL1S_BLOCK_SIZE;
+            ++m_solvingBlockIndex;
+            m_ARow += rowsPerBlock * m_rowSkip;
+        }
+        // Data members; the constructor initializes them in this declaration order
+        dxThreadingBase *m_threading;
+        unsigned m_allowedThreadCount;
+        dReal *m_A; // start of the matrix as passed to the constructor
+        dReal *m_ARow; // current row position, advanced by incrementForNextBlock()
+        dReal *m_d;
+        unsigned m_solvingBlockIndex; // number of incrementForNextBlock() calls made so far
+        unsigned m_totalBlockCount;
+        unsigned m_rowCount;
+        unsigned m_rowSkip; // distance between consecutive rows, in dReal elements
+        atomicord32 &m_refSolvingBlockCompletionProgress;
+        cellindexint *m_solvingBlockProgressDescriptors;
+        FactorizationSolveL1StripeCellContext *m_solvingCellContexts;
+        FactorizationFactorizeL1StripeContext *m_factorizingFactorizationContext;
+        dCallReleaseeID m_calculationFinishReleasee;
+    };
+
+    static int factotLDLT_solvingComplete_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_solvingComplete(FactorLDLTWorkerContext &ref_context, unsigned ownThreadIndex);
+    // Pattern of this list: a generic "..._callback" taking an untyped callContext, followed by a typed routine taking the worker context
+    static int factotLDLT_solvingCompleteSync_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_solvingCompleteSync(FactorLDLTWorkerContext &ref_workerContext);
+
+    static int factotLDLT_scalingAndFactorizingComplete_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_scalingAndFactorizingComplete(FactorLDLTWorkerContext &ref_workerContext, unsigned ownThreadIndex);
+
+    static int factotLDLT_scalingAndFactorizingCompleteSync_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_scalingAndFactorizingCompleteSync(FactorLDLTWorkerContext &ref_workerContext);
+
+    static int factotLDLT_solvingFinal_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_solvingFinal(FactorLDLTWorkerContext &ref_context, unsigned ownThreadIndex);
+
+    static int factotLDLT_solvingFinalSync_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_solvingFinalSync(FactorLDLTWorkerContext &ref_workerContext);
+
+    static int factotLDLT_scalingAndFactorizingFinal_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void factotLDLT_scalingAndFactorizingFinal(FactorLDLTWorkerContext &ref_workerContext, unsigned ownThreadIndex);
+    // NOTE(review): "factot" looks like a misspelling of "factor"; the names are kept because the definitions elsewhere must match them
+    static int factotLDLT_completion_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+private:
+    struct FactorizationSolveL1StripeCellContext
+    {
+        template<unsigned int block_step, unsigned int b_rows>
+        static void initializePrecalculatedZs(dReal (&Z)[block_step][b_rows])
+        {
+            // Clears every element of the block_step x b_rows array Z.
+            // Only block_step == 2 with b_rows of 1 or 2 is accepted at compile time.
+            dSASSERT(block_step == 2);
+            dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+            for (unsigned int rowIndex = 0; rowIndex != block_step; ++rowIndex)
+            {
+                for (unsigned int columnIndex = 0; columnIndex != b_rows; ++columnIndex)
+                {
+                    Z[rowIndex][columnIndex] = 0;
+                }
+            }
+        }
+
+        template<unsigned int block_step, unsigned int b_rows>
+        void loadPrecalculatedZs(dReal (&Z)[block_step][b_rows]) const
+        {
+            // Copies the stored values from m_c into Z.
+            // Only the leading block_step x b_rows part of m_c is read.
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+            dSASSERT(b_rows <= dARRAY_SIZE(m_c[0]));
+            dSASSERT(block_step == 2);
+            dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+            // Same element order as the store routine uses
+            for (unsigned int rowIndex = 0; rowIndex != block_step; ++rowIndex)
+            {
+                for (unsigned int columnIndex = 0; columnIndex != b_rows; ++columnIndex)
+                {
+                    Z[rowIndex][columnIndex] = m_c[rowIndex][columnIndex];
+                }
+            }
+        }
+
+        template<unsigned int block_step, unsigned int b_rows>
+        void storePrecalculatedZs(const dReal (&Z)[block_step][b_rows])
+        {
+            // Copies the values of Z into m_c.
+            // Elements of m_c outside the leading block_step x b_rows part keep their previous contents.
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+            dSASSERT(b_rows <= dARRAY_SIZE(m_c[0]));
+            dSASSERT(block_step == 2);
+            dSASSERT(b_rows >= 1 && b_rows <= 2);
+
+            // Same element order as the load routine uses
+            for (unsigned int rowIndex = 0; rowIndex != block_step; ++rowIndex)
+            {
+                for (unsigned int columnIndex = 0; columnIndex != b_rows; ++columnIndex)
+                {
+                    m_c[rowIndex][columnIndex] = Z[rowIndex][columnIndex];
+                }
+            }
+        }
+
+        dReal m_c[FSL1S_BLOCK_SIZE][FSL1S_REGULAR_B_ROWS];
+        // dReal m_reserved[4];
+    };
+
+    static FactorizationSolveL1StripeCellContext &buildBlockContextRef(FactorizationSolveL1StripeCellContext *cellContexts, unsigned blockIndex, CellContextInstance contextInstance)
+    {   // Each block has CCI__MAX consecutive contexts; contextInstance selects one of them
+        return *(cellContexts + (blockIndex * CCI__MAX + contextInstance));
+    }
+
+    static FactorizationSolveL1StripeCellContext &buildResultContextRef(FactorizationSolveL1StripeCellContext *cellContexts, unsigned blockIndex, unsigned blockCount)
+    {   // The one-per-block contexts are located after the CCI__MAX * blockCount block contexts
+        return *(cellContexts + (blockCount * CCI__MAX + blockIndex));
+    }
+
+private:
+    struct FactorizationFactorizeL1StripeThreadContext
+    {   // Holds one set of Z values; a_rows selects how many are used (1: m_sameZ[0] only, 2: m_sameZ[0..1] and m_mixedZ[0])
+        template<unsigned int a_rows>
+        void assignDataSum(const dReal (&sameZ)[a_rows], const dReal (&mixedZ)[dMACRO_MAX(a_rows - 1, 1)], 
+            const FactorizationFactorizeL1StripeThreadContext &partialSumContext)
+        {   // Stores the element-wise sums of the given values and the values held by partialSumContext
+            m_sameZ[0] = sameZ[0] + partialSumContext.m_sameZ[0];
+            if (a_rows >= 2)
+            {
+                m_sameZ[1] = sameZ[1] + partialSumContext.m_sameZ[1];
+                m_mixedZ[0] = mixedZ[0] + partialSumContext.m_mixedZ[0];
+            }
+        }
+        // Stores the given values unchanged (nothing is added to them)
+        template<unsigned int a_rows>
+        void assignDataAlone(const dReal (&sameZ)[a_rows], const dReal (&mixedZ)[dMACRO_MAX(a_rows - 1, 1)])
+        {
+            m_sameZ[0] = sameZ[0];
+            if (a_rows >= 2)
+            {
+                m_sameZ[1] = sameZ[1];
+                m_mixedZ[0] = mixedZ[0];
+            }
+        }
+        // Copies the stored values out; with a_rows == 1 out_mixedZ is left unmodified
+        template<unsigned int a_rows>
+        void retrieveData(dReal (&out_sameZ)[a_rows], dReal (&out_mixedZ)[dMACRO_MAX(a_rows - 1, 1)]) const
+        {
+            out_sameZ[0] = m_sameZ[0];
+            if (a_rows >= 2)
+            {
+                out_sameZ[1] = m_sameZ[1];
+                out_mixedZ[0] = m_mixedZ[0];
+            }
+            dSASSERT(a_rows >= 1 && a_rows <= 2); // a_rows is a template constant, so the range is checked at compile time as in the sibling templates
+        }
+        // The array lengths follow FFL1S_REGULAR_A_ROWS, the larger of the two supported a_rows values
+        dReal m_sameZ[FFL1S_REGULAR_A_ROWS];
+        dReal m_mixedZ[dMACRO_MAX(FFL1S_REGULAR_A_ROWS - 1, 1)];
+        dReal m_reserved[1]; // [5]; // for alignment
+    };
+
+    struct FactorizationFactorizeL1StripeContext
+    {   // Header plus thread contexts; the memory estimate reserves (threads - 1) further thread contexts behind this structure
+        void initialize(unsigned threadCount)
+        {
+            m_threadsRunning = threadCount; // starts at the full thread count
+            m_nextColumnIndex = 0;
+            m_sumThreadIndex = 0;
+        }
+
+        atomicord32 m_threadsRunning;
+        atomicord32 m_nextColumnIndex;
+        volatile atomicord32 m_sumThreadIndex;
+        atomicord32 m_reserved[1]; // [13]; // for alignment
+        FactorizationFactorizeL1StripeThreadContext m_threadContexts[1]; // =[threadCount]
+    };
+
+private:
+    struct SolveL1StraightCellContext;
+
+    enum
+    {
+        SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM = 8, // divisor in deriveSolvingL1StraightThreadCount: one more thread per this many blocks
+
+        SL1S_B_STRIDE   = 1, // NOTE(review): presumably the b_stride argument of participateSolvingL1Straight (confirm)
+        SL1S_BLOCK_SIZE = 4, // length of SolveL1StraightCellContext::m_c
+    };
+
+    static unsigned restrictSolvingL1StraightAllowedThreadCount(
+        dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount); // returns a thread count; no body in this part of the file
+    static void doEstimateCooperativeSolvingL1StraightResourceRequirementsValidated(
+        dxResourceRequirementDescriptor *summaryRequirementsDescriptor, 
+        unsigned allowedThreadCount, unsigned rowCount); // "Validated": presumably expects an already restricted thread count (confirm)
+    static void doCooperativelySolveL1StraightValidated(
+        dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip); // same parameter list as the public cooperativelySolveL1Straight
+
+    static unsigned deriveSolvingL1StraightBlockCount(unsigned rowCount, unsigned blockStep)
+    {   // Blocks of blockStep rows needed to cover rowCount rows; a partial last block counts as one
+        return (rowCount + blockStep - 1) / blockStep;
+    }
+
+    struct SolvingL1StraightMemoryEstimates
+    {
+        void assignData(sizeint descriptorSizeRequired, sizeint contextSizeRequired)
+        {   // Records both byte sizes exactly as passed in
+            m_contextSizeRequired = contextSizeRequired;
+            m_descriptorSizeRequired = descriptorSizeRequired;
+        }
+        // The marking routine advances through the buffer by the descriptor size first, then by the context size
+        sizeint  m_descriptorSizeRequired;
+        sizeint  m_contextSizeRequired;
+    };
+
+    static unsigned deriveSolvingL1StraightThreadCount(unsigned blockCount, unsigned allowedThreadCount)
+    {   // One thread, plus one more per SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM blocks, capped at allowedThreadCount
+        dIASSERT(allowedThreadCount >= 1);
+        const unsigned blockBasedCount = 1 + blockCount / SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM;
+        if (blockBasedCount >= allowedThreadCount) { return allowedThreadCount; }
+        return dMACRO_MAX(blockBasedCount, 1U);
+    }
+
+    template<unsigned int block_step> // declaration only; the body is not in this part of the file
+    static sizeint estimateCooperativelySolvingL1StraightMemoryRequirement(unsigned rowCount, SolvingL1StraightMemoryEstimates &ref_solvingMemoryEstimates);
+
+    static void *markCooperativelySolvingL1StraightMemoryStructuresOut(void *buffer, 
+        const SolvingL1StraightMemoryEstimates &solvingMemoryEstimates, 
+        cellindexint *&out_blockProgressDescriptors, SolveL1StraightCellContext *&out_cellContexts)
+    {   // Splits buffer into the descriptor region followed by the context region; returns the first byte past both
+        void *const contextsStart = (uint8 *)buffer + solvingMemoryEstimates.m_descriptorSizeRequired;
+        void *const regionsEnd = (uint8 *)contextsStart + solvingMemoryEstimates.m_contextSizeRequired;
+        out_blockProgressDescriptors = (cellindexint *)buffer;
+        out_cellContexts = (SolveL1StraightCellContext *)contextsStart;
+        return regionsEnd;
+    }
+
+    template<unsigned int block_step> // declaration only; the body is not in this part of the file
+    static void initializeCooperativelySolveL1StraightMemoryStructures(unsigned rowCount, 
+        atomicord32 &out_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1StraightCellContext *cellContexts);
+    template<unsigned int block_step, unsigned int b_stride> // declaration only; the /*=...*/ notes give the expected initial value or element count
+    static void participateSolvingL1Straight(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip, 
+        volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+        SolveL1StraightCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex);
+
+private:
+    struct SolveL1StraightWorkerContext
+    {   // Argument bundle passed by reference to solveL1Straight_worker; filled by init() because it has no constructor
+        void init(const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip, 
+            atomicord32 &ref_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1StraightCellContext *cellContexts)
+        {
+            m_L = L;
+            m_b = b;
+            m_rowCount = rowCount;
+            m_rowSkip = rowSkip; 
+            m_ptrBlockCompletionProgress = &ref_blockCompletionProgress; // the reference argument is kept as a pointer
+            m_blockProgressDescriptors = blockProgressDescriptors;
+            m_cellContexts = cellContexts;
+        }
+
+        const dReal     *m_L; // read-only input matrix
+        dReal           *m_b; // writable vector
+        unsigned        m_rowCount;
+        unsigned        m_rowSkip; 
+        atomicord32     *m_ptrBlockCompletionProgress;
+        cellindexint    *m_blockProgressDescriptors;
+        SolveL1StraightCellContext *m_cellContexts;
+    };
+
+    static int solveL1Straight_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void solveL1Straight_worker(SolveL1StraightWorkerContext &ref_context, unsigned ownThreadIndex);
+    // Declarations only: two generic callbacks taking an untyped callContext, and the typed worker routine
+    static int solveL1Straight_completion_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+private:
+    struct SolveL1StraightCellContext
+    {
+        template<unsigned int block_step>
+        static void initializePrecalculatedZs(dReal (&Z)[block_step])
+        {   // Sets all block_step elements of Z to zero
+            for (unsigned int index = 0; index != block_step; ++index) { Z[index] = REAL(0.0); }
+        }
+
+        template<unsigned int block_step>
+        void loadPrecalculatedZs(dReal (&Z)[block_step]) const
+        {   // Copies the first block_step stored values into Z
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+
+            for (unsigned int index = 0; index != block_step; ++index) { Z[index] = m_c[index]; }
+        }
+
+        template<unsigned int block_step>
+        void storePrecalculatedZs(const dReal (&Z)[block_step])
+        {   // Copies the block_step values of Z into the start of m_c
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+
+            for (unsigned int index = 0; index != block_step; ++index) { m_c[index] = Z[index]; }
+        }
+
+        dReal m_c[SL1S_BLOCK_SIZE];
+    };
+
+
+    // Returns the cell for the given block and context instance; each block
+    // occupies CCI__MAX consecutive cells at the start of cellContexts.
+    static SolveL1StraightCellContext &buildBlockContextRef(SolveL1StraightCellContext *cellContexts, unsigned blockIndex, CellContextInstance contextInstance)
+    {
+        const unsigned cellOffset = blockIndex * CCI__MAX + contextInstance;
+        return cellContexts[cellOffset];
+    }
+
+    // Returns the result cell of a block; result cells are indexed right after
+    // the blockCount * CCI__MAX per-block cells, one per block.
+    static SolveL1StraightCellContext &buildResultContextRef(SolveL1StraightCellContext *cellContexts, unsigned blockIndex, unsigned blockCount)
+    {
+        const unsigned resultAreaStart = blockCount * CCI__MAX;
+        return cellContexts[resultAreaStart + blockIndex];
+    }
+
+
+private:
+    struct SolveL1TransposedCellContext;
+
+    // Constants of the transposed L1 solving; the cooperative block count minimum
+    // and the B stride simply reuse the corresponding SL1S_* values.
+    enum
+    {
+        SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM = SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM,
+
+        SL1T_B_STRIDE   = SL1S_B_STRIDE,
+        SL1T_BLOCK_SIZE = 4, // also the element count of SolveL1TransposedCellContext::m_c
+    };
+
+    static unsigned restrictSolvingL1TransposedAllowedThreadCount(
+        dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount);
+    static void doEstimateCooperativeSolvingL1TransposedResourceRequirementsValidated(
+        dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned rowCount);
+    static void doCooperativelySolveL1TransposedValidated(
+        dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip);
+
+    // Returns the number of blocks of blockStep rows needed to cover rowCount rows,
+    // that is rowCount / blockStep rounded up. blockStep must not be zero.
+    static unsigned deriveSolvingL1TransposedBlockCount(unsigned rowCount, unsigned blockStep)
+    {
+        // Round up from the quotient and remainder rather than adding (blockStep - 1)
+        // beforehand: that sum wraps around for row counts close to the unsigned
+        // maximum and would produce a too small block count.
+        return rowCount / blockStep + (rowCount % blockStep != 0 ? 1U : 0U);
+    }
+
+    // Pair of sizes consumed by markCooperativelySolvingL1TransposedMemoryStructuresOut:
+    // the extent of the progress descriptors area and of the cell contexts area.
+    struct SolvingL1TransposedMemoryEstimates
+    {
+        // Stores both size values unchanged.
+        void assignData(sizeint descriptorSizeRequired, sizeint contextSizeRequired)
+        {
+            m_contextSizeRequired = contextSizeRequired;
+            m_descriptorSizeRequired = descriptorSizeRequired;
+        }
+
+        sizeint  m_descriptorSizeRequired;  // size of the block progress descriptors area
+        sizeint  m_contextSizeRequired;     // size of the cell contexts area
+    };
+
+    // Thread count for the transposed solving; the value is taken from
+    // deriveSolvingL1StraightThreadCount() with the same arguments.
+    static unsigned deriveSolvingL1TransposedThreadCount(unsigned blockCount, unsigned allowedThreadCount)
+    {
+        // The assertion pins both cooperative block count minimums to the same value
+        // before the straight solving derivation is reused
+        dSASSERT(SL1T_COOPERATIVE_BLOCK_COUNT_MINIMUM + 0 == SL1S_COOPERATIVE_BLOCK_COUNT_MINIMUM);
+
+        const unsigned threadCount = deriveSolvingL1StraightThreadCount(blockCount, allowedThreadCount);
+        return threadCount;
+    }
+
+    template<unsigned int block_step>
+    static sizeint estimateCooperativelySolvingL1TransposedMemoryRequirement(unsigned rowCount, SolvingL1TransposedMemoryEstimates &ref_solvingMemoryEstimates);
+
+    // Splits a caller-supplied buffer into the two bookkeeping areas: the block progress
+    // descriptors start at the buffer beginning and the cell contexts follow
+    // m_descriptorSizeRequired uint8 units later. Returns the address just past
+    // the cell contexts area (m_contextSizeRequired units further).
+    static void *markCooperativelySolvingL1TransposedMemoryStructuresOut(void *buffer,
+        const SolvingL1TransposedMemoryEstimates &solvingMemoryEstimates,
+        cellindexint *&out_blockProgressDescriptors, SolveL1TransposedCellContext *&out_cellContexts)
+    {
+        uint8 *const descriptorsStart = (uint8 *)buffer;
+        uint8 *const contextsStart = descriptorsStart + solvingMemoryEstimates.m_descriptorSizeRequired;
+        uint8 *const structuresEnd = contextsStart + solvingMemoryEstimates.m_contextSizeRequired;
+
+        out_blockProgressDescriptors = (cellindexint *)descriptorsStart;
+        out_cellContexts = (SolveL1TransposedCellContext *)contextsStart;
+
+        return structuresEnd;
+    }
+
+    template<unsigned int block_step>
+    static void *allocateCooperativelySolveL1TransposedMemoryStructures(sizeint &out_sizeAllocated, unsigned rowCount, 
+        cellindexint *&out_blockProgressDescriptors, SolveL1TransposedCellContext *&out_cellContexts);
+    template<unsigned int block_step>
+    static void initializeCooperativelySolveL1TransposedMemoryStructures(unsigned rowCount, 
+        atomicord32 &out_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1TransposedCellContext *cellContexts);
+    template<unsigned int block_step, unsigned int b_stride>
+    static void participateSolvingL1Transposed(const dReal *L, dReal *B, unsigned rowCount, unsigned rowSkip, 
+        volatile atomicord32 &refBlockCompletionProgress/*=0*/, volatile cellindexint *blockProgressDescriptors/*=[blockCount]*/, 
+        SolveL1TransposedCellContext *cellContexts/*=[CCI__MAX x blockCount] + [blockCount]*/, unsigned ownThreadIndex);
+
+private:
+    // Argument bundle for the transposed L1 solving worker (it is what
+    // solveL1Transposed_worker receives by reference). The struct only stores
+    // what init() is given; it declares no destructor and releases nothing.
+    struct SolveL1TransposedWorkerContext
+    {
+        // Stores every argument as-is; the completion progress counter is kept by address.
+        void init(const dReal *L, dReal *b, unsigned rowCount, unsigned rowSkip,
+            atomicord32 &ref_blockCompletionProgress, cellindexint *blockProgressDescriptors, SolveL1TransposedCellContext *cellContexts)
+        {
+            // Shared bookkeeping structures
+            m_cellContexts = cellContexts;
+            m_blockProgressDescriptors = blockProgressDescriptors;
+            m_ptrBlockCompletionProgress = &ref_blockCompletionProgress;
+
+            // Matrix geometry
+            m_rowSkip = rowSkip;
+            m_rowCount = rowCount;
+
+            // Operand pointers
+            m_b = b;
+            m_L = L;
+        }
+
+        const dReal     *m_L;                           // the L argument of init()
+        dReal           *m_b;                           // the b argument of init()
+        unsigned        m_rowCount;
+        unsigned        m_rowSkip;
+        atomicord32     *m_ptrBlockCompletionProgress;  // address of the caller's progress counter
+        cellindexint    *m_blockProgressDescriptors;
+        SolveL1TransposedCellContext *m_cellContexts;
+    };
+
+    static int solveL1Transposed_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void solveL1Transposed_worker(SolveL1TransposedWorkerContext &ref_context, unsigned ownThreadIndex);
+
+    static int solveL1Transposed_completion_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+private:
+    // Storage cell holding SL1T_BLOCK_SIZE dReal values (m_c) together with
+    // helpers that move a Z array of block_step elements in and out of it.
+    struct SolveL1TransposedCellContext
+    {
+        // Sets all block_step elements of the caller's Z array to zero.
+        template<unsigned int block_step>
+        static void initializePrecalculatedZs(dReal (&Z)[block_step])
+        {
+            dReal *const zEnd = Z + block_step;
+            std::fill(Z, zEnd, REAL(0.0));
+        }
+
+        // Copies the first block_step values of m_c into Z.
+        template<unsigned int block_step>
+        void loadPrecalculatedZs(dReal (&Z)[block_step]) const
+        {
+            // The requested step must fit into the stored array
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+
+            const dReal *const storedEnd = m_c + block_step;
+            std::copy(m_c, storedEnd, Z);
+        }
+
+        // Copies the block_step values of Z into the beginning of m_c.
+        template<unsigned int block_step>
+        void storePrecalculatedZs(const dReal (&Z)[block_step])
+        {
+            // The requested step must fit into the stored array
+            dSASSERT(block_step <= dARRAY_SIZE(m_c));
+
+            const dReal *const sourceEnd = Z + block_step;
+            std::copy(Z, sourceEnd, m_c);
+        }
+
+        dReal m_c[SL1T_BLOCK_SIZE];
+    };
+
+    // Returns the cell for the given block and context instance; each block
+    // occupies CCI__MAX consecutive cells at the start of cellContexts.
+    static SolveL1TransposedCellContext &buildBlockContextRef(SolveL1TransposedCellContext *cellContexts, unsigned blockIndex, CellContextInstance contextInstance)
+    {
+        const unsigned cellOffset = blockIndex * CCI__MAX + contextInstance;
+        return cellContexts[cellOffset];
+    }
+
+    // Returns the result cell of a block; result cells are indexed right after
+    // the blockCount * CCI__MAX per-block cells, one per block.
+    static SolveL1TransposedCellContext &buildResultContextRef(SolveL1TransposedCellContext *cellContexts, unsigned blockIndex, unsigned blockCount)
+    {
+        const unsigned resultAreaStart = blockCount * CCI__MAX;
+        return cellContexts[resultAreaStart + blockIndex];
+    }
+
+private:
+    // Constants of the vector scaling operation.
+    enum
+    {
+        // Block granularity and the block count threshold
+        SV_BLOCK_SIZE                       = 128,
+        SV_COOPERATIVE_BLOCK_COUNT_MINIMUM  = 3,
+
+        // Element strides of the two vectors
+        SV_A_STRIDE                         = 1,
+        SV_D_STRIDE                         = 1,
+    };
+
+    static unsigned restrictScalingVectorAllowedThreadCount(
+        dxThreadingBase *threading, unsigned allowedThreadCount, unsigned elementCount);
+    static void doEstimateCooperativeScalingVectorResourceRequirementsValidated(
+        dxResourceRequirementDescriptor *summaryRequirementsDescriptor,
+        unsigned allowedThreadCount, unsigned elementCount);
+    static void doCooperativelyScaleVectorValidated(dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, 
+        dReal *vectorData, const dReal *scaleData, unsigned elementCount);
+
+    // Returns the number of blocks of blockStep elements needed to cover elementCount
+    // elements, that is elementCount / blockStep rounded up. blockStep must not be zero.
+    static unsigned deriveScalingVectorBlockCount(unsigned elementCount, unsigned blockStep)
+    {
+        // Round up from the quotient and remainder rather than adding (blockStep - 1)
+        // beforehand: that sum wraps around for element counts close to the unsigned
+        // maximum and would produce a too small block count.
+        return elementCount / blockStep + (elementCount % blockStep != 0 ? 1U : 0U);
+    }
+
+    // Picks the thread count for vector scaling: allowedThreadCount when lastBlockIndex
+    // reaches it, otherwise lastBlockIndex itself but never less than one.
+    static unsigned deriveScalingVectorThreadCount(unsigned lastBlockIndex, unsigned allowedThreadCount)
+    {
+        dIASSERT(allowedThreadCount >= 1);
+
+        if (lastBlockIndex >= allowedThreadCount)
+        {
+            return allowedThreadCount;
+        }
+
+        return dMACRO_MAX(lastBlockIndex, 1U);
+    }
+
+    // Resets the block completion progress counter to zero; this is the only
+    // structure this function initializes.
+    static void initializeCooperativelyScaleVectorMemoryStructures(atomicord32 &out_blockCompletionProgress)
+    {
+        out_blockCompletionProgress = 0;
+    }
+    template<unsigned int block_step, unsigned int a_stride, unsigned int d_stride>
+    static void participateScalingVector(dReal *ptrAStart, const dReal *ptrDStart, const unsigned elementCount,
+        volatile atomicord32 &refBlockCompletionProgress/*=0*/);
+
+private:
+    // Argument bundle for the vector scaling worker (it is what scaleVector_worker
+    // receives by reference). The struct only stores what init() is given.
+    struct ScaleVectorWorkerContext
+    {
+        // Stores every argument as-is; the completion progress counter is kept by address.
+        void init(dReal *vectorData, const dReal *scaleData, unsigned elementCount,
+            atomicord32 &ref_blockCompletionProgress)
+        {
+            m_ptrBlockCompletionProgress = &ref_blockCompletionProgress;
+            m_elementCount = elementCount;
+            m_scaleData = scaleData;
+            m_vectorData = vectorData;
+        }
+
+        dReal           *m_vectorData;                  // the vectorData argument of init()
+        const dReal     *m_scaleData;                   // the scaleData argument of init()
+        unsigned        m_elementCount;
+        atomicord32     *m_ptrBlockCompletionProgress;  // address of the caller's progress counter
+    };
+
+    static int scaleVector_worker_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    static void scaleVector_worker(ScaleVectorWorkerContext &ref_context);
+
+    static int scaleVector_completion_callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+
+
+private:
+    // Stages of the LDLT solving in their declaration order;
+    // SLDLTS__MIN equals the first stage and SLDLTS__MAX is one past the last one.
+    enum SolvingLDLTStage
+    {
+        SLDLTS__MIN = 0,
+
+        SLDLTS_SOLVING_STRAIGHT = SLDLTS__MIN,
+        SLDLTS_SCALING_VECTOR = SLDLTS_SOLVING_STRAIGHT + 1,
+        SLDLTS_SOLVING_TRANSPOSED = SLDLTS_SCALING_VECTOR + 1,
+
+        SLDLTS__MAX = SLDLTS_SOLVING_TRANSPOSED + 1,
+    };
+
+    // Strides for the LDLT solving; both values alias constants declared
+    // for other operations (SL1S_B_STRIDE and FLDLT_D_STRIDE).
+    enum
+    {
+        SLDLT_B_STRIDE          = SL1S_B_STRIDE,
+        SLDLT_D_STRIDE          = FLDLT_D_STRIDE,
+    };
+
+    static unsigned restrictSolvingLDLTAllowedThreadCount(
+        dxThreadingBase *threading, unsigned allowedThreadCount, unsigned rowCount, unsigned &out_stageBlockCountSifficiencyMask);
+    
+    static void doCooperativelySolveLDLTValidated(
+        dxRequiredResourceContainer *resourceContainer, unsigned allowedThreadCount, unsigned stageBlockCountSifficiencyMask, 
+        const dReal *L, const dReal *d, dReal *b, unsigned rowCount, unsigned rowSkip);
+};
+
+
+#endif
+
diff --git a/libs/ode-0.16.1/ode/src/threading_atomics_provs.h b/libs/ode-0.16.1/ode/src/threading_atomics_provs.h
new file mode 100644
index 0000000..3afc7b3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_atomics_provs.h
@@ -0,0 +1,194 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading atomics providers file.                                     *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Fake atomics provider for built-in threading support provider.
+ *  OU-based atomics provider for built-in threading support provider.
+ *
+ *  The classes have been moved into a separate header as they are to be used 
+ *  in both WIN and POSIX implementations.
+ */
+
+
+#ifndef _ODE_THREADING_ATOMICS_PROVS_H_
+#define _ODE_THREADING_ATOMICS_PROVS_H_
+
+
+#include <ode/odeconfig.h>
+#include <ode/error.h>
+
+
+/************************************************************************/
+/* Fake atomics provider class implementation                           */
+/************************************************************************/
+
+// Stand-in atomics provider: every operation is performed with plain reads
+// and writes through the volatile pointer it is given, without any atomic
+// primitive being invoked.
+class dxFakeAtomicsProvider
+{
+public:
+    typedef unsigned long atomicord_t;
+    typedef void *atomicptr_t;
+
+public:
+    // Adds one to the target value; nothing is returned.
+    static void IncrementTargetNoRet(volatile atomicord_t *value_accumulator_ptr)
+    {
+        volatile atomicord_t &accumulator = *value_accumulator_ptr;
+        ++accumulator;
+    }
+
+    // Subtracts one from the target value; nothing is returned.
+    static void DecrementTargetNoRet(volatile atomicord_t *value_accumulator_ptr)
+    {
+        volatile atomicord_t &accumulator = *value_accumulator_ptr;
+        --accumulator;
+    }
+
+    // Returns the value currently stored in the target.
+    static atomicord_t QueryTargetValue(volatile atomicord_t *value_storage_ptr)
+    {
+        const atomicord_t current_value = *value_storage_ptr;
+        return current_value;
+    }
+
+    // Adds value_addend to a target of type_size bytes and returns the previous value.
+    // Only explicit specializations are provided.
+    template<unsigned type_size>
+    static sizeint AddValueToTarget(volatile void *value_accumulator_ptr, diffint value_addend);
+
+    // Stores new_value into the target if it currently equals comparand_value.
+    // Returns true if the store was performed and false otherwise.
+    static bool CompareExchangeTargetPtr(volatile atomicptr_t *pointer_storage_ptr,
+        atomicptr_t comparand_value, atomicptr_t new_value)
+    {
+        const atomicptr_t stored_value = *pointer_storage_ptr;
+
+        if (stored_value != comparand_value)
+        {
+            return false;
+        }
+
+        *pointer_storage_ptr = new_value;
+        return true;
+    }
+};
+
+// Specialization for targets of sizeof(atomicord_t) bytes: the target is
+// accessed as an atomicord_t and its previous value is returned.
+template<>
+inline sizeint dxFakeAtomicsProvider::AddValueToTarget<sizeof(dxFakeAtomicsProvider::atomicord_t)>(volatile void *value_accumulator_ptr, diffint value_addend)
+{
+    volatile atomicord_t *const accumulator = (volatile atomicord_t *)value_accumulator_ptr;
+
+    const atomicord_t previous_value = *accumulator;
+    *accumulator = previous_value + (atomicord_t)value_addend;
+
+    return previous_value;
+}
+
+// Specialization for targets of 2 * sizeof(atomicord_t) bytes: the target is
+// accessed as an atomicptr_t, the addition is done on its sizeint representation
+// and the previous value is returned.
+template<>
+inline sizeint dxFakeAtomicsProvider::AddValueToTarget<2 * sizeof(dxFakeAtomicsProvider::atomicord_t)>(volatile void *value_accumulator_ptr, diffint value_addend)
+{
+    volatile atomicptr_t *const accumulator = (volatile atomicptr_t *)value_accumulator_ptr;
+
+    const atomicptr_t previous_value = *accumulator;
+    *accumulator = (atomicptr_t)((sizeint)previous_value + (sizeint)value_addend);
+
+    return (sizeint)previous_value;
+}
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+/************************************************************************/
+/* dxOUAtomicsProvider class implementation                             */
+/************************************************************************/
+
+#if !dOU_ENABLED
+#error OU library must be enabled for this to compile
+#elif !dATOMICS_ENABLED
+#error OU Atomics must be enabled for this to compile
+#endif
+#include "odeou.h"
+
+// Atomics provider that forwards its operations to the atomic functions
+// found in _OU_NAMESPACE.
+class dxOUAtomicsProvider
+{
+public:
+    typedef _OU_NAMESPACE::atomicord32 atomicord_t;
+    typedef _OU_NAMESPACE::atomicptr atomicptr_t;
+
+public:
+    // Forwards to AtomicIncrementNoResult.
+    static void IncrementTargetNoRet(volatile atomicord_t *value_accumulator_ptr)
+    {
+        _OU_NAMESPACE::AtomicIncrementNoResult(value_accumulator_ptr);
+    }
+
+    // Forwards to AtomicDecrementNoResult.
+    static void DecrementTargetNoRet(volatile atomicord_t *value_accumulator_ptr)
+    {
+        _OU_NAMESPACE::AtomicDecrementNoResult(value_accumulator_ptr);
+    }
+
+    // Returns the target value. The value read is compare-exchanged with itself
+    // (which does not alter the storage) and re-read if that exchange fails.
+    static atomicord_t QueryTargetValue(volatile atomicord_t *value_storage_ptr)
+    {
+        // Query value with memory barrier before
+        atomicord_t observed_value = *value_storage_ptr;
+
+        const bool value_confirmed = _OU_NAMESPACE::AtomicCompareExchange(value_storage_ptr, observed_value, observed_value);
+        if (!value_confirmed)
+        {
+            // The compare-exchange reported a mismatch - take a fresh read
+            observed_value = *value_storage_ptr;
+        }
+
+        return observed_value;
+    }
+
+    // Adds value_addend to a target of type_size bytes and returns the previous value.
+    // Only explicit specializations are provided.
+    template<unsigned type_size>
+    static sizeint AddValueToTarget(volatile void *value_accumulator_ptr, diffint value_addend);
+
+    // Forwards to AtomicCompareExchangePointer and returns its result.
+    static bool CompareExchangeTargetPtr(volatile atomicptr_t *pointer_storage_ptr,
+        atomicptr_t comparand_value, atomicptr_t new_value)
+    {
+        return _OU_NAMESPACE::AtomicCompareExchangePointer(pointer_storage_ptr, comparand_value, new_value);
+    }
+};
+
+// Specialization for targets of sizeof(atomicord_t) bytes: forwards to
+// AtomicExchangeAdd and returns its result.
+template<>
+inline sizeint dxOUAtomicsProvider::AddValueToTarget<sizeof(dxOUAtomicsProvider::atomicord_t)>(volatile void *value_accumulator_ptr, diffint value_addend)
+{
+    volatile atomicord_t *const accumulator = (volatile atomicord_t *)value_accumulator_ptr;
+
+    return _OU_NAMESPACE::AtomicExchangeAdd(accumulator, (atomicord_t)value_addend);
+}
+
+// Specialization for targets of 2 * sizeof(atomicord_t) bytes: the addition is
+// carried out as a compare-exchange retry loop on an atomicptr_t target.
+// Returns the value the target had before the successful exchange.
+template<>
+inline sizeint dxOUAtomicsProvider::AddValueToTarget<2 * sizeof(dxOUAtomicsProvider::atomicord_t)>(volatile void *value_accumulator_ptr, diffint value_addend)
+{
+    volatile atomicptr_t *const accumulator = (volatile atomicptr_t *)value_accumulator_ptr;
+
+    atomicptr_t previous_value;
+    atomicptr_t updated_value;
+
+    do
+    {
+        // Recompute the sum from a fresh read on every attempt
+        previous_value = *accumulator;
+        updated_value = (atomicptr_t)((sizeint)previous_value + (sizeint)value_addend);
+    }
+    while (!_OU_NAMESPACE::AtomicCompareExchangePointer(accumulator, previous_value, updated_value));
+
+    return (sizeint)previous_value;
+}
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#endif // #ifndef _ODE_THREADING_ATOMICS_PROVS_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_base.cpp b/libs/ode-0.16.1/ode/src/threading_base.cpp
new file mode 100644
index 0000000..9272eff
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_base.cpp
@@ -0,0 +1,135 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class implementation file.                     *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Threading base class to be used for inheritance by dxWorld, dxSpace and others 
+ * to take advantage of threaded execution.
+ */
+
+
+#include <ode/common.h>
+#include "config.h"
+#include "error.h"
+#include "threading_base.h"
+
+
+// Releases the stock call wait if one is currently stored
+// (DoFreeStockCallWait does nothing when there is none).
+dxThreadingBase::~dxThreadingBase()
+{
+    DoFreeStockCallWait();
+}
+
+
+// Posts member_count calls of call_func that share call_context; each call receives
+// its zero-based position within the group as the instance index.
+// Every call is posted with no post releasee output, zero own dependencies and
+// no call wait, and names dependent_releasee as its dependent. The dependency
+// count of dependent_releasee is not altered here.
+// member_count must not be zero.
+void dxThreadingBase::PostThreadedCallsGroup(
+    int *out_summary_fault/*=NULL*/, 
+    ddependencycount_t member_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+    dThreadedCallFunction *call_func, void *call_context, 
+    const char *call_name/*=NULL*/) const
+{
+    dIASSERT(member_count != 0);
+
+    dThreadingImplementationID impl;
+    const dxThreadingFunctionsInfo *functions = FindThreadingImpl(impl);
+
+    // The index is declared with the type of the count itself: an index of a narrower
+    // type would wrap around before reaching a larger count and the != test would never fail.
+    for (ddependencycount_t member_index = 0; member_index != member_count; ++member_index) {
+        // Post individual group member jobs
+        functions->post_call(impl, out_summary_fault, NULL, 0, dependent_releasee, NULL, call_func, call_context, member_index, call_name);
+    }
+}
+
+// Posts member_count calls of call_func that share call_context; unlike a regular
+// group, every call receives the same instance index - index_override.
+// Every call is posted with no post releasee output, zero own dependencies and
+// no call wait, and names dependent_releasee as its dependent. The dependency
+// count of dependent_releasee is not altered here.
+// member_count must not be zero.
+void dxThreadingBase::PostThreadedCallsIndexOverridenGroup(int *out_summary_fault/*=NULL*/, 
+    ddependencycount_t member_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+    dThreadedCallFunction *call_func, void *call_context, unsigned index_override, 
+    const char *call_name/*=NULL*/) const
+{
+    dIASSERT(member_count != 0);
+
+    dThreadingImplementationID impl;
+    const dxThreadingFunctionsInfo *functions = FindThreadingImpl(impl);
+
+    // The counter is declared with the type of the count itself: a counter of a narrower
+    // type would wrap around before reaching a larger count and the != test would never fail.
+    for (ddependencycount_t member_index = 0; member_index != member_count; ++member_index) {
+        // Post individual group member jobs
+        functions->post_call(impl, out_summary_fault, NULL, 0, dependent_releasee, NULL, call_func, call_context, index_override, call_name);
+    }
+}
+
+// Posts a single call on behalf of a dependent releasee whose dependency count
+// does not account for this call yet: the count of dependent_releasee is raised
+// by one first and the call is posted after that with the arguments forwarded unchanged.
+void dxThreadingBase::PostThreadedCallForUnawareReleasee(
+    int *out_summary_fault/*=NULL*/,
+    dCallReleaseeID *out_post_releasee/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/,
+    dCallWaitID call_wait/*=NULL*/,
+    dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index,
+    const char *call_name/*=NULL*/) const
+{
+    dThreadingImplementationID active_impl;
+    const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+    // Register the extra dependency before the call gets a chance to run
+    impl_functions->alter_call_dependencies_count(active_impl, dependent_releasee, 1);
+
+    impl_functions->post_call(active_impl, out_summary_fault, out_post_releasee, dependencies_count,
+        dependent_releasee, call_wait, call_func, call_context, instance_index, call_name);
+}
+
+
+// Locates the threading implementation to be used by this object.
+// If GetFunctionsInfo() returns a functions table, that table is returned and
+// out_impl_found receives GetThreadingImpl(). Otherwise both the table and the
+// implementation are requested from the default implementation provider.
+const dxThreadingFunctionsInfo *dxThreadingBase::FindThreadingImpl(dThreadingImplementationID &out_impl_found) const
+{
+    const dxThreadingFunctionsInfo *functions_found = GetFunctionsInfo();
+
+    if (functions_found != NULL)
+    {
+        out_impl_found = GetThreadingImpl();
+    }
+    else
+    {
+        // The provider pointer starts as NULL and is only set by setThreadingDefaultImplProvider(),
+        // so catch a missing provider here rather than crashing on the dereference below
+        dIASSERT(m_default_impl_provider != NULL);
+
+        functions_found = m_default_impl_provider->retrieveThreadingDefaultImpl(out_impl_found);
+    }
+
+    return functions_found;
+}
+
+
+// Allocates a call wait and stores it as the stock call wait of the object.
+// Must only be called while no stock call wait is stored. Returns the allocated
+// wait, or NULL if the allocation failed (nothing is stored in that case).
+dCallWaitID dxThreadingBase::DoAllocateStockCallWait()
+{
+    dIASSERT(GetStockCallWait() == NULL);
+
+    const dCallWaitID allocated_wait = AllocThreadedCallWait();
+
+    if (allocated_wait == NULL)
+    {
+        return allocated_wait;
+    }
+
+    SetStockCallWait(allocated_wait);
+    return allocated_wait;
+}
+
+// Frees the stock call wait and clears the stored value.
+// Does nothing if no stock call wait is stored.
+void dxThreadingBase::DoFreeStockCallWait()
+{
+    const dCallWaitID current_wait = GetStockCallWait();
+
+    if (current_wait == NULL)
+    {
+        return;
+    }
+
+    FreeThreadedCallWait(current_wait);
+    SetStockCallWait(NULL);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/threading_base.h b/libs/ode-0.16.1/ode/src/threading_base.h
new file mode 100644
index 0000000..cb38f7f
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_base.h
@@ -0,0 +1,291 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading base wrapper class header file.                             *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ * Threading base class to be used for inheritance by dxWorld, dxSpace and others 
+ * to take advantage of threaded execution.
+ */
+
+
+#ifndef _ODE_THREADING_BASE_H_
+#define _ODE_THREADING_BASE_H_
+
+
+#include "common.h"
+#include <ode/threading.h>
+
+
+// Interface queried by dxThreadingBase::FindThreadingImpl() when GetFunctionsInfo()
+// returns NULL for the object.
+// Note that the interface declares no virtual destructor.
+struct dxIThreadingDefaultImplProvider
+{
+public:
+    // Returns the functions table of the default threading implementation and
+    // stores the matching implementation ID into out_defaultImpl.
+    virtual const dxThreadingFunctionsInfo *retrieveThreadingDefaultImpl(dThreadingImplementationID &out_defaultImpl) = 0;
+};
+
+
+class dxThreadingBase
+{
+protected:
+    // Creates the object with no default implementation provider, no threading
+    // implementation assigned and no stock call wait; all four members start as NULL.
+    dxThreadingBase():
+         m_default_impl_provider(NULL),
+         m_functions_info(NULL), 
+         m_threading_impl(NULL),
+         m_stock_call_wait(NULL)
+     {
+     }
+
+     // This ought to be done via constructor, but passing 'this' in base class initializer emits a warning in MSVC :(
+     // Stores the provider that FindThreadingImpl() falls back to when GetFunctionsInfo() returns NULL.
+     // The assertion checks that no stock call wait is stored at the time of the call.
+     void setThreadingDefaultImplProvider(dxIThreadingDefaultImplProvider *default_impl_provider)
+     {
+         m_default_impl_provider = default_impl_provider;
+         dIASSERT(GetStockCallWait() == NULL);
+     }
+
+     ~dxThreadingBase();
+
+public:
+    // Assigns (or clears) the threading implementation used by the object.
+    // functions_info and threading_impl must either both be NULL or both be non-NULL.
+    // Any stored stock call wait is freed before the new values are stored.
+    void assignThreadingImpl(const dxThreadingFunctionsInfo *functions_info, dThreadingImplementationID threading_impl)
+    {
+        dAASSERT((functions_info == NULL) == (threading_impl == NULL));
+
+        // Free the stock call wait first to have it executed before new pointer values are assigned
+        DoFreeStockCallWait();
+
+        m_functions_info = functions_info;
+        m_threading_impl = threading_impl;
+    }
+
+public:
+    // Returns the thread count available for an operation.
+    // The count reported by RetrieveThreadingThreadCount() is optionally increased by one
+    // for the calling thread (unless it already equals UINT_MAX) and is then capped by
+    // limitValue, except when limitValue is dTHREADING_THREAD_COUNT_UNLIMITED.
+    // If ptrOut_activeThreadCount is not NULL it receives the unadjusted reported count.
+    unsigned calculateThreadingLimitedThreadCount(unsigned limitValue, bool countCallerAsExtraThread, unsigned *ptrOut_activeThreadCount=NULL) const
+    {
+        const unsigned activeThreadCount = RetrieveThreadingThreadCount();
+
+        if (ptrOut_activeThreadCount != NULL)
+        {
+            *ptrOut_activeThreadCount = activeThreadCount;
+        }
+
+        unsigned effectiveThreadCount = activeThreadCount;
+
+        if (countCallerAsExtraThread && activeThreadCount != UINT_MAX)
+        {
+            // Account for the caller without overflowing the counter
+            effectiveThreadCount = activeThreadCount + 1;
+        }
+
+        if (limitValue == dTHREADING_THREAD_COUNT_UNLIMITED)
+        {
+            return effectiveThreadCount;
+        }
+
+        return dMACRO_MIN(limitValue, effectiveThreadCount);
+    }
+
+public:
+    // Returns the stock call wait of the object. A wait that is already stored is reset
+    // before being returned; otherwise a new one is allocated via DoAllocateStockCallWait().
+    dCallWaitID AllocateOrRetrieveStockCallWaitID()
+    {
+        dCallWaitID stock_wait_id = GetStockCallWait();
+
+        if (stock_wait_id == NULL)
+        {
+            return DoAllocateStockCallWait();
+        }
+
+        ResetThreadedCallWait(stock_wait_id);
+        return stock_wait_id;
+    }
+
+public:
+    // Forwards to the alloc_mutex_group entry of the threading implementation
+    // located by FindThreadingImpl() and returns its result.
+    dMutexGroupID AllocMutexGroup(dmutexindex_t Mutex_count, const char *const *Mutex_names_ptr/*=NULL*/) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        return impl_functions->alloc_mutex_group(active_impl, Mutex_count, Mutex_names_ptr);
+    }
+
+    // Forwards to the free_mutex_group entry of the threading implementation
+    // located by FindThreadingImpl().
+    void FreeMutexGroup(dMutexGroupID mutex_group) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->free_mutex_group(active_impl, mutex_group);
+    }
+
+    // Forwards to the lock_group_mutex entry of the threading implementation
+    // located by FindThreadingImpl().
+    void LockMutexGroupMutex(dMutexGroupID mutex_group, dmutexindex_t mutex_index) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->lock_group_mutex(active_impl, mutex_group, mutex_index);
+    }
+
+//     bool TryLockMutexGroupMutex(dMutexGroupID mutex_group, dmutexindex_t mutex_index) const
+//     {
+//         dThreadingImplementationID impl;
+//         const dxThreadingFunctionsInfo *functions = FindThreadingImpl(impl);
+//         return functions->trylock_group_mutex(impl, mutex_group, mutex_index) != 0;
+//     }
+
+    // Forwards to the unlock_group_mutex entry of the threading implementation
+    // located by FindThreadingImpl().
+    void UnlockMutexGroupMutex(dMutexGroupID mutex_group, dmutexindex_t mutex_index) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->unlock_group_mutex(active_impl, mutex_group, mutex_index);
+    }
+
+    // Forwards to the alloc_call_wait entry of the threading implementation
+    // located by FindThreadingImpl() and returns its result.
+    dCallWaitID AllocThreadedCallWait() const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        return impl_functions->alloc_call_wait(active_impl);
+    }
+
+    // Forwards to the reset_call_wait entry of the threading implementation
+    // located by FindThreadingImpl().
+    void ResetThreadedCallWait(dCallWaitID call_wait) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->reset_call_wait(active_impl, call_wait);
+    }
+
+    // Forwards to the free_call_wait entry of the threading implementation
+    // located by FindThreadingImpl().
+    void FreeThreadedCallWait(dCallWaitID call_wait) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->free_call_wait(active_impl, call_wait);
+    }
+
+    // Forwards all the arguments unchanged to the post_call entry of the threading
+    // implementation located by FindThreadingImpl().
+    void PostThreadedCall(int *out_summary_fault/*=NULL*/,
+        dCallReleaseeID *out_post_releasee/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/,
+        dCallWaitID call_wait/*=NULL*/,
+        dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index,
+        const char *call_name/*=NULL*/) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->post_call(active_impl, out_summary_fault, out_post_releasee, dependencies_count,
+            dependent_releasee, call_wait, call_func, call_context, instance_index, call_name);
+    }
+
+    // Forwards to the alter_call_dependencies_count entry of the threading
+    // implementation located by FindThreadingImpl().
+    void AlterThreadedCallDependenciesCount(dCallReleaseeID target_releasee,
+        ddependencychange_t dependencies_count_change) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->alter_call_dependencies_count(active_impl, target_releasee, dependencies_count_change);
+    }
+
+    // Invokes the wait_call entry of the threading implementation located by
+    // FindThreadingImpl() and then resets the call wait via reset_call_wait
+    // (the reset is issued regardless of the wait outcome).
+    void WaitThreadedCallExclusively(int *out_wait_status/*=NULL*/,
+        dCallWaitID call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/,
+        const char *wait_name/*=NULL*/) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->wait_call(active_impl, out_wait_status, call_wait, timeout_time_ptr, wait_name);
+
+        // Return the wait object to its reset state once the wait call is over
+        impl_functions->reset_call_wait(active_impl, call_wait);
+    }
+
+    // Invokes the wait_call entry of the threading implementation located by
+    // FindThreadingImpl(); unlike WaitThreadedCallExclusively() the call wait
+    // is not reset afterwards.
+    void WaitThreadedCallCollectively(int *out_wait_status/*=NULL*/,
+        dCallWaitID call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/,
+        const char *wait_name/*=NULL*/) const
+    {
+        dThreadingImplementationID active_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(active_impl);
+
+        impl_functions->wait_call(active_impl, out_wait_status, call_wait, timeout_time_ptr, wait_name);
+    }
+
+    // Returns the value reported by the retrieve_thread_count entry of the
+    // functions table.
+    unsigned RetrieveThreadingThreadCount() const
+    {
+        dThreadingImplementationID threading_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(threading_impl);
+
+        return impl_functions->retrieve_thread_count(threading_impl);
+    }
+
+    // Calls the preallocate_resources_for_calls entry of the functions table and
+    // returns true when that entry reports a non-zero result.
+    bool PreallocateResourcesForThreadedCalls(unsigned max_simultaneous_calls_estimate) const
+    {
+        dThreadingImplementationID threading_impl;
+        const dxThreadingFunctionsInfo *const impl_functions = FindThreadingImpl(threading_impl);
+
+        const bool preallocation_succeeded =
+            impl_functions->preallocate_resources_for_calls(threading_impl, max_simultaneous_calls_estimate) != 0;
+        return preallocation_succeeded;
+    }
+
+public:
+    // The three posting helpers below are only declared here; their definitions
+    // are not part of this class body.
+
+    // NOTE(review): presumably posts member_count calls of call_func that share
+    // call_context and dependent_releasee - confirm against the definition.
+    void PostThreadedCallsGroup(int *out_summary_fault/*=NULL*/, 
+        ddependencycount_t member_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+        dThreadedCallFunction *call_func, void *call_context, 
+        const char *call_name/*=NULL*/) const;
+    // NOTE(review): same parameter list as PostThreadedCallsGroup() plus index_override;
+    // presumably index_override replaces the per-call instance index - confirm.
+    void PostThreadedCallsIndexOverridenGroup(int *out_summary_fault/*=NULL*/, 
+        ddependencycount_t member_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+        dThreadedCallFunction *call_func, void *call_context, unsigned index_override, 
+        const char *call_name/*=NULL*/) const;
+    // Takes exactly the same parameter list as PostThreadedCall().
+    // NOTE(review): how the "unaware" releasee is handled differently is not visible here.
+    void PostThreadedCallForUnawareReleasee(int *out_summary_fault/*=NULL*/, 
+        dCallReleaseeID *out_post_releasee/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+        dCallWaitID call_wait/*=NULL*/, 
+        dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index, 
+        const char *call_name/*=NULL*/) const;
+
+protected:
+    // Returns the functions table to dispatch through and stores the matching
+    // implementation ID in out_impl_found. The inline wrappers of this class pass
+    // both values straight to the table entries without checking them.
+    // NOTE(review): the selection logic is defined elsewhere; presumably it falls
+    // back to m_default_impl_provider when no table has been assigned - confirm.
+    const dxThreadingFunctionsInfo *FindThreadingImpl(dThreadingImplementationID &out_impl_found) const;
+
+private:
+    // Allocation/release of the stock call wait; only declared here.
+    // NOTE(review): presumably these manage the handle kept in m_stock_call_wait - confirm.
+    dCallWaitID DoAllocateStockCallWait();
+    void DoFreeStockCallWait();
+
+private:
+    // Plain read accessors for the functions table and the implementation ID.
+    const dxThreadingFunctionsInfo *GetFunctionsInfo() const
+    {
+        return m_functions_info;
+    }
+
+    dThreadingImplementationID GetThreadingImpl() const
+    {
+        return m_threading_impl;
+    }
+
+    // Accessor pair for the stock call wait handle.
+    void SetStockCallWait(dCallWaitID value)
+    {
+        m_stock_call_wait = value;
+    }
+
+    dCallWaitID GetStockCallWait() const
+    {
+        return m_stock_call_wait;
+    }
+
+private:
+    // NOTE(review): not referenced in the visible part of the class; presumably the
+    // fallback source consulted by FindThreadingImpl() - confirm.
+    dxIThreadingDefaultImplProvider   *m_default_impl_provider;
+    // Functions table returned by GetFunctionsInfo()
+    const dxThreadingFunctionsInfo    *m_functions_info;
+    // Implementation ID returned by GetThreadingImpl()
+    dThreadingImplementationID        m_threading_impl;
+    // Handle read and written by GetStockCallWait()/SetStockCallWait()
+    dCallWaitID                       m_stock_call_wait;
+};
+
+/*
+ *  Scoped lock for a single mutex of a mutex group.
+ *
+ *  The constructor locks the mutex through the given threading base and the
+ *  destructor unlocks it if it is still held. UnlockMutex()/RelockMutex() allow
+ *  the lock to be dropped and taken again within the scope.
+ *
+ *  The object is not copyable: a copy would carry the same "locked" flag and
+ *  would unlock the very same mutex a second time on destruction.
+ */
+class dxMutexGroupLockHelper
+{
+public:
+    // Locks mutex mutex_index of mutex_group. threading_base must not be NULL and
+    // must stay valid for the lifetime of the helper, as it is used for unlocking.
+    dxMutexGroupLockHelper(dxThreadingBase *threading_base, dMutexGroupID mutex_group, dmutexindex_t mutex_index):
+        m_threading_base(threading_base),
+        m_mutex_group(mutex_group),
+        m_mutex_index(mutex_index),
+        m_mutex_locked(true)
+    {
+        threading_base->LockMutexGroupMutex(mutex_group, mutex_index);
+    }
+
+    // Unlocks the mutex unless it has already been released with UnlockMutex()
+    ~dxMutexGroupLockHelper()
+    {
+        if (m_mutex_locked)
+        {
+            m_threading_base->UnlockMutexGroupMutex(m_mutex_group, m_mutex_index);
+        }
+    }
+
+    // Releases the mutex early; it must currently be held by this helper
+    void UnlockMutex()
+    {
+        dIASSERT(m_mutex_locked);
+
+        m_threading_base->UnlockMutexGroupMutex(m_mutex_group, m_mutex_index);
+        m_mutex_locked = false;
+    }
+
+    // Takes the mutex again after UnlockMutex(); it must not be held by this helper
+    void RelockMutex()
+    {
+        dIASSERT(!m_mutex_locked);
+
+        m_threading_base->LockMutexGroupMutex(m_mutex_group, m_mutex_index);
+        m_mutex_locked = true;
+    }
+
+private:
+    // Copying is disabled: declared private and intentionally left undefined
+    dxMutexGroupLockHelper(const dxMutexGroupLockHelper &);
+    dxMutexGroupLockHelper &operator =(const dxMutexGroupLockHelper &);
+
+private:
+    dxThreadingBase                   *m_threading_base;
+    dMutexGroupID                     m_mutex_group;
+    dmutexindex_t                     m_mutex_index;
+    bool                              m_mutex_locked; // true while this helper holds the mutex
+};
+
+#endif // #ifndef _ODE_THREADING_BASE_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_fake_sync.h b/libs/ode-0.16.1/ode/src/threading_fake_sync.h
new file mode 100644
index 0000000..d1c2524
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_fake_sync.h
@@ -0,0 +1,128 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading fake synchronization objects file.                          *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Self-wakeup implementation for built-in threading support provider.
+ *  Fake mutex implementation for built-in threading support provider.
+ *  Fake lull implementation for built-in threading support provider.
+ *
+ *  The classes have been moved into a separate header as they are to be used 
+ *  in both WIN and POSIX implementations.
+ */
+
+
+#ifndef _ODE_THREADING_FAKE_SYNC_H_
+#define _ODE_THREADING_FAKE_SYNC_H_
+
+
+#include <ode/odeconfig.h>
+#include <ode/error.h>
+
+
+/************************************************************************/
+/* dxSelfWakeup class definition                                        */
+/************************************************************************/
+
+/*
+ *  Wakeup object made of two plain flags, with no OS synchronization primitive.
+ */
+class dxSelfWakeup
+{
+public:
+    dxSelfWakeup(): m_wakeup_state(false), m_state_is_permanent(false) {}
+
+    // There is nothing to create, so initialization always succeeds
+    bool InitializeObject()
+    {
+        return true;
+    }
+
+public:
+    // Clears both the signal and its permanence
+    void ResetWakeup()
+    {
+        m_wakeup_state = false;
+        m_state_is_permanent = false;
+    }
+
+    // Raises the signal without making it permanent
+    void WakeupAThread()
+    {
+        dIASSERT(!m_state_is_permanent); // Wakeup should not be used after permanent signal
+
+        m_wakeup_state = true;
+    }
+
+    // Raises the signal and marks it permanent
+    void WakeupAllThreads()
+    {
+        m_wakeup_state = true;
+        m_state_is_permanent = true;
+    }
+
+    bool WaitWakeup(const dThreadedWaitTime *timeout_time_ptr);
+
+private:
+    bool          m_wakeup_state;       // true while a signal is raised
+    bool          m_state_is_permanent; // true from WakeupAllThreads() until ResetWakeup()
+};
+
+
+// Consumes the wakeup signal without ever blocking.
+// Returns the signaled state found on entry. A signal that is not permanent is
+// cleared by the call; a permanent one (see WakeupAllThreads()) stays raised.
+// Calling this while the object is not signaled is a usage error and trips dICHECK.
+// The function is declared inline because its definition lives in a header:
+// without that, including the header from a second translation unit would
+// produce a multiple definition error at link time.
+inline
+bool dxSelfWakeup::WaitWakeup(const dThreadedWaitTime *timeout_time_ptr)
+{
+    (void)timeout_time_ptr; // unused
+
+    const bool wait_result = m_wakeup_state;
+
+    if (wait_result)
+    {
+        // Keep the signal raised only if it has been made permanent
+        m_wakeup_state = m_state_is_permanent;
+    }
+    else
+    {
+        dICHECK(false); // Self-wakeup should only be used in cases when waiting is called after object is signaled
+    }
+
+    return wait_result;
+}
+
+
+/************************************************************************/
+/* Fake mutex class implementation                                      */
+/************************************************************************/
+
+/*
+ *  Mutex stand-in: every operation is a no-op and TryLockMutex() always
+ *  reports success.
+ */
+class dxFakeMutex
+{
+public:
+    dxFakeMutex()
+    {
+    }
+
+    // There is nothing to create, so initialization always succeeds
+    bool InitializeObject()
+    {
+        return true;
+    }
+
+public:
+    void LockMutex()
+    {
+        // Do nothing
+    }
+
+    bool TryLockMutex()
+    {
+        // Do nothing
+        return true;
+    }
+
+    void UnlockMutex()
+    {
+        // Do nothing
+    }
+};
+
+
+/************************************************************************/
+/* Fake lull class implementation                                      */
+/************************************************************************/
+
+/*
+ *  Lull stand-in: registration and signaling are no-ops, and waiting for the
+ *  alarm is a usage error that trips dICHECK.
+ */
+class dxFakeLull
+{
+public:
+    dxFakeLull()
+    {
+    }
+
+    // There is nothing to create, so initialization always succeeds
+    bool InitializeObject()
+    {
+        return true;
+    }
+
+public:
+    void RegisterToLull()
+    {
+        // Do nothing
+    }
+
+    void WaitForLullAlarm()
+    {
+        dICHECK(false); // Fake lull can't be waited
+    }
+
+    void UnregisterFromLull()
+    {
+        // Do nothing
+    }
+
+    void SignalLullAlarmIfAnyRegistrants()
+    {
+        // Do nothing
+    }
+};
+
+
+#endif // #ifndef _ODE_THREADING_FAKE_SYNC_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_impl.cpp b/libs/ode-0.16.1/ode/src/threading_impl.cpp
new file mode 100644
index 0000000..aa30883
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_impl.cpp
@@ -0,0 +1,282 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading subsystem implementation file.                              *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Subsystem APIs implementation for built-in threading support provider.
+ */
+
+
+#include <ode/common.h>
+#include <ode/threading_impl.h>
+#include "config.h"
+#include "threading_impl_posix.h"
+#include "threading_impl_win.h"
+#include "threading_impl.h"
+
+
+// Forward declarations of the file-local adapter functions. They are defined at
+// the bottom of this file and their addresses are stored in
+// g_builtin_threading_functions below. Each one takes the implementation ID as
+// its first parameter.
+
+// Mutex group management
+static dMutexGroupID AllocMutexGroup(dThreadingImplementationID impl, dmutexindex_t Mutex_count, const char *const *Mutex_names_ptr/*=NULL*/);
+static void FreeMutexGroup(dThreadingImplementationID impl, dMutexGroupID mutex_group);
+static void LockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index);
+// static int TryLockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index);
+static void UnlockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index);
+
+// Call wait object management
+static dCallWaitID AllocThreadedCallWait(dThreadingImplementationID impl);
+static void ResetThreadedCallWait(dThreadingImplementationID impl, dCallWaitID call_wait);
+static void FreeThreadedCallWait(dThreadingImplementationID impl, dCallWaitID call_wait);
+
+// Call posting, dependency adjustment and waiting
+static void PostThreadedCall(
+    dThreadingImplementationID impl, int *out_summary_fault/*=NULL*/, 
+    dCallReleaseeID *out_post_releasee/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+    dCallWaitID call_wait/*=NULL*/, 
+    dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index, 
+    const char *call_name/*=NULL*/);
+static void AlterThreadedCallDependenciesCount(
+    dThreadingImplementationID impl, dCallReleaseeID target_releasee, 
+    ddependencychange_t dependencies_count_change);
+static void WaitThreadedCall(
+    dThreadingImplementationID impl, int *out_wait_status/*=NULL*/, 
+    dCallWaitID call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/, 
+    const char *wait_name/*=NULL*/);
+
+// Implementation queries and resource preallocation
+static unsigned RetrieveThreadingThreadCount(dThreadingImplementationID impl);
+static int PreallocateResourcesForThreadedCalls(dThreadingImplementationID impl, ddependencycount_t max_simultaneous_calls_estimate);
+
+
+// Functions table of the built-in threading provider; a pointer to it is what
+// dThreadingImplementationGetFunctions() returns.
+// The initializer is positional: the entries must stay in the order of the
+// members of dxThreadingFunctionsInfo, which is named in the comment on each line.
+static const dxThreadingFunctionsInfo g_builtin_threading_functions = 
+{
+    sizeof(dxThreadingFunctionsInfo), // unsigned struct_size;
+
+    &AllocMutexGroup, // dMutexGroupAllocFunction *alloc_mutex_group;
+    &FreeMutexGroup, // dMutexGroupFreeFunction *free_mutex_group;
+    &LockMutexGroupMutex, // dMutexGroupMutexLockFunction *lock_group_mutex;
+    &UnlockMutexGroupMutex, // dMutexGroupMutexUnlockFunction *unlock_group_mutex;
+
+    &AllocThreadedCallWait, // dThreadedCallWaitAllocFunction *alloc_call_wait;
+    &ResetThreadedCallWait, // dThreadedCallWaitResetFunction *reset_call_wait;
+    &FreeThreadedCallWait, // dThreadedCallWaitFreeFunction *free_call_wait;
+
+    &PostThreadedCall, // dThreadedCallPostFunction *post_call;
+    &AlterThreadedCallDependenciesCount, // dThreadedCallDependenciesCountAlterFunction *alter_call_dependencies_count;
+    &WaitThreadedCall, // dThreadedCallWaitFunction *wait_call;
+
+    &RetrieveThreadingThreadCount, // dThreadingImplThreadCountRetrieveFunction *retrieve_thread_count;
+    &PreallocateResourcesForThreadedCalls, // dThreadingImplResourcesForCallsPreallocateFunction *preallocate_resources_for_calls;
+
+    // The try-lock entry is disabled together with its adapter function
+    // &TryLockMutexGroupMutex, // dMutexGroupMutexTryLockFunction *trylock_group_mutex;
+};
+
+
+// Creates and initializes a dxSelfThreadedThreading object and returns it as an
+// opaque implementation ID. The result is NULL if the object could not be
+// allocated or its InitializeObject() failed.
+/*extern */dThreadingImplementationID dThreadingAllocateSelfThreadedImplementation()
+{
+    dxSelfThreadedThreading *self_threading = new dxSelfThreadedThreading();
+
+    if (self_threading != NULL)
+    {
+        if (!self_threading->InitializeObject())
+        {
+            delete self_threading;
+            self_threading = NULL;
+        }
+    }
+
+    // Convert to the interface pointer first: that is the type the ID is cast back to
+    dxIThreadingImplementation *result_impl = self_threading;
+    return (dThreadingImplementationID)result_impl;
+}
+
+// Creates and initializes a dxMultiThreadedThreading object and returns it as an
+// opaque implementation ID. The result is NULL if allocation or InitializeObject()
+// failed, and always NULL when dBUILTIN_THREADING_IMPL_ENABLED is off.
+/*extern */dThreadingImplementationID dThreadingAllocateMultiThreadedImplementation()
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dxMultiThreadedThreading *multi_threading = new dxMultiThreadedThreading();
+
+    if (multi_threading != NULL)
+    {
+        if (!multi_threading->InitializeObject())
+        {
+            delete multi_threading;
+            multi_threading = NULL;
+        }
+    }
+#else
+    dxIThreadingImplementation *multi_threading = NULL;
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+    // Convert to the interface pointer first: that is the type the ID is cast back to
+    dxIThreadingImplementation *result_impl = multi_threading;
+    return (dThreadingImplementationID)result_impl;
+}
+
+// Returns the functions table of the built-in provider.
+// With the built-in implementation enabled, impl must not be NULL (asserted).
+// Otherwise a NULL impl is tolerated and yields a NULL result.
+/*extern */const dThreadingFunctionsInfo *dThreadingImplementationGetFunctions(dThreadingImplementationID impl)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dAASSERT(impl != NULL);
+
+    return &g_builtin_threading_functions;
+#else // #if !dBUILTIN_THREADING_IMPL_ENABLED
+    const dThreadingFunctionsInfo *found_functions = NULL;
+
+    if (impl != NULL)
+    {
+        found_functions = &g_builtin_threading_functions;
+    }
+
+    return found_functions;
+#endif // #if !dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+// Forwards to ShutdownProcessing() of the implementation behind impl.
+// With the built-in implementation enabled, impl must not be NULL (asserted).
+// Otherwise a NULL impl makes the call a no-op.
+/*extern */void dThreadingImplementationShutdownProcessing(dThreadingImplementationID impl)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dAASSERT(impl != NULL);
+#else // #if !dBUILTIN_THREADING_IMPL_ENABLED
+    if (impl == NULL)
+    {
+        return;
+    }
+#endif // #if !dBUILTIN_THREADING_IMPL_ENABLED
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    threading->ShutdownProcessing();
+}
+
+// Forwards to CleanupForRestart() of the implementation behind impl.
+// With the built-in implementation enabled, impl must not be NULL (asserted).
+// Otherwise a NULL impl makes the call a no-op.
+/*extern */void dThreadingImplementationCleanupForRestart(dThreadingImplementationID impl)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dAASSERT(impl != NULL);
+#else // #if !dBUILTIN_THREADING_IMPL_ENABLED
+    if (impl == NULL)
+    {
+        return;
+    }
+#endif // #if !dBUILTIN_THREADING_IMPL_ENABLED
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    threading->CleanupForRestart();
+}
+
+// Releases the implementation behind impl via its FreeInstance().
+// A NULL impl is accepted and ignored.
+/*extern */void dThreadingFreeImplementation(dThreadingImplementationID impl)
+{
+    if (impl == NULL)
+    {
+        return;
+    }
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    threading->FreeInstance();
+}
+
+
+// Forwards to StickToJobsProcessing() of the implementation behind impl, passing
+// the readiness callback and its context through unchanged.
+// With the built-in implementation enabled, impl must not be NULL (asserted).
+// Otherwise a NULL impl makes the call a no-op.
+/*extern */void dExternalThreadingServeMultiThreadedImplementation(dThreadingImplementationID impl,
+                                                                   dThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dAASSERT(impl != NULL);
+#else // #if !dBUILTIN_THREADING_IMPL_ENABLED
+    if (impl == NULL)
+    {
+        return;
+    }
+#endif // #if !dBUILTIN_THREADING_IMPL_ENABLED
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    threading->StickToJobsProcessing(readiness_callback, callback_context);
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+
+// Table adapter: allocates a group of Mutex_count mutexes from the implementation
+// and returns it as an opaque ID. The names array is not used.
+static dMutexGroupID AllocMutexGroup(dThreadingImplementationID impl, dmutexindex_t Mutex_count, const char *const *Mutex_names_ptr/*=NULL*/)
+{
+    (void)Mutex_names_ptr; // unused
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+
+    // Convert to the base interface pointer before turning it into the ID
+    dIMutexGroup *allocated_group = threading->AllocMutexGroup(Mutex_count);
+    return (dMutexGroupID)allocated_group;
+}
+
+// Table adapter: hands the mutex group back to the implementation for release.
+static void FreeMutexGroup(dThreadingImplementationID impl, dMutexGroupID mutex_group)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dIMutexGroup *group_instance = (dIMutexGroup *)mutex_group;
+
+    threading->FreeMutexGroup(group_instance);
+}
+
+// Table adapter: locks mutex mutex_index of the given group.
+static void LockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dIMutexGroup *group_instance = (dIMutexGroup *)mutex_group;
+
+    threading->LockMutexGroupMutex(group_instance, mutex_index);
+}
+
+// static int TryLockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index)
+// {
+//   bool trylock_result = ((dxIThreadingImplementation *)impl)->TryLockMutexGroupMutex((dIMutexGroup *)mutex_group, mutex_index);
+//   return trylock_result;
+// }
+
+// Table adapter: unlocks mutex mutex_index of the given group.
+static void UnlockMutexGroupMutex(dThreadingImplementationID impl, dMutexGroupID mutex_group, dmutexindex_t mutex_index)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dIMutexGroup *group_instance = (dIMutexGroup *)mutex_group;
+
+    threading->UnlockMutexGroupMutex(group_instance, mutex_index);
+}
+
+
+// Table adapter: allocates a call wait object from the implementation and
+// returns it as an opaque ID.
+static dCallWaitID AllocThreadedCallWait(dThreadingImplementationID impl)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+
+    // Convert to the interface pointer before turning it into the ID
+    dxICallWait *allocated_wait = threading->AllocACallWait();
+    return (dCallWaitID)allocated_wait;
+}
+
+// Table adapter: resets the call wait object.
+static void ResetThreadedCallWait(dThreadingImplementationID impl, dCallWaitID call_wait)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dxICallWait *wait_instance = (dxICallWait *)call_wait;
+
+    threading->ResetACallWait(wait_instance);
+}
+
+// Table adapter: hands the call wait object back to the implementation for release.
+static void FreeThreadedCallWait(dThreadingImplementationID impl, dCallWaitID call_wait)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dxICallWait *wait_instance = (dxICallWait *)call_wait;
+
+    threading->FreeACallWait(wait_instance);
+}
+
+
+// Table adapter: schedules a new job in the implementation. All arguments except
+// call_name, which is not used, are passed on to ScheduleNewJob().
+static void PostThreadedCall(
+    dThreadingImplementationID impl, int *out_summary_fault/*=NULL*/,
+    dCallReleaseeID *out_post_releasee/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/,
+    dCallWaitID call_wait/*=NULL*/,
+    dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index,
+    const char *call_name/*=NULL*/)
+{
+    (void)call_name; // unused
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dxICallWait *wait_instance = (dxICallWait *)call_wait;
+
+    threading->ScheduleNewJob(
+        out_summary_fault, out_post_releasee,
+        dependencies_count, dependent_releasee,
+        wait_instance,
+        call_func, call_context, instance_index);
+}
+
+// Table adapter: applies dependencies_count_change to the job behind target_releasee.
+static void AlterThreadedCallDependenciesCount(
+    dThreadingImplementationID impl, dCallReleaseeID target_releasee,
+    ddependencychange_t dependencies_count_change)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+
+    threading->AlterJobDependenciesCount(target_releasee, dependencies_count_change);
+}
+
+// Table adapter: waits for job completion on the call wait object.
+// wait_name is not used.
+static void WaitThreadedCall(
+    dThreadingImplementationID impl, int *out_wait_status/*=NULL*/,
+    dCallWaitID call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/,
+    const char *wait_name/*=NULL*/)
+{
+    (void)wait_name; // unused
+
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+    dxICallWait *wait_instance = (dxICallWait *)call_wait;
+
+    threading->WaitJobCompletion(out_wait_status, wait_instance, timeout_time_ptr);
+}
+
+
+// Table adapter: returns what RetrieveActiveThreadsCount() of the implementation reports.
+static unsigned RetrieveThreadingThreadCount(dThreadingImplementationID impl)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+
+    return threading->RetrieveActiveThreadsCount();
+}
+
+// Table adapter: asks the implementation to preallocate job infos for the given
+// estimate and returns its result converted to int.
+static int PreallocateResourcesForThreadedCalls(dThreadingImplementationID impl, ddependencycount_t max_simultaneous_calls_estimate)
+{
+    dxIThreadingImplementation *threading = (dxIThreadingImplementation *)impl;
+
+    return threading->PreallocateJobInfos(max_simultaneous_calls_estimate);
+}
+
+
diff --git a/libs/ode-0.16.1/ode/src/threading_impl.h b/libs/ode-0.16.1/ode/src/threading_impl.h
new file mode 100644
index 0000000..7fb5c60
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_impl.h
@@ -0,0 +1,40 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading implementation private header file.                         *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Threading implementation header for library private functions.
+ */
+
+
+#ifndef _ODE__PRIVATE_THREADING_IMPL_H_
+#define _ODE__PRIVATE_THREADING_IMPL_H_
+
+
+#include <ode/threading_impl.h>
+
+
+
+#endif // #ifndef _ODE__PRIVATE_THREADING_IMPL_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_impl_posix.h b/libs/ode-0.16.1/ode/src/threading_impl_posix.h
new file mode 100644
index 0000000..0aaf4ae
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_impl_posix.h
@@ -0,0 +1,638 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading POSIX implementation file.                                  *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Threading POSIX implementation for built-in threading support provider.
+ */
+
+
+#ifndef _ODE_THREADING_IMPL_POSIX_H_
+#define _ODE_THREADING_IMPL_POSIX_H_
+
+
+#include <ode/common.h>
+
+
+#if !defined(_WIN32)
+
+
+#include "threading_impl_templates.h"
+#include "threading_fake_sync.h"
+#include "threading_atomics_provs.h"
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+#include <pthread.h>
+#include <time.h>
+#include <errno.h>
+
+#if !defined(EOK)
+#define EOK   0
+#endif
+
+
+#if defined(__APPLE__)
+
+#if HAVE_GETTIMEOFDAY
+
+#include <sys/time.h>
+
+#if !defined(CLOCK_MONOTONIC)
+#define CLOCK_MONOTONIC 2
+#endif
+
+// Apple variant: fills *ts from gettimeofday(), converting microseconds to
+// nanoseconds. The requested clock type is ignored.
+// Returns 0 on success and -1 if gettimeofday() failed, in which case *ts is untouched.
+static inline
+int _condvar_clock_gettime(int clock_type, timespec *ts)
+{
+    (void)clock_type; // Unused
+
+    timeval current_time;
+
+    if (gettimeofday(&current_time, NULL) != 0)
+    {
+        return -1;
+    }
+
+    ts->tv_sec = current_time.tv_sec;
+    ts->tv_nsec = current_time.tv_usec * 1000;
+    return 0;
+}
+
+
+#else // #if !HAVE_GETTIMEOFDAY
+
+#error It is necessary to check manuals for the correct way of getting condvar wait time for this Apple system
+
+
+#endif // #if !HAVE_GETTIMEOFDAY
+
+
+#else // #if !defined(__APPLE__)
+
+#if !HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_NO_PTHREAD_CONDATTR_SETCLOCK
+
+// Neither macro is defined, so the code is evidently being compiled without an autoconf run, with the project generated by other means.
+// Assume pthread_condattr_setclock() is available, as that is true in most cases and is the best that can be done here.
+#define HAVE_PTHREAD_CONDATTR_SETCLOCK 1
+
+
+#endif // #if !HAVE_PTHREAD_CONDATTR_SETCLOCK && !HAVE_NO_PTHREAD_CONDATTR_SETCLOCK
+
+
+#if HAVE_PTHREAD_CONDATTR_SETCLOCK
+
+// Generic variant: reads the requested clock with clock_gettime() and returns
+// that function's result unchanged.
+static inline
+int _condvar_clock_gettime(int clock_type, timespec *ts)
+{
+    const int gettime_result = clock_gettime(clock_type, ts);
+    return gettime_result;
+}
+
+
+#else // #if !HAVE_PTHREAD_CONDATTR_SETCLOCK
+
+#error It is necessary to check manuals for the correct way of getting condvar wait time for this system
+
+
+#endif // #if !HAVE_PTHREAD_CONDATTR_SETCLOCK
+
+
+#endif // #if !defined(__APPLE__)
+
+
+/************************************************************************/
+/* dxCondvarWakeup class implementation                                 */
+/************************************************************************/
+
+/*
+ *  Wakeup object built on a pthread mutex and a condition variable.
+ *  The OS objects are created by InitializeObject() and destroyed by the destructor.
+ */
+class dxCondvarWakeup
+{
+public:
+    dxCondvarWakeup():
+        m_waiters_list(NULL),
+        m_signaled_state(false),
+        m_state_is_permanent(false),
+        m_object_initialized(false)
+    {
+    }
+
+    ~dxCondvarWakeup()
+    {
+        DoFinalizeObject();
+    }
+
+    // Must be called, and must succeed, before any other use of the object
+    bool InitializeObject()
+    {
+        return DoInitializeObject();
+    }
+
+private:
+    bool DoInitializeObject();
+    void DoFinalizeObject();
+
+public:
+    void ResetWakeup();
+    void WakeupAThread();
+    void WakeupAllThreads();
+
+    bool WaitWakeup(const dThreadedWaitTime *timeout_time_ptr);
+
+private:
+    bool BlockAsAWaiter(const dThreadedWaitTime *timeout_time_ptr);
+
+private:
+    // Node of the list of waiters; only the signal flag is set by the constructor,
+    // the link fields are left for the list registration code to fill in
+    struct dxWaiterInfo
+    {
+        dxWaiterInfo(): m_signal_state(false) {}
+
+        dxWaiterInfo      **m_prev_info_ptr;
+        dxWaiterInfo      *m_next_info;
+        bool              m_signal_state;
+    };
+
+    void RegisterWaiterInList(dxWaiterInfo *waiter_info);
+    void UnregisterWaiterFromList(dxWaiterInfo *waiter_info);
+
+    bool MarkSignaledFirstWaiter();
+    static bool MarkSignaledFirstWaiterMeaningful(dxWaiterInfo *first_waiter);
+    bool MarkSignaledAllWaiters();
+    static bool MarkSignaledAllWaitersMeaningful(dxWaiterInfo *first_waiter);
+
+private:
+    dxWaiterInfo  *m_waiters_list;      // head of the waiters list, NULL initially
+    bool          m_signaled_state;     // signal kept for a waiter that has not arrived yet
+    bool          m_state_is_permanent; // set by WakeupAllThreads(), cleared by ResetWakeup()
+    bool          m_object_initialized; // true while the mutex and the condvar exist
+    pthread_mutex_t m_wakeup_mutex;
+    pthread_cond_t m_wakeup_cond;
+};
+
+
+// Creates the mutex and the condition variable of the object.
+// When pthread_condattr_setclock() is available the condition variable is
+// configured to use CLOCK_MONOTONIC.
+// Returns true on success. On failure everything created so far is destroyed,
+// errno receives the code of the failed call and false is returned.
+bool dxCondvarWakeup::DoInitializeObject()
+{
+    dIASSERT(!m_object_initialized);
+
+    int failure_code = pthread_mutex_init(&m_wakeup_mutex, NULL);
+    if (failure_code != EOK)
+    {
+        // Nothing has been created yet, so there is nothing to roll back
+        errno = failure_code;
+        return false;
+    }
+
+    pthread_condattr_t wakeup_condattr;
+    failure_code = pthread_condattr_init(&wakeup_condattr);
+
+    if (failure_code == EOK)
+    {
+#if HAVE_PTHREAD_CONDATTR_SETCLOCK
+        failure_code = pthread_condattr_setclock(&wakeup_condattr, CLOCK_MONOTONIC);
+#endif // #if HAVE_PTHREAD_CONDATTR_SETCLOCK
+
+        if (failure_code == EOK)
+        {
+            failure_code = pthread_cond_init(&m_wakeup_cond, &wakeup_condattr);
+        }
+
+        if (failure_code == EOK)
+        {
+            pthread_condattr_destroy(&wakeup_condattr); // result can be ignored
+
+            m_object_initialized = true;
+            return true;
+        }
+
+        // Report the original failure first, then roll the attribute object back
+        errno = failure_code;
+
+        int condattr_destroy_result = pthread_condattr_destroy(&wakeup_condattr);
+        dICHECK(condattr_destroy_result == EOK || ((errno = condattr_destroy_result), false));
+    }
+    else
+    {
+        errno = failure_code;
+    }
+
+    // The mutex is the only thing left to roll back at this point
+    int mutex_destroy_result = pthread_mutex_destroy(&m_wakeup_mutex);
+    dICHECK(mutex_destroy_result == EOK || ((errno = mutex_destroy_result), false));
+
+    return false;
+}
+
+// Destroys the condition variable and the mutex if they have been created.
+// Does nothing for an object that was never successfully initialized.
+void dxCondvarWakeup::DoFinalizeObject()
+{
+    if (!m_object_initialized)
+    {
+        return;
+    }
+
+    int cond_destroy_result = pthread_cond_destroy(&m_wakeup_cond);
+    dICHECK(cond_destroy_result == EOK || ((errno = cond_destroy_result), false));
+
+    int mutex_destroy_result = pthread_mutex_destroy(&m_wakeup_mutex);
+    dICHECK(mutex_destroy_result == EOK || ((errno = mutex_destroy_result), false));
+
+    m_object_initialized = false;
+}
+
+
+// Clears the stored signal and its permanence while holding the wakeup mutex.
+void dxCondvarWakeup::ResetWakeup()
+{
+    int mutex_lock_result = pthread_mutex_lock(&m_wakeup_mutex);
+    dICHECK(mutex_lock_result == EOK || ((errno = mutex_lock_result), false));
+
+    m_signaled_state = false;
+    m_state_is_permanent = false;
+
+    int mutex_unlock_result = pthread_mutex_unlock(&m_wakeup_mutex);
+    dICHECK(mutex_unlock_result == EOK || ((errno = mutex_unlock_result), false));
+}
+
+void dxCondvarWakeup::WakeupAThread()
+{
+    int lock_result = pthread_mutex_lock(&m_wakeup_mutex);
+    dICHECK(lock_result == EOK || ((errno = lock_result), false));
+
+    dIASSERT(!m_state_is_permanent); // Wakeup should not be used after permanent signal
+
+    // A wakeup that is already pending is not accumulated a second time.
+    if (!m_signaled_state)
+    {
+        const bool waiter_marked = MarkSignaledFirstWaiter();
+        if (!waiter_marked)
+        {
+            // No waiter could be marked: keep the wakeup pending for a later WaitWakeup() call.
+            m_signaled_state = true;
+        }
+        else
+        {
+            // All threads must be woken up even though only one waiter has been marked:
+            // a chosen thread cannot be woken up personally, and a randomly woken thread could not
+            // tell whether there was a condition signal for it or its sleep was merely interrupted.
+            // The personal mark is what lets exactly the marked waiter proceed after the broadcast.
+            int broadcast_result = pthread_cond_broadcast(&m_wakeup_cond);
+            dICHECK(broadcast_result == EOK || ((errno = broadcast_result), false));
+        }
+    }
+
+    int unlock_result = pthread_mutex_unlock(&m_wakeup_mutex);
+    dICHECK(unlock_result == EOK || ((errno = unlock_result), false));
+}
+
+void dxCondvarWakeup::WakeupAllThreads()
+{
+    int lock_result = pthread_mutex_lock(&m_wakeup_mutex);
+    dICHECK(lock_result == EOK || ((errno = lock_result), false));
+
+    // From now on the signaled state survives WaitWakeup() calls until ResetWakeup() is used.
+    const bool was_signaled = m_signaled_state;
+    m_state_is_permanent = true;
+    m_signaled_state = true;
+
+    // Waiters are marked only when the state was not already signaled;
+    // a broadcast is issued only if at least one of them was newly marked.
+    if (!was_signaled && MarkSignaledAllWaiters())
+    {
+        int broadcast_result = pthread_cond_broadcast(&m_wakeup_cond);
+        dICHECK(broadcast_result == EOK || ((errno = broadcast_result), false));
+    }
+
+    int unlock_result = pthread_mutex_unlock(&m_wakeup_mutex);
+    dICHECK(unlock_result == EOK || ((errno = unlock_result), false));
+}
+
+
+bool dxCondvarWakeup::WaitWakeup(const dThreadedWaitTime *timeout_time_ptr)
+{
+    // Returns true if a wakeup was consumed or received, false otherwise
+    // (a timeout, or an all-zero timeout that found no pending wakeup).
+    bool wait_result = false;
+
+    int lock_result = pthread_mutex_lock(&m_wakeup_mutex);
+    dICHECK(lock_result == EOK || ((errno = lock_result), false));
+
+    if (m_signaled_state)
+    {
+        // Consume the pending wakeup unless it has been made permanent.
+        m_signaled_state = m_state_is_permanent;
+        wait_result = true;
+    }
+    else
+    {
+        // A NULL timeout blocks without a deadline; an all-zero timeout does not block at all.
+        const bool blocking_allowed = !timeout_time_ptr
+            || timeout_time_ptr->wait_nsec != 0 || timeout_time_ptr->wait_sec != 0;
+
+        if (blocking_allowed) { wait_result = BlockAsAWaiter(timeout_time_ptr); }
+    }
+
+    int unlock_result = pthread_mutex_unlock(&m_wakeup_mutex);
+    dICHECK(unlock_result == EOK || ((errno = unlock_result), false));
+
+    return wait_result;
+}
+
+bool dxCondvarWakeup::BlockAsAWaiter(const dThreadedWaitTime *timeout_time_ptr)
+{
+    // Expects m_wakeup_mutex to be locked by the caller: it is handed to the condition wait below.
+    // Returns true once this waiter has been marked as signaled, false if the timeout expired first.
+    const bool timed_wait = timeout_time_ptr != NULL;
+    bool wait_result = false;
+
+    dxWaiterInfo waiter_info;
+    RegisterWaiterInList(&waiter_info);
+
+    timespec wakeup_time;
+    if (timed_wait)
+    {
+        // Turn the relative timeout into an absolute CLOCK_MONOTONIC deadline.
+        timespec current_time;
+        int clock_result = _condvar_clock_gettime(CLOCK_MONOTONIC, &current_time);
+        dICHECK(clock_result != -1);
+
+        time_t wakeup_sec = current_time.tv_sec + timeout_time_ptr->wait_sec;
+        unsigned long wakeup_nsec = current_time.tv_nsec + timeout_time_ptr->wait_nsec;
+
+        // Carry an overflow of the nanoseconds into the seconds.
+        if (wakeup_nsec >= 1000000000)
+        {
+            wakeup_sec += 1;
+            wakeup_nsec -= 1000000000;
+        }
+
+        wakeup_time.tv_sec = wakeup_sec;
+        wakeup_time.tv_nsec = wakeup_nsec;
+    }
+
+    for (;;)
+    {
+        int cond_result = timed_wait
+            ? pthread_cond_timedwait(&m_wakeup_cond, &m_wakeup_mutex, &wakeup_time)
+            : pthread_cond_wait(&m_wakeup_cond, &m_wakeup_mutex);
+        dICHECK(cond_result == EOK || cond_result == ETIMEDOUT || ((errno = cond_result), false));
+
+        // The personal mark is tested before the timeout, so a signal arriving together with it wins.
+        if (waiter_info.m_signal_state) { wait_result = true; break; }
+
+        if (cond_result == ETIMEDOUT)
+        {
+            dIASSERT(timeout_time_ptr != NULL);
+            break;
+        }
+    }
+
+    UnregisterWaiterFromList(&waiter_info);
+
+    return wait_result;
+}
+
+
+void dxCondvarWakeup::RegisterWaiterInList(dxWaiterInfo *waiter_info)
+{
+    // Waiters form a ring; a new one is linked in right before the head, i.e. as the last element.
+    dxWaiterInfo *const list_head = m_waiters_list;
+    if (list_head != NULL)
+    {
+        waiter_info->m_next_info = list_head;
+        waiter_info->m_prev_info_ptr = list_head->m_prev_info_ptr;
+        *list_head->m_prev_info_ptr = waiter_info;
+        list_head->m_prev_info_ptr = &waiter_info->m_next_info;
+    }
+    else
+    {
+        m_waiters_list = waiter_info;
+        waiter_info->m_next_info = waiter_info;
+        waiter_info->m_prev_info_ptr = &waiter_info->m_next_info;
+    }
+}
+
+void dxCondvarWakeup::UnregisterWaiterFromList(dxWaiterInfo *waiter_info)
+{
+    dxWaiterInfo *const successor = waiter_info->m_next_info;
+
+    // A node that links to itself is the only element of the ring.
+    if (successor == waiter_info)
+    {
+        m_waiters_list = NULL;
+        return;
+    }
+
+    // Bypass the node: the successor inherits its back-pointer and the incoming link skips over it.
+    successor->m_prev_info_ptr = waiter_info->m_prev_info_ptr;
+    *waiter_info->m_prev_info_ptr = successor;
+
+    // Keep the list head valid if the head itself is being removed.
+    if (m_waiters_list == waiter_info) { m_waiters_list = successor; }
+
+}
+
+
+bool dxCondvarWakeup::MarkSignaledFirstWaiter()
+{
+    // With no registered waiters there is nobody to mark.
+    dxWaiterInfo *const list_head = m_waiters_list;
+
+    if (!list_head)
+    {
+        return false;
+    }
+
+    // Otherwise try to mark the first waiter that has not been signaled yet.
+    return MarkSignaledFirstWaiterMeaningful(list_head);
+}
+
+bool dxCondvarWakeup::MarkSignaledFirstWaiterMeaningful(dxWaiterInfo *first_waiter)
+{
+    // Walks the circular waiter list once, starting at first_waiter (must not be NULL),
+    // and marks the first waiter whose signal flag is still clear.
+    // Returns true if such a waiter was found, false if all of them were already marked.
+    dxWaiterInfo *candidate = first_waiter;
+
+    do
+    {
+        if (!candidate->m_signal_state)
+        {
+            // Found an unmarked waiter - this call's wakeup goes to it.
+            candidate->m_signal_state = true;
+            return true;
+        }
+
+        // Already marked by an earlier wakeup; try the next waiter in the ring.
+        candidate = candidate->m_next_info;
+    }
+    while (candidate != first_waiter);
+
+    // A full circle was made without finding an unmarked waiter.
+    return false;
+}
+
+bool dxCondvarWakeup::MarkSignaledAllWaiters()
+{
+    dxWaiterInfo *const list_head = m_waiters_list;
+
+    // An empty list has no waiters to mark, which is reported as "none found".
+    if (list_head == NULL)
+    {
+        return false;
+    }
+
+    // True is returned only if at least one waiter was newly marked.
+    return MarkSignaledAllWaitersMeaningful(list_head);
+}
+
+bool dxCondvarWakeup::MarkSignaledAllWaitersMeaningful(dxWaiterInfo *first_waiter)
+{
+    // Walks the whole circular waiter list starting at first_waiter (must not be NULL)
+    // and sets the signal flag on every waiter that does not have it yet.
+    // Returns true if at least one flag was changed by this call.
+    bool any_newly_marked = false;
+
+    dxWaiterInfo *candidate = first_waiter;
+
+    do
+    {
+        if (!candidate->m_signal_state)
+        {
+            candidate->m_signal_state = true;
+            any_newly_marked = true;
+        }
+
+        candidate = candidate->m_next_info;
+    }
+    while (candidate != first_waiter);
+
+    return any_newly_marked;
+}
+
+
+/************************************************************************/
+/* dxMutexMutex class implementation                          */
+/************************************************************************/
+
+class dxMutexMutex // Wrapper around a pthread_mutex_t with explicit two-phase initialization
+{
+public:
+    dxMutexMutex(): m_mutex_allocated(false) {} // Does not create the mutex; see InitializeObject()
+    ~dxMutexMutex() { DoFinalizeObject(); } // Destroys the mutex if it was created
+
+    bool InitializeObject() { return DoInitializeObject(); } // Creates the mutex; false on failure
+
+private:
+    bool DoInitializeObject(); // pthread_mutex_init() with NULL attributes; sets errno on failure
+    void DoFinalizeObject(); // pthread_mutex_destroy() while m_mutex_allocated is set
+
+public:
+    void LockMutex(); // Calls pthread_mutex_lock()
+    bool TryLockMutex(); // Returns true if the mutex was acquired, false if it was busy (EBUSY)
+    void UnlockMutex(); // Calls pthread_mutex_unlock()
+
+private:
+    pthread_mutex_t     m_mutex_instance;
+    bool                m_mutex_allocated; // Set by a successful DoInitializeObject(), cleared by DoFinalizeObject()
+};
+
+
+bool dxMutexMutex::DoInitializeObject()
+{
+    dIASSERT(!m_mutex_allocated);
+
+    // Creates the pthread mutex with default (NULL) attributes.
+    // Returns true on success; on failure returns false with errno
+    // set to the error code reported by pthread_mutex_init().
+    int mutex_result = pthread_mutex_init(&m_mutex_instance, NULL);
+
+    if (mutex_result != EOK)
+    {
+        // Hand the pthread error code over to the caller through errno.
+        errno = mutex_result;
+
+        return false;
+    }
+
+    // Remember that the mutex exists so that DoFinalizeObject() destroys it.
+    m_mutex_allocated = true;
+
+    return true;
+}
+
+void dxMutexMutex::DoFinalizeObject()
+{
+    // Safe to call repeatedly: the mutex is destroyed only while it is marked as allocated.
+    if (!m_mutex_allocated) { return; }
+
+    int mutex_result = pthread_mutex_destroy(&m_mutex_instance);
+    dICHECK(mutex_result == EOK || ((errno = mutex_result), false));
+
+    m_mutex_allocated = false;
+}
+
+
+void dxMutexMutex::LockMutex() // Acquires the mutex via pthread_mutex_lock()
+{
+    int lock_result = pthread_mutex_lock(&m_mutex_instance);
+    dICHECK(lock_result == EOK || ((errno = lock_result), false)); // errno receives the pthread error code on failure
+}
+
+bool dxMutexMutex::TryLockMutex()
+{
+    int trylock_result = pthread_mutex_trylock(&m_mutex_instance);
+    dICHECK(trylock_result == EOK || trylock_result == EBUSY || ((errno = trylock_result), false));
+    // EBUSY is an accepted outcome: it is reported as "not acquired" (false) rather than as a failure.
+    return trylock_result == EOK;
+}
+
+void dxMutexMutex::UnlockMutex() // Releases the mutex via pthread_mutex_unlock()
+{
+    int unlock_result = pthread_mutex_unlock(&m_mutex_instance);
+    dICHECK(unlock_result == EOK || ((errno = unlock_result), false)); // errno receives the pthread error code on failure
+}
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+/************************************************************************/
+/* Self-threaded job list definition                                    */
+/************************************************************************/
+
+typedef dxtemplateJobListContainer<dxFakeLull, dxFakeMutex, dxFakeAtomicsProvider> dxSelfThreadedJobListContainer; // Job list storage built from the dxFake* policy types
+typedef dxtemplateJobListSelfHandler<dxSelfWakeup, dxSelfThreadedJobListContainer> dxSelfThreadedJobListHandler; // Job handler working on that container type
+typedef dxtemplateThreadingImplementation<dxSelfThreadedJobListContainer, dxSelfThreadedJobListHandler> dxSelfThreadedThreading; // Complete implementation combining the two
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+/************************************************************************/
+/* Multi-threaded job list definition                                   */
+/************************************************************************/
+
+typedef dxtemplateJobListContainer<dxtemplateThreadedLull<dxCondvarWakeup, dxOUAtomicsProvider, false>, dxMutexMutex, dxOUAtomicsProvider> dxMultiThreadedJobListContainer; // "false": the lull reads its registrant count without an atomic query
+typedef dxtemplateJobListThreadedHandler<dxCondvarWakeup, dxMultiThreadedJobListContainer> dxMultiThreadedJobListHandler; // Job handler working on that container type
+typedef dxtemplateThreadingImplementation<dxMultiThreadedJobListContainer, dxMultiThreadedJobListHandler> dxMultiThreadedThreading; // Complete implementation combining the two
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#endif // #if !defined(_WIN32)
+
+
+#endif // #ifndef _ODE_THREADING_IMPL_POSIX_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_impl_templates.h b/libs/ode-0.16.1/ode/src/threading_impl_templates.h
new file mode 100644
index 0000000..acecbc3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_impl_templates.h
@@ -0,0 +1,1265 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading implementation templates file.                              *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Job list and Mutex group implementation templates for built-in threading 
+ *  support provider.
+ */
+
+
+#ifndef _ODE_THREADING_IMPL_TEMPLATES_H_
+#define _ODE_THREADING_IMPL_TEMPLATES_H_
+
+
+#include <ode/common.h>
+#include <ode/memory.h>
+
+#include <ode/threading.h>
+
+#include "objects.h"
+
+#include <new>
+
+
+#define dMAKE_JOBINSTANCE_RELEASEE(job_instance) ((dCallReleaseeID)(job_instance)) // Casts a job info pointer to dCallReleaseeID
+#define dMAKE_RELEASEE_JOBINSTANCE(releasee) ((dxThreadedJobInfo *)(releasee)) // Casts a dCallReleaseeID back to a job info pointer
+
+
+template <class tThreadMutex>
+class dxtemplateMutexGroup // Indexable group of tThreadMutex objects kept in one allocation
+{
+private:
+    dxtemplateMutexGroup() {} // Private: instances come from AllocateInstance() and go to FreeInstance()
+    ~dxtemplateMutexGroup() {}
+
+public:
+    static dxtemplateMutexGroup<tThreadMutex> *AllocateInstance(dmutexindex_t Mutex_count); // Returns NULL on failure
+    static void FreeInstance(dxtemplateMutexGroup<tThreadMutex> *mutex_group); // Accepts NULL
+
+private:
+    bool InitializeMutexArray(dmutexindex_t Mutex_count); // Constructs and initializes the mutexes in place
+    void FinalizeMutexArray(dmutexindex_t Mutex_count); // Runs the destructor on the first Mutex_count mutexes
+
+public:
+    void LockMutex(dmutexindex_t mutex_index) { dIASSERT(mutex_index < m_un.m_mutex_count); m_Mutex_array[mutex_index].LockMutex(); }
+    bool TryLockMutex(dmutexindex_t mutex_index) { dIASSERT(mutex_index < m_un.m_mutex_count); return m_Mutex_array[mutex_index].TryLockMutex(); }
+    void UnlockMutex(dmutexindex_t mutex_index) { dIASSERT(mutex_index < m_un.m_mutex_count); m_Mutex_array[mutex_index].UnlockMutex(); }
+
+private:
+    union
+    {
+        dmutexindex_t     m_mutex_count; // Number of elements in m_Mutex_array, stored by AllocateInstance()
+        unsigned long     m_reserved_for_allignment[2]; // Not accessed in the code shown here; presumably pads the header for alignment
+
+    } m_un;
+
+    tThreadMutex      m_Mutex_array[1]; // Declared with one element; AllocateInstance() allocates room for Mutex_count of them
+};
+
+template<class tThreadWakeup>
+class dxtemplateCallWait: // Wait object implemented on top of a tThreadWakeup instance
+    public dBase
+{
+public:
+    dxtemplateCallWait() {}
+    ~dxtemplateCallWait() { DoFinalizeObject(); }
+
+    bool InitializeObject() { return DoInitializeObject(); } // Returns the result of initializing the wakeup member
+
+private:
+    bool DoInitializeObject() { return m_wait_wakeup.InitializeObject(); }
+    void DoFinalizeObject() { /* Do nothing */ }
+
+public:
+    typedef dxtemplateCallWait<tThreadWakeup> dxCallWait;
+
+public:
+    void ResetTheWait() { m_wait_wakeup.ResetWakeup(); }
+    void SignalTheWait() { m_wait_wakeup.WakeupAllThreads(); } // Signals through WakeupAllThreads()
+    bool PerformWaiting(const dThreadedWaitTime *timeout_time_ptr/*=NULL*/) { return m_wait_wakeup.WaitWakeup(timeout_time_ptr); }
+
+public:
+    static void AbstractSignalTheWait(void *wait_wakeup_ptr) { ((dxCallWait *)wait_wakeup_ptr)->SignalTheWait(); } // wait_wakeup_ptr must point to a dxCallWait
+
+private:
+    tThreadWakeup           m_wait_wakeup;
+};
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+template<class tThreadWakeup, class tAtomicsProvider, const bool tatomic_test_required>
+class dxtemplateThreadedLull // Counts registered threads and lets them wait for an alarm wakeup
+{
+public:
+    dxtemplateThreadedLull(): m_registrant_count(0), m_alarm_wakeup() {}
+    ~dxtemplateThreadedLull() { dIASSERT(m_registrant_count == 0); DoFinalizeObject(); }
+
+    bool InitializeObject() { return DoInitializeObject(); } // Returns the result of initializing the alarm wakeup
+
+private:
+    bool DoInitializeObject() { return m_alarm_wakeup.InitializeObject(); }
+    void DoFinalizeObject() { /* Do nothing */ }
+
+private:
+    typedef typename tAtomicsProvider::atomicord_t atomicord_t;
+
+public:
+    void RegisterToLull() { tAtomicsProvider::IncrementTargetNoRet(&m_registrant_count); }
+    void WaitForLullAlarm() { dIASSERT(m_registrant_count != 0); m_alarm_wakeup.WaitWakeup(NULL); }
+    void UnregisterFromLull() { tAtomicsProvider::DecrementTargetNoRet(&m_registrant_count); }
+    // Wakes a thread only if the registrant count is non-zero; the count is queried atomically only if tatomic_test_required.
+    void SignalLullAlarmIfAnyRegistrants()
+    {
+        if (tatomic_test_required ? (tAtomicsProvider::QueryTargetValue(&m_registrant_count) != 0) : (m_registrant_count != 0))
+        {
+            m_alarm_wakeup.WakeupAThread();
+        }
+    }
+
+private:
+    atomicord_t             m_registrant_count;
+    tThreadWakeup           m_alarm_wakeup;
+};
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+struct dxThreadedJobInfo: // Per-job record: list links, dependency data and the call to execute
+    public dBase
+{
+    dxThreadedJobInfo() {} // Leaves all fields uninitialized
+    explicit dxThreadedJobInfo(void *): m_next_job(NULL) {} // The argument is unused; only m_next_job is initialized
+    // Stores the job parameters and clears m_call_fault; the list link fields are not touched.
+    void AssignJobData(ddependencycount_t dependencies_count, dxThreadedJobInfo *dependent_job, void *call_wait, 
+        int *fault_accumulator_ptr, dThreadedCallFunction *call_function, void *call_context, dcallindex_t call_index)
+    {
+        m_dependencies_count = dependencies_count;
+        m_dependent_job = dependent_job;
+        m_call_wait = call_wait;
+        m_fault_accumulator_ptr = fault_accumulator_ptr;
+
+        m_call_fault = 0;
+        m_call_function = call_function;
+        m_call_context = call_context;
+        m_call_index = call_index;
+    }
+    // Invokes the stored function with this job passed as the releasee; returns true if it returned non-zero.
+    bool InvokeCallFunction()
+    {
+        int call_result = m_call_function(m_call_context, m_call_index, dMAKE_JOBINSTANCE_RELEASEE(this));
+        return call_result != 0;
+    }
+
+    dxThreadedJobInfo       *m_next_job;
+    dxThreadedJobInfo       **m_prev_job_next_ptr;
+
+    ddependencycount_t      m_dependencies_count; // PickNextPendingJob() selects jobs for which this is zero
+    dxThreadedJobInfo       *m_dependent_job;
+    void                    *m_call_wait;
+    int                     *m_fault_accumulator_ptr;
+
+    int                     m_call_fault;
+    dThreadedCallFunction   *m_call_function;
+    void                    *m_call_context;
+    dcallindex_t            m_call_index;
+};
+
+
+template<class tThreadMutex>
+class dxtemplateThreadingLockHelper // Scoped lock: locks in the constructor, unlocks in the destructor if still locked
+{
+public:
+    dxtemplateThreadingLockHelper(tThreadMutex &mutex_instance): m_mutex_instance(mutex_instance), m_lock_indicator_flag(false) { LockMutex(); }
+    ~dxtemplateThreadingLockHelper() { if (m_lock_indicator_flag) { UnlockMutex(); } }
+    // Manual control; the flag records whether this helper currently holds the lock.
+    void LockMutex() { dIASSERT(!m_lock_indicator_flag); m_mutex_instance.LockMutex(); m_lock_indicator_flag = true; }
+    void UnlockMutex() { dIASSERT(m_lock_indicator_flag); m_mutex_instance.UnlockMutex(); m_lock_indicator_flag = false; }
+
+private:
+    tThreadMutex            &m_mutex_instance;
+    bool                    m_lock_indicator_flag;
+};
+
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+class dxtemplateJobListContainer // Job list and job info pool, parameterized by lull, mutex and atomics policy types
+{
+public:
+    dxtemplateJobListContainer():
+        m_job_list(NULL),
+        m_info_pool((atomicptr_t)NULL),
+        m_pool_access_lock(),
+        m_list_access_lock(),
+        m_info_wait_lull(),
+        m_info_count_known_to_be_preallocated(0)
+    {
+    }
+
+    ~dxtemplateJobListContainer()
+    {
+        dIASSERT(m_job_list == NULL); // Would not it be nice to wait for jobs to complete before deleting the list?
+
+        FreeJobInfoPoolInfos();
+        DoFinalizeObject();
+    }
+
+    bool InitializeObject() { return DoInitializeObject(); } // False if any of the two locks or the lull fails to initialize
+
+private:
+    bool DoInitializeObject() { return m_pool_access_lock.InitializeObject() && m_list_access_lock.InitializeObject() && m_info_wait_lull.InitializeObject(); }
+    void DoFinalizeObject() { /* Do nothing */ }
+
+public:
+    typedef tAtomicsProvider dxAtomicsProvider;
+    typedef typename tAtomicsProvider::atomicord_t atomicord_t;
+    typedef typename tAtomicsProvider::atomicptr_t atomicptr_t;
+    typedef tThreadMutex dxThreadMutex;
+    typedef dxtemplateThreadingLockHelper<tThreadMutex> dxMutexLockHelper;
+    typedef void dWaitSignallingFunction(void *job_call_wait);
+    // Releases job_to_release (if not NULL), then picks a ready job while holding the list lock.
+public:
+    dxThreadedJobInfo *ReleaseAJobAndPickNextPendingOne(
+        dxThreadedJobInfo *job_to_release, bool job_result, dWaitSignallingFunction *wait_signal_proc_ptr, 
+        bool &out_last_job_flag);
+
+private:
+    dxThreadedJobInfo *PickNextPendingJob(bool &out_last_job_flag); // First job with zero dependencies, or NULL
+    void ReleaseAJob(dxThreadedJobInfo *job_instance, bool job_result, dWaitSignallingFunction *wait_signal_proc_ptr);
+
+public:
+    inline dxThreadedJobInfo *AllocateJobInfoFromPool();
+    void QueueJobForProcessing(dxThreadedJobInfo *job_instance);
+
+    void AlterJobProcessingDependencies(dxThreadedJobInfo *job_instance, ddependencychange_t dependencies_count_change, 
+        bool &out_job_has_become_ready);
+
+private:
+    inline ddependencycount_t SmartAddJobDependenciesCount(dxThreadedJobInfo *job_instance, ddependencychange_t dependencies_count_change);
+
+    inline void InsertJobInfoIntoListHead(dxThreadedJobInfo *job_instance);
+    inline void RemoveJobInfoFromList(dxThreadedJobInfo *job_instance);
+
+    dxThreadedJobInfo *ExtractJobInfoFromPoolOrAllocate();
+    inline void ReleaseJobInfoIntoPool(dxThreadedJobInfo *job_instance);
+
+private:
+    void FreeJobInfoPoolInfos(); // Called from the destructor
+
+public:
+    bool EnsureNumberOfJobInfosIsPreallocated(ddependencycount_t required_info_count);
+
+private:
+    bool DoPreallocateJobInfos(ddependencycount_t required_info_count);
+
+public:
+    bool IsJobListReadyForShutdown() const { return m_job_list == NULL; } // True when no job is linked into the list
+
+private:
+    dxThreadedJobInfo       *m_job_list;
+    volatile atomicptr_t    m_info_pool; // dxThreadedJobInfo *
+    tThreadMutex            m_pool_access_lock;
+    tThreadMutex            m_list_access_lock; // Held by ReleaseAJobAndPickNextPendingOne() while a job is picked
+    tThreadLull             m_info_wait_lull;
+    ddependencycount_t      m_info_count_known_to_be_preallocated;
+};
+
+
+typedef void (dxThreadReadyToServeCallback)(void *callback_context); // Type of the readiness_callback parameter of StickToJobsProcessing()
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+template<class tThreadWakeup, class tJobListContainer>
+class dxtemplateJobListThreadedHandler // Job handler that tracks an active thread count and a shutdown request
+{
+public:
+    dxtemplateJobListThreadedHandler(tJobListContainer *list_container_ptr):
+        m_job_list_ptr(list_container_ptr),
+        m_processing_wakeup(),
+        m_active_thread_count(0),
+        m_shutdown_requested(0)
+    {
+    }
+
+    ~dxtemplateJobListThreadedHandler()
+    {
+        dIASSERT(m_active_thread_count == 0);
+
+        DoFinalizeObject();
+    }
+
+    bool InitializeObject() { return DoInitializeObject(); } // Returns the result of initializing the processing wakeup
+
+private:
+    bool DoInitializeObject() { return m_processing_wakeup.InitializeObject(); }
+    void DoFinalizeObject() { /* Do nothing */ }
+
+public:
+    typedef dxtemplateCallWait<tThreadWakeup> dxCallWait;
+
+public:
+    inline void ProcessActiveJobAddition();
+    inline void PrepareForWaitingAJobCompletion();
+
+public:
+    inline unsigned RetrieveActiveThreadsCount();
+    inline void StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/);
+
+private:
+    void PerformJobProcessingUntilShutdown();
+    void PerformJobProcessingSession();
+
+    void BlockAsIdleThread();
+    void ActivateAnIdleThread();
+
+public:
+    inline void ShutdownProcessing();
+    inline void CleanupForRestart();
+
+private:
+    bool IsShutdownRequested() const { return m_shutdown_requested != 0; }
+
+private:
+    typedef typename tJobListContainer::dxAtomicsProvider dxAtomicsProvider;
+    typedef typename tJobListContainer::atomicord_t atomicord_t;
+
+    atomicord_t GetActiveThreadsCount() const { return m_active_thread_count; } // Plain read of the volatile counter
+    void RegisterAsActiveThread() { dxAtomicsProvider::template AddValueToTarget<sizeof(atomicord_t)>((volatile void *)&m_active_thread_count, 1); }
+    void UnregisterAsActiveThread() { dxAtomicsProvider::template AddValueToTarget<sizeof(atomicord_t)>((volatile void *)&m_active_thread_count, -1); }
+
+private:
+    tJobListContainer       *m_job_list_ptr; // Not owned: supplied by the creator of the handler
+    tThreadWakeup           m_processing_wakeup;
+    volatile atomicord_t    m_active_thread_count; // Modified through the atomics provider
+    int                     m_shutdown_requested; // NOTE(review): plain int; writers are outside this view - confirm how access is synchronized
+};
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+template<class tThreadWakeup, class tJobListContainer>
+class dxtemplateJobListSelfHandler // Job handler whose only state is a pointer to the job list container
+{
+public:
+    dxtemplateJobListSelfHandler(tJobListContainer *list_container_ptr):
+        m_job_list_ptr(list_container_ptr)
+    {
+    }
+
+    ~dxtemplateJobListSelfHandler()
+    {
+        // Do nothing
+    }
+
+    bool InitializeObject() { return true; } // Nothing to initialize: always succeeds
+
+public:
+    typedef dxtemplateCallWait<tThreadWakeup> dxCallWait;
+
+public:
+    inline void ProcessActiveJobAddition();
+    inline void PrepareForWaitingAJobCompletion();
+
+public:
+    inline unsigned RetrieveActiveThreadsCount();
+    inline void StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/);
+
+private:
+    void PerformJobProcessingUntilExhaustion();
+    void PerformJobProcessingSession();
+
+public:
+    inline void ShutdownProcessing();
+    inline void CleanupForRestart();
+
+private:
+    tJobListContainer       *m_job_list_ptr; // Not owned: supplied by the creator of the handler
+};
+
+
+struct dIMutexGroup; // Declared only; appears in the interface below as a pointer type
+struct dxICallWait; // Declared only; appears in the interface below as a pointer type
+
+class dxIThreadingImplementation // Abstract interface of a threading implementation; every member is pure virtual
+{
+public:
+    virtual void FreeInstance() = 0; // NOTE(review): no virtual destructor is declared here - presumably objects are disposed of only via FreeInstance(); confirm
+
+public:
+    virtual dIMutexGroup *AllocMutexGroup(dmutexindex_t Mutex_count) = 0;
+    virtual void FreeMutexGroup(dIMutexGroup *mutex_group) = 0;
+    virtual void LockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index) = 0;
+    // virtual bool TryLockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index) = 0;
+    virtual void UnlockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index) = 0;
+
+public:
+    virtual dxICallWait *AllocACallWait() = 0;
+    virtual void ResetACallWait(dxICallWait *call_wait) = 0;
+    virtual void FreeACallWait(dxICallWait *call_wait) = 0;
+    // Job scheduling; parameters annotated with /*=NULL*/ have no actual default argument in this declaration.
+public:
+    virtual bool PreallocateJobInfos(ddependencycount_t max_simultaneous_calls_estimate) = 0;
+    virtual void ScheduleNewJob(int *fault_accumulator_ptr/*=NULL*/, 
+        dCallReleaseeID *out_post_releasee_ptr/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+        dxICallWait *call_wait/*=NULL*/, 
+        dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index) = 0;
+    virtual void AlterJobDependenciesCount(dCallReleaseeID target_releasee, ddependencychange_t dependencies_count_change) = 0;
+    virtual void WaitJobCompletion(int *out_wait_status_ptr/*=NULL*/, 
+        dxICallWait *call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/) = 0;
+
+public:
+    virtual unsigned RetrieveActiveThreadsCount() = 0;
+    virtual void StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/) = 0;
+    virtual void ShutdownProcessing() = 0;
+    virtual void CleanupForRestart() = 0;
+};
+
+
+template<class tJobListContainer, class tJobListHandler>
+class dxtemplateThreadingImplementation: // Implements dxIThreadingImplementation with a job list container and a handler for it
+    public dBase,
+    public dxIThreadingImplementation
+{
+public:
+    dxtemplateThreadingImplementation():
+        dBase(),
+        m_list_container(),
+        m_list_handler(&m_list_container)
+    {
+    }
+
+    virtual ~dxtemplateThreadingImplementation()
+    {
+        DoFinalizeObject();
+    }
+
+    bool InitializeObject() { return DoInitializeObject(); } // False if the container or the handler fails to initialize
+
+private:
+    bool DoInitializeObject() { return m_list_container.InitializeObject() && m_list_handler.InitializeObject(); }
+    void DoFinalizeObject() { /* Do nothing */ }
+
+protected:
+    virtual void FreeInstance();
+
+private:
+    typedef dxtemplateMutexGroup<typename tJobListContainer::dxThreadMutex> dxMutexGroup;
+    typedef typename tJobListHandler::dxCallWait dxCallWait;
+    // The interface overrides below are declared protected in this class, while they are public in the interface.
+protected:
+    virtual dIMutexGroup *AllocMutexGroup(dmutexindex_t Mutex_count);
+    virtual void FreeMutexGroup(dIMutexGroup *mutex_group);
+    virtual void LockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index);
+    // virtual bool TryLockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index);
+    virtual void UnlockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index);
+
+protected:
+    virtual dxICallWait *AllocACallWait();
+    virtual void ResetACallWait(dxICallWait *call_wait);
+    virtual void FreeACallWait(dxICallWait *call_wait);
+
+protected:
+    virtual bool PreallocateJobInfos(ddependencycount_t max_simultaneous_calls_estimate);
+    virtual void ScheduleNewJob(int *fault_accumulator_ptr/*=NULL*/, 
+        dCallReleaseeID *out_post_releasee_ptr/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+        dxICallWait *call_wait/*=NULL*/, 
+        dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index);
+    virtual void AlterJobDependenciesCount(dCallReleaseeID target_releasee, ddependencychange_t dependencies_count_change);
+    virtual void WaitJobCompletion(int *out_wait_status_ptr/*=NULL*/, 
+        dxICallWait *call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/);
+
+protected:
+    virtual unsigned RetrieveActiveThreadsCount();
+    virtual void StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/);
+    virtual void ShutdownProcessing();
+    virtual void CleanupForRestart();
+
+private:
+    tJobListContainer     m_list_container;
+    tJobListHandler       m_list_handler; // Constructed with a pointer to m_list_container
+};
+
+
+/************************************************************************/
+/* Implementation of dxtemplateMutexGroup                               */
+/************************************************************************/
+
+template<class tThreadMutex>
+/*static */dxtemplateMutexGroup<tThreadMutex> *dxtemplateMutexGroup<tThreadMutex>::AllocateInstance(dmutexindex_t Mutex_count)
+{
+    dAASSERT(Mutex_count != 0);
+
+    // The array offset is measured on a fake non-NULL object address; the header part
+    // plus room for Mutex_count mutexes is then obtained as a single raw block.
+    typedef dxtemplateMutexGroup<tThreadMutex> dxThisGroup;
+    const dxThisGroup *const fake_group = (dxThisGroup *)(sizeint)8;
+    const sizeint array_offset = (sizeint)(&fake_group->m_Mutex_array) - (sizeint)fake_group;
+    const sizeint size_required = array_offset + Mutex_count * sizeof(tThreadMutex);
+
+    dxThisGroup *new_group = (dxThisGroup *)dAlloc(size_required);
+    if (new_group == NULL) { return NULL; }
+
+    new_group->m_un.m_mutex_count = Mutex_count;
+    if (new_group->InitializeMutexArray(Mutex_count)) { return new_group; }
+
+    // No mutex remains constructed after a failed initialization, so only the block is freed.
+    dFree((void *)new_group, size_required);
+    return NULL;
+}
+
+template<class tThreadMutex>
+/*static */void dxtemplateMutexGroup<tThreadMutex>::FreeInstance(dxtemplateMutexGroup<tThreadMutex> *mutex_group)
+{
+    // Passing NULL is allowed and does nothing.
+    if (mutex_group == NULL) { return; }
+
+    const dmutexindex_t stored_count = mutex_group->m_un.m_mutex_count;
+    mutex_group->FinalizeMutexArray(stored_count);
+
+    // The block size is recomputed from the stored mutex count, as dFree() takes the size as well.
+    const dxtemplateMutexGroup<tThreadMutex> *const fake_group = (dxtemplateMutexGroup<tThreadMutex> *)(sizeint)(2 * sizeof(sizeint));
+    const sizeint size_required = ((sizeint)(&fake_group->m_Mutex_array) - (sizeint)fake_group) + stored_count * sizeof(tThreadMutex);
+    dFree((void *)mutex_group, size_required);
+}
+
+template<class tThreadMutex>
+bool dxtemplateMutexGroup<tThreadMutex>::InitializeMutexArray(dmutexindex_t Mutex_count)
+{
+    // Constructs and initializes Mutex_count mutexes in place within the array storage.
+    // Returns true if all of them were initialized.
+    // On the first failure every mutex constructed so far is destroyed again
+    // and false is returned.
+    for (dmutexindex_t built_count = 0; built_count != Mutex_count; ++built_count)
+    {
+        tThreadMutex *const mutex_slot = &m_Mutex_array[built_count];
+
+        new(mutex_slot) tThreadMutex;
+
+        if (mutex_slot->InitializeObject())
+        {
+            continue;
+        }
+
+        // Undo the failed element itself...
+        mutex_slot->tThreadMutex::~tThreadMutex();
+
+        // ...and then all the elements that precede it.
+        FinalizeMutexArray(built_count);
+
+        return false;
+    }
+
+    return true;
+}
+
+template<class tThreadMutex>
+void dxtemplateMutexGroup<tThreadMutex>::FinalizeMutexArray(dmutexindex_t Mutex_count)
+{
+    // Runs the destructor on the first Mutex_count array elements in ascending order; no storage is freed.
+    tThreadMutex *const slots_end = m_Mutex_array + Mutex_count;
+    for (tThreadMutex *mutex_slot = m_Mutex_array; mutex_slot != slots_end; ++mutex_slot)
+    {
+        mutex_slot->tThreadMutex::~tThreadMutex();
+    }
+}
+
+/************************************************************************/
+/* Implementation of dxtemplateJobListContainer                         */
+/************************************************************************/
+
+// Releases job_to_release (if one is given) and then picks the next ready job under the list access lock.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+dxThreadedJobInfo *dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::ReleaseAJobAndPickNextPendingOne(
+    dxThreadedJobInfo *job_to_release, bool job_result, dWaitSignallingFunction *wait_signal_proc_ptr, bool &out_last_job_flag)
+{
+    if (job_to_release != NULL)
+    {
+        ReleaseAJob(job_to_release, job_result, wait_signal_proc_ptr);
+    }
+
+    // The lock is acquired only after the release and is held until the function returns
+    dxMutexLockHelper list_guard(m_list_access_lock);
+    return PickNextPendingJob(out_last_job_flag);
+}
+
+// Dequeues and returns the first job of m_job_list whose dependency count is zero (NULL if there is none).
+// out_last_job_flag is set to true only if a job was picked and it was the last element of the list.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+dxThreadedJobInfo *dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::PickNextPendingJob(
+    bool &out_last_job_flag)
+{
+    dxThreadedJobInfo *ready_job = NULL;
+    bool picked_tail = false;
+
+    for (dxThreadedJobInfo *candidate = m_job_list; candidate != NULL; candidate = candidate->m_next_job)
+    {
+        if (candidate->m_dependencies_count != 0)
+        {
+            continue;
+        }
+        // Unsafe assignment is OK - the count should not be changed once the job is ready for execution
+        candidate->m_dependencies_count = 1;
+        picked_tail = candidate->m_next_job == NULL;
+        RemoveJobInfoFromList(candidate);
+        ready_job = candidate;
+        break;
+    }
+
+    out_last_job_flag = picked_tail;
+    return ready_job;
+}
+// Removes one dependency from job_instance and, for as long as a job's count drops to zero, completes that job and continues with its dependent job.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::ReleaseAJob(
+    dxThreadedJobInfo *job_instance, bool job_result, dWaitSignallingFunction *wait_signal_proc_ptr)
+{
+    dxThreadedJobInfo *current_job = job_instance;
+
+    if (!job_result)
+    {
+        // Accumulate call fault (be careful to not reset it!!!)
+        current_job->m_call_fault = 1;
+    }
+    // The job passed in must already be out of the job list (NULL m_prev_job_next_ptr)
+    bool job_dequeued = true;
+    dIASSERT(current_job->m_prev_job_next_ptr == NULL);
+
+    while (true)
+    {
+        dIASSERT(current_job->m_dependencies_count != 0);
+        // Drop one dependency; the helper returns the updated count
+        ddependencycount_t new_dependencies_count = SmartAddJobDependenciesCount(current_job, -1);
+        // Stop if dependencies remain or the job is still linked in the job list
+        if (new_dependencies_count != 0 || !job_dequeued)
+        {
+            break;
+        }
+        // The job is complete: signal its waiter, if there is one
+        void *job_call_wait = current_job->m_call_wait;
+
+        if (job_call_wait != NULL)
+        {
+            wait_signal_proc_ptr(job_call_wait);
+        }
+        // Write the job's fault flag to the fault accumulator, if one was supplied
+        int call_fault = current_job->m_call_fault;
+
+        if (current_job->m_fault_accumulator_ptr)
+        {
+            *current_job->m_fault_accumulator_ptr = call_fault;
+        }
+        // m_dependent_job is read before the info goes back to the pool, where it becomes available for reuse
+        dxThreadedJobInfo *dependent_job = current_job->m_dependent_job;
+        ReleaseJobInfoIntoPool(current_job);
+
+        if (dependent_job == NULL)
+        {
+            break;
+        }
+
+        if (call_fault)
+        {
+            // Accumulate call fault (be careful to not reset it!!!)
+            dependent_job->m_call_fault = 1;
+        }
+        // Proceed to the dependent job: its count is decremented next, but it is completed here only if already dequeued
+        current_job = dependent_job;
+        job_dequeued = dependent_job->m_prev_job_next_ptr == NULL;
+    }
+}
+
+// Provides a job info for a new job, taken from the pool or newly allocated.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+dxThreadedJobInfo *dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::AllocateJobInfoFromPool()
+{
+    // No locking is necessary here - the extraction routine locks the pool on its own
+    return ExtractJobInfoFromPoolOrAllocate();
+}
+
+// Links job_instance at the head of the job list while holding the list access lock.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::QueueJobForProcessing(dxThreadedJobInfo *job_instance)
+{
+    dxMutexLockHelper list_guard(m_list_access_lock);
+    InsertJobInfoIntoListHead(job_instance);
+}
+
+// Changes the job's dependency count by dependencies_count_change and reports in out_job_has_become_ready whether the count has reached zero.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::AlterJobProcessingDependencies(dxThreadedJobInfo *job_instance, ddependencychange_t dependencies_count_change, 
+                                                                                                             bool &out_job_has_become_ready)
+{
+    // Dependencies should not be changed when job has already become ready for execution
+    dIASSERT(job_instance->m_dependencies_count != 0);
+    // It's OK that access is not atomic - that is to be handled by external logic
+    dIASSERT(dependencies_count_change < 0 ? (job_instance->m_dependencies_count >= (ddependencycount_t)(-dependencies_count_change)) : ((ddependencycount_t)(-(ddependencychange_t)job_instance->m_dependencies_count) > (ddependencycount_t)dependencies_count_change));
+    // The check above rejects a decrease larger than the current count and, presumably (ddependencycount_t looks unsigned - confirm), an increase that would wrap the count
+    ddependencycount_t new_dependencies_count = SmartAddJobDependenciesCount(job_instance, dependencies_count_change);
+    out_job_has_become_ready = new_dependencies_count == 0;
+}
+
+
+// Applies dependencies_count_change to the job's dependency count through the atomics provider; returns the provider's result plus the change.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+ddependencycount_t dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::SmartAddJobDependenciesCount(
+    dxThreadedJobInfo *job_instance, ddependencychange_t dependencies_count_change)
+{
+    return tAtomicsProvider::template AddValueToTarget<sizeof(ddependencycount_t)>((volatile void *)&job_instance->m_dependencies_count, dependencies_count_change) + dependencies_count_change;
+}
+
+
+// Makes job_instance the new first element of m_job_list and fixes up the back-link of the former head.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::InsertJobInfoIntoListHead(
+    dxThreadedJobInfo *job_instance)
+{
+    dxThreadedJobInfo *const former_head = m_job_list;
+    // The first element's back-link refers to the list head variable itself
+    job_instance->m_prev_job_next_ptr = &m_job_list;
+    job_instance->m_next_job = former_head;
+    if (former_head != NULL)
+    {
+        former_head->m_prev_job_next_ptr = &job_instance->m_next_job;
+    }
+    m_job_list = job_instance;
+}
+
+// Unlinks job_instance from the job list and marks it as dequeued.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::RemoveJobInfoFromList(
+    dxThreadedJobInfo *job_instance)
+{
+    dxThreadedJobInfo *const successor = job_instance->m_next_job;
+    if (successor != NULL)
+    {
+        successor->m_prev_job_next_ptr = job_instance->m_prev_job_next_ptr;
+    }
+    *job_instance->m_prev_job_next_ptr = successor;
+    job_instance->m_prev_job_next_ptr = NULL; // NULL here is the indicator that the instance has been dequeued
+}
+// Returns a job info taken from the pool or, if the pool looks empty, a newly allocated one; waits on the lull alarm while neither is available.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+dxThreadedJobInfo *dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::ExtractJobInfoFromPoolOrAllocate()
+{
+    dxThreadedJobInfo *result_info;
+    // The thread stays registered with the lull for the whole attempt; ReleaseJobInfoIntoPool() signals the alarm for registrants
+    bool waited_lull = false;
+    m_info_wait_lull.RegisterToLull();
+
+    while (true)
+    {
+        dxThreadedJobInfo *raw_head_info = (dxThreadedJobInfo *)m_info_pool;
+        // Unlocked peek: an empty-looking pool leads to a fresh allocation
+        if (raw_head_info == NULL)
+        {
+            result_info = new dxThreadedJobInfo();
+
+            if (result_info != NULL)
+            {
+                break;
+            }
+            // Allocation failed: wait for the lull alarm, which is signalled when an info is released into the pool
+            m_info_wait_lull.WaitForLullAlarm();
+            waited_lull = true;
+        }
+
+        // Extraction must be locked so that other thread does not "steal" head info,
+        // use it and then reinsert back with a different "next"
+        dxMutexLockHelper pool_access(m_pool_access_lock);
+
+        dxThreadedJobInfo *head_info = (dxThreadedJobInfo *)m_info_pool; // Head info must be re-read after mutex had been locked
+        // If the pool is empty by now, or the exchange below fails, the loop starts over
+        if (head_info != NULL)
+        {
+            dxThreadedJobInfo *next_info = head_info->m_next_job;
+            if (tAtomicsProvider::CompareExchangeTargetPtr(&m_info_pool, (atomicptr_t)head_info, (atomicptr_t)next_info))
+            {
+                result_info = head_info;
+                break;
+            }
+        }
+    }
+
+    m_info_wait_lull.UnregisterFromLull();
+
+    if (waited_lull)
+    {
+        // It is necessary to re-signal lull alarm if current thread was waiting as
+        // there might be other threads waiting which might have not received alarm signal.
+        m_info_wait_lull.SignalLullAlarmIfAnyRegistrants();
+    }
+
+    return result_info;
+}
+// Pushes job_instance onto the head of the info pool with a compare-exchange retry loop, then signals the lull alarm if any thread is registered.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::ReleaseJobInfoIntoPool(
+    dxThreadedJobInfo *job_instance)
+{
+    while (true)
+    {
+        dxThreadedJobInfo *next_info = (dxThreadedJobInfo *)m_info_pool;
+        job_instance->m_next_job = next_info;
+        // Retry with a re-read head if the exchange does not succeed
+        if (tAtomicsProvider::CompareExchangeTargetPtr(&m_info_pool, (atomicptr_t)next_info, (atomicptr_t)job_instance))
+        {
+            break;
+        }
+    }
+    // Threads waiting in ExtractJobInfoFromPoolOrAllocate() are registered with this lull
+    m_info_wait_lull.SignalLullAlarmIfAnyRegistrants();
+}
+
+// Deletes every job info currently stored in the pool and leaves the pool empty.
+// No lock is taken and no atomic operation is used here.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+void dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::FreeJobInfoPoolInfos()
+{
+    for (dxThreadedJobInfo *doomed_info = (dxThreadedJobInfo *)m_info_pool; doomed_info != NULL; )
+    {
+        // The link has to be read before the element is destroyed
+        dxThreadedJobInfo *const following_info = doomed_info->m_next_job;
+        delete doomed_info;
+        doomed_info = following_info;
+    }
+
+    m_info_pool = (atomicptr_t)NULL;
+}
+
+// Succeeds at once if enough infos are known to be preallocated; otherwise performs the preallocation.
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+bool dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::EnsureNumberOfJobInfosIsPreallocated(ddependencycount_t required_info_count)
+{
+    const bool already_enough = required_info_count <= m_info_count_known_to_be_preallocated;
+    return already_enough || DoPreallocateJobInfos(required_info_count);
+}
+// Extends the info pool chain until it holds at least required_info_count infos; returns false if an allocation fails (infos linked so far stay in the pool).
+template<class tThreadLull, class tThreadMutex, class tAtomicsProvider>
+bool dxtemplateJobListContainer<tThreadLull, tThreadMutex, tAtomicsProvider>::DoPreallocateJobInfos(ddependencycount_t required_info_count)
+{
+    dIASSERT(required_info_count > m_info_count_known_to_be_preallocated); // Also ensures required_info_count > 0
+
+    bool allocation_failure = false;
+    // Work on a local copy of the pool head; it is stored back at the end
+    dxThreadedJobInfo *info_pool = (dxThreadedJobInfo *)m_info_pool;
+    // current_info_ptr walks the chain of "next" links, starting with the local head
+    ddependencycount_t info_index = 0;
+    for (dxThreadedJobInfo **current_info_ptr = &info_pool; ; )
+    {
+        dxThreadedJobInfo *current_info = *current_info_ptr;
+        // End of the existing chain reached: append a newly allocated info
+        if (current_info == NULL)
+        {
+            current_info = new dxThreadedJobInfo(NULL);
+
+            if (current_info == NULL)
+            {
+                allocation_failure = true;
+                break;
+            }
+
+            *current_info_ptr = current_info;
+        }
+        // Count the info, whether it was already in the pool or has just been added
+        if (++info_index == required_info_count)
+        {
+            m_info_count_known_to_be_preallocated = info_index;
+            break;
+        }
+
+        current_info_ptr = &current_info->m_next_job;
+    }
+
+    // Make sure m_info_pool was not changed
+    dIASSERT(m_info_pool == NULL || m_info_pool == (atomicptr_t)info_pool);
+
+    m_info_pool = (atomicptr_t)info_pool;
+
+    bool result = !allocation_failure;
+    return result;
+}
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+/************************************************************************/
+/* Implementation of dxtemplateJobListThreadedHandler                   */
+/************************************************************************/
+// Called when a job has become ready for execution: wakes up one idle thread.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::ProcessActiveJobAddition()
+{
+    ActivateAnIdleThread();
+}
+// No preparation is needed before waiting for a job completion in the threaded handler.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::PrepareForWaitingAJobCompletion()
+{
+    // Do nothing
+}
+// Reports GetActiveThreadsCount() - presumably the number of threads currently inside StickToJobsProcessing().
+template<class tThreadWakeup, class tJobListContainer>
+unsigned dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::RetrieveActiveThreadsCount()
+{
+    return GetActiveThreadsCount();
+}
+
+// Serves jobs on the calling thread until shutdown: registers it as active, calls the optional readiness callback, processes, unregisters.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/)
+{
+    RegisterAsActiveThread();
+
+    if (readiness_callback)
+    {
+        readiness_callback(callback_context);
+    }
+
+    PerformJobProcessingUntilShutdown();
+    UnregisterAsActiveThread();
+}
+
+
+// Alternates job processing sessions with blocking as an idle thread. The loop is left
+// as soon as shutdown has been requested and the job list reports being ready for it;
+// that condition is evaluated before each session and again right after it.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::PerformJobProcessingUntilShutdown()
+{
+    bool shutdown_now = IsShutdownRequested() && m_job_list_ptr->IsJobListReadyForShutdown();
+
+    while (!shutdown_now)
+    {
+        PerformJobProcessingSession();
+
+        // It is expected that new jobs will not be queued any longer after shutdown had been requested
+        shutdown_now = IsShutdownRequested() && m_job_list_ptr->IsJobListReadyForShutdown();
+
+        if (!shutdown_now)
+        {
+            BlockAsIdleThread();
+            shutdown_now = IsShutdownRequested() && m_job_list_ptr->IsJobListReadyForShutdown();
+        }
+    }
+}
+
+// One processing session of a worker thread. Every pass returns the job that has
+// just been executed (none on the first pass) together with its result to the job
+// list and picks the next ready job; the session ends when no job can be picked.
+// If the picked job was not the last element of the list, one more idle thread is
+// woken up before the job is invoked.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::PerformJobProcessingSession()
+{
+    dxThreadedJobInfo *active_job = NULL;
+    bool active_job_result = false;
+    bool picked_last_job;
+
+    // The release part of the call is skipped while active_job is still NULL
+    while ((active_job = m_job_list_ptr->ReleaseAJobAndPickNextPendingOne(active_job, active_job_result, &dxCallWait::AbstractSignalTheWait, picked_last_job)) != NULL)
+    {
+        if (!picked_last_job)
+        {
+            // The picked job was not the tail of the list: let another thread look for work too
+            ActivateAnIdleThread();
+        }
+
+        active_job_result = active_job->InvokeCallFunction();
+    }
+}
+
+// Blocks the calling thread on the processing wakeup object; no timeout is passed (NULL).
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::BlockAsIdleThread()
+{
+    m_processing_wakeup.WaitWakeup(NULL);
+}
+// Asks the processing wakeup object to wake up one thread.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::ActivateAnIdleThread()
+{
+    m_processing_wakeup.WakeupAThread();
+}
+
+// Raises the shutdown flag and wakes up all threads so that they can check it in PerformJobProcessingUntilShutdown().
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::ShutdownProcessing()
+{
+    m_shutdown_requested = true;
+    m_processing_wakeup.WakeupAllThreads();
+}
+// Clears the shutdown flag and resets the processing wakeup object.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListThreadedHandler<tThreadWakeup, tJobListContainer>::CleanupForRestart()
+{
+    m_shutdown_requested = false;
+    m_processing_wakeup.ResetWakeup();
+}
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+/************************************************************************/
+/* Implementation of dxtemplateJobListSelfHandler                       */
+/************************************************************************/
+// Nothing to wake up in the self-handler: ready jobs are executed from PrepareForWaitingAJobCompletion().
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::ProcessActiveJobAddition()
+{
+    // Do nothing
+}
+// Executes ready jobs on the calling thread, until none can be picked, before the caller starts waiting.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::PrepareForWaitingAJobCompletion()
+{
+    PerformJobProcessingUntilExhaustion();
+}
+
+// The self-handler always reports zero threads.
+template<class tThreadWakeup, class tJobListContainer>
+unsigned dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::RetrieveActiveThreadsCount()
+{
+    return 0U; // Return zero to indicate that there are no actual active threads provided.
+}
+// Must not be called for the self-handler: it ignores both arguments and only trips the internal assertion.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/)
+{
+    (void)readiness_callback; // unused
+    (void)callback_context; // unused
+    dIASSERT(false); // This method is not expected to be called for Self-Handler
+}
+
+// Runs a single processing session, which returns once no ready job can be picked.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::PerformJobProcessingUntilExhaustion()
+{
+    PerformJobProcessingSession();
+}
+
+// Executes ready jobs on the calling thread, one after another, until the job list
+// has no ready job left. Each pass returns the previously executed job (with its
+// result) to the list and fetches the next one in a single call; on the first pass
+// there is no job to return yet.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::PerformJobProcessingSession()
+{
+    dxThreadedJobInfo *active_job = NULL;
+    bool active_job_result = false;
+
+    // Required by the call below but of no interest here
+    bool ignored_last_job_flag;
+
+    // The release part of the call is skipped while active_job is still NULL
+    while ((active_job = m_job_list_ptr->ReleaseAJobAndPickNextPendingOne(active_job, active_job_result, &dxCallWait::AbstractSignalTheWait, ignored_last_job_flag)) != NULL)
+    {
+        active_job_result = active_job->InvokeCallFunction();
+    }
+}
+// The self-handler performs no action on shutdown.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::ShutdownProcessing()
+{
+    // Do nothing
+}
+// The self-handler performs no action on cleanup for restart.
+template<class tThreadWakeup, class tJobListContainer>
+void dxtemplateJobListSelfHandler<tThreadWakeup, tJobListContainer>::CleanupForRestart()
+{
+    // Do nothing
+}
+
+
+/************************************************************************/
+/* Implementation of dxtemplateThreadingImplementation                  */
+/************************************************************************/
+// Destroys this object with "delete this"; the pointer must not be used afterwards.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::FreeInstance()
+{
+    delete this;
+}
+
+// Creates a group of Mutex_count mutexes; returns whatever dxMutexGroup::AllocateInstance() yields, which is NULL on failure.
+template<class tJobListContainer, class tJobListHandler>
+dIMutexGroup *dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::AllocMutexGroup(dmutexindex_t Mutex_count)
+{
+    dxMutexGroup *mutex_group = dxMutexGroup::AllocateInstance(Mutex_count);
+    return (dIMutexGroup *)mutex_group;
+}
+// Destroys a mutex group obtained from AllocMutexGroup().
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::FreeMutexGroup(dIMutexGroup *mutex_group)
+{
+    dxMutexGroup::FreeInstance((dxMutexGroup *)mutex_group);
+}
+// Locks the mutex with index mutex_index within the given group.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::LockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index)
+{
+    ((dxMutexGroup *)mutex_group)->LockMutex(mutex_index);
+}
+
+// template<class tJobListContainer, class tJobListHandler>
+// bool dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::TryLockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index)
+// {
+//   return ((dxMutexGroup *)mutex_group)->TryLockMutex(mutex_index);
+// }
+// Unlocks the mutex with index mutex_index within the given group.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::UnlockMutexGroupMutex(dIMutexGroup *mutex_group, dmutexindex_t mutex_index)
+{
+    ((dxMutexGroup *)mutex_group)->UnlockMutex(mutex_index);
+}
+
+// Creates and initializes a call wait object; returns NULL if the allocation or InitializeObject() fails.
+template<class tJobListContainer, class tJobListHandler>
+dxICallWait *dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::AllocACallWait()
+{
+    dxCallWait *call_wait = new dxCallWait();
+    // An object that fails to initialize is destroyed right away
+    if (call_wait != NULL && !call_wait->InitializeObject())
+    {
+        delete call_wait;
+        call_wait = NULL;
+    }
+
+    return (dxICallWait *)call_wait;
+}
+// Resets the call wait object (presumably to the non-signalled state so that it can be waited on again - confirm in dxCallWait).
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::ResetACallWait(dxICallWait *call_wait)
+{
+    ((dxCallWait *)call_wait)->ResetTheWait();
+}
+// Destroys a call wait object created by AllocACallWait().
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::FreeACallWait(dxICallWait *call_wait)
+{
+    delete ((dxCallWait *)call_wait);
+}
+
+
+// Makes sure the job list container has max_simultaneous_calls_estimate job infos preallocated; false on allocation failure.
+template<class tJobListContainer, class tJobListHandler>
+bool dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::PreallocateJobInfos(ddependencycount_t max_simultaneous_calls_estimate)
+{
+    // There is no multithreading protection here!
+    // The preallocation can only be implemented if resources are
+    // preallocated before any jobs start to be scheduled.
+    return m_list_container.EnsureNumberOfJobInfosIsPreallocated(max_simultaneous_calls_estimate);
+}
+// Takes a job info from the pool, fills in the call data, queues the job and notifies the handler if the job has no dependencies.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::ScheduleNewJob(
+    int *fault_accumulator_ptr/*=NULL*/, 
+    dCallReleaseeID *out_post_releasee_ptr/*=NULL*/, ddependencycount_t dependencies_count, dCallReleaseeID dependent_releasee/*=NULL*/, 
+    dxICallWait *call_wait/*=NULL*/, 
+    dThreadedCallFunction *call_func, void *call_context, dcallindex_t instance_index)
+{
+    dxThreadedJobInfo *new_job = m_list_container.AllocateJobInfoFromPool();
+    dIASSERT(new_job != NULL);
+
+    new_job->AssignJobData(dependencies_count, dMAKE_RELEASEE_JOBINSTANCE(dependent_releasee), (dxCallWait *)call_wait, fault_accumulator_ptr, call_func, call_context, instance_index);
+    // Optionally report the new job to the caller as a releasee ID (done before the job is queued)
+    if (out_post_releasee_ptr != NULL)
+    {
+        *out_post_releasee_ptr = dMAKE_JOBINSTANCE_RELEASEE(new_job);
+    }
+
+    m_list_container.QueueJobForProcessing(new_job);
+    // A job without dependencies is ready for execution right away
+    if (dependencies_count == 0)
+    {
+        m_list_handler.ProcessActiveJobAddition();
+    }
+}
+
+// Adjusts the dependency count of the job behind target_releasee and notifies the
+// job list handler if the job has become ready for execution as a result.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::AlterJobDependenciesCount(
+    dCallReleaseeID target_releasee, ddependencychange_t dependencies_count_change)
+{
+    dIASSERT(dependencies_count_change != 0);
+
+    bool target_is_ready;
+    m_list_container.AlterJobProcessingDependencies(dMAKE_RELEASEE_JOBINSTANCE(target_releasee), dependencies_count_change, target_is_ready);
+
+    if (target_is_ready)
+    {
+        m_list_handler.ProcessActiveJobAddition();
+    }
+}
+// Lets the handler prepare, then waits on call_wait (optionally with a timeout) and stores the wait result in *out_wait_status_ptr if that pointer is given.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::WaitJobCompletion(
+    int *out_wait_status_ptr/*=NULL*/, 
+    dxICallWait *call_wait, const dThreadedWaitTime *timeout_time_ptr/*=NULL*/)
+{
+    dIASSERT(call_wait != NULL);
+    // The self-handler executes the pending jobs at this point; the threaded handler does nothing
+    m_list_handler.PrepareForWaitingAJobCompletion();
+    // Without a timeout the wait is expected to always succeed (asserted below)
+    bool wait_status = ((dxCallWait *)call_wait)->PerformWaiting(timeout_time_ptr);
+    dIASSERT(timeout_time_ptr != NULL || wait_status);
+
+    if (out_wait_status_ptr)
+    {
+        *out_wait_status_ptr = wait_status;
+    }
+}
+
+// Forwards to the job list handler's RetrieveActiveThreadsCount().
+template<class tJobListContainer, class tJobListHandler>
+unsigned dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::RetrieveActiveThreadsCount()
+{
+    return m_list_handler.RetrieveActiveThreadsCount();
+}
+// Forwards to the job list handler's StickToJobsProcessing() with the same arguments.
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::StickToJobsProcessing(dxThreadReadyToServeCallback *readiness_callback/*=NULL*/, void *callback_context/*=NULL*/)
+{
+    m_list_handler.StickToJobsProcessing(readiness_callback, callback_context);
+}
+// Forwards to the job list handler's ShutdownProcessing().
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::ShutdownProcessing()
+{
+    m_list_handler.ShutdownProcessing();
+}
+// Forwards to the job list handler's CleanupForRestart().
+template<class tJobListContainer, class tJobListHandler>
+void dxtemplateThreadingImplementation<tJobListContainer, tJobListHandler>::CleanupForRestart()
+{
+    m_list_handler.CleanupForRestart();
+}
+
+
+#endif // #ifndef _ODE_THREADING_IMPL_TEMPLATES_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_impl_win.h b/libs/ode-0.16.1/ode/src/threading_impl_win.h
new file mode 100644
index 0000000..f3cb489
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_impl_win.h
@@ -0,0 +1,273 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading Windows implementation file.                                *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Threading Windows implementation for built-in threading support provider.
+ */
+
+
+#ifndef _ODE_THREADING_IMPL_WIN_H_
+#define _ODE_THREADING_IMPL_WIN_H_
+
+
+#include <ode/common.h>
+
+
+#if defined(_WIN32)
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+#if !defined(_WIN32_WINNT)
+#define _WIN32_WINNT 0x0400
+#endif
+#include <windows.h>
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#include "threading_impl_templates.h"
+#include "threading_fake_sync.h"
+#include "threading_atomics_provs.h"
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+/************************************************************************/
+/* dxEventWakeup class implementation                                   */
+/************************************************************************/
+// Thread wakeup object built on a Win32 auto-reset event; it can wake a single waiter or, permanently, all of them.
+class dxEventWakeup
+{
+public:
+    dxEventWakeup(): m_state_is_permanent(false), m_event_handle(NULL) {}
+    ~dxEventWakeup() { DoFinalizeObject(); }
+    // Creates the underlying event; returns false on failure
+    bool InitializeObject() { return DoInitializeObject(); }
+
+private:
+    bool DoInitializeObject();
+    void DoFinalizeObject();
+
+public:
+    void ResetWakeup(); // Clears the permanent state and resets the event
+    void WakeupAThread(); // Sets the event once
+    void WakeupAllThreads(); // Enters the permanent state and sets the event
+    // Returns true if the event was signalled and false if the timeout expired; a NULL timeout waits indefinitely
+    bool WaitWakeup(const dThreadedWaitTime *timeout_time_ptr);
+
+private:
+    bool          m_state_is_permanent; // While true, every successful waiter sets the event again
+    HANDLE        m_event_handle;  // NULL until DoInitializeObject() succeeds
+};
+
+
+// Creates the event object used for the wakeup and stores its handle.
+// Returns false, leaving m_event_handle NULL, if CreateEvent() fails.
+// The function is declared inline because it is defined in a header:
+// a non-inline definition would be emitted by every translation unit
+// including the file and would break linking with multiple definitions.
+inline bool dxEventWakeup::DoInitializeObject()
+{
+    dIASSERT(m_event_handle == NULL);
+
+    // Unnamed auto-reset event (bManualReset is FALSE), initially non-signalled
+    HANDLE event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
+
+    if (event_handle == NULL)
+    {
+        return false;
+    }
+
+    m_event_handle = event_handle;
+
+    return true;
+}
+
+// Closes the event handle, if one has been created, and resets the member to NULL.
+// Declared inline as the definition resides in a header (avoids multiple definitions at link time).
+inline void dxEventWakeup::DoFinalizeObject()
+{
+    if (m_event_handle != NULL)
+    {
+        BOOL close_result = CloseHandle(m_event_handle);
+        dICHECK(close_result != FALSE);
+
+        m_event_handle = NULL;
+    }
+}
+
+
+// Leaves the permanent state and resets the event to non-signalled (inline: header-resident definition).
+inline void dxEventWakeup::ResetWakeup()
+{
+    // Order of assignment and resetting event is not important but it is preferable to be performed this way.
+    m_state_is_permanent = false;
+    BOOL event_reset_result = ResetEvent(m_event_handle);
+    dICHECK(event_reset_result);
+}
+
+// Signals the auto-reset event once (inline: header-resident definition).
+inline void dxEventWakeup::WakeupAThread()
+{
+    dIASSERT(!m_state_is_permanent); // Wakeup should not be used after permanent signal
+    BOOL event_set_result = SetEvent(m_event_handle);
+    dICHECK(event_set_result);
+}
+
+// Enters the permanent state and signals the event; each woken waiter then re-signals it (inline: header-resident definition).
+inline void dxEventWakeup::WakeupAllThreads()
+{
+    // Order is important: WaitWakeup() checks the flag after waking to decide whether to set the event again
+    m_state_is_permanent = true;
+    BOOL event_set_result = SetEvent(m_event_handle);
+    dICHECK(event_set_result);
+}
+
+// Waits for the event: true if it was signalled, false if the timeout expired. Inline because the definition resides in a header.
+inline bool dxEventWakeup::WaitWakeup(const dThreadedWaitTime *timeout_time_ptr)
+{
+    bool wait_result;
+    // Three cases: no timeout (wait indefinitely), zero timeout (poll once), finite timeout
+    if (timeout_time_ptr == NULL)
+    {
+        DWORD event_wait_result = WaitForSingleObject(m_event_handle, INFINITE);
+        dICHECK(event_wait_result == WAIT_OBJECT_0);
+
+        wait_result = true;
+    }
+    else if (timeout_time_ptr->wait_sec == 0 && timeout_time_ptr->wait_nsec == 0)
+    {
+        DWORD event_wait_result = WaitForSingleObject(m_event_handle, 0);
+
+        wait_result = event_wait_result == WAIT_OBJECT_0;
+        dICHECK(wait_result || event_wait_result == WAIT_TIMEOUT);
+    }
+    else
+    {
+        dIASSERT(timeout_time_ptr->wait_nsec < 1000000000UL);
+        // Seconds per single wait are capped so that the millisecond value, including the nanoseconds part, stays below INFINITE
+        const DWORD max_wait_seconds_in_a_shot = ((INFINITE - 1) / 1000U) - 1;
+        // Nanoseconds are rounded up to whole milliseconds and are only added to the first wait
+        time_t timeout_seconds_remaining = timeout_time_ptr->wait_sec;
+        DWORD wait_timeout = timeout_time_ptr->wait_nsec != 0 ? ((timeout_time_ptr->wait_nsec + 999999UL) / 1000000UL) : 0;
+
+        while (true)
+        {
+            if (timeout_seconds_remaining >= (time_t)max_wait_seconds_in_a_shot)
+            {
+                wait_timeout += max_wait_seconds_in_a_shot * 1000U;
+                timeout_seconds_remaining -= max_wait_seconds_in_a_shot;
+            }
+            else
+            {
+                wait_timeout += (DWORD)timeout_seconds_remaining * 1000U;
+                timeout_seconds_remaining = 0;
+            }
+
+            DWORD event_wait_result = WaitForSingleObject(m_event_handle, wait_timeout);
+
+            if (event_wait_result == WAIT_OBJECT_0)
+            {
+                wait_result = true;
+                break;
+            }
+
+            dICHECK(event_wait_result == WAIT_TIMEOUT);
+            // This portion timed out: finish unless seconds remain for another wait
+            if (timeout_seconds_remaining == 0)
+            {
+                wait_result = false;
+                break;
+            }
+
+            wait_timeout = 0;
+        }
+    }
+
+    if (wait_result && m_state_is_permanent)
+    {
+        // Since event is automatic it is necessary to set it back for the upcoming waiters
+        BOOL event_set_result = SetEvent(m_event_handle);
+        dICHECK(event_set_result);
+    }
+
+    return wait_result;
+}
+
+
+/************************************************************************/
+/* dxCriticalSectionMutex class implementation                          */
+/************************************************************************/
+// Mutex implemented with a Win32 CRITICAL_SECTION that is initialized in the constructor and deleted in the destructor.
+class dxCriticalSectionMutex
+{
+public:
+    dxCriticalSectionMutex() { InitializeCriticalSection(&m_critical_section); }
+    ~dxCriticalSectionMutex() { DeleteCriticalSection(&m_critical_section); }
+    // All the setup is done by the constructor, so this step always reports success
+    bool InitializeObject() { return true; }
+
+public:
+    void LockMutex() { EnterCriticalSection(&m_critical_section); }
+    bool TryLockMutex() { return TryEnterCriticalSection(&m_critical_section) != FALSE; }
+    void UnlockMutex() { LeaveCriticalSection(&m_critical_section); }
+
+private:
+    CRITICAL_SECTION      m_critical_section;
+};
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+/************************************************************************/
+/* Self-threaded job list definition                                    */
+/************************************************************************/
+// Self-threaded variant: fake lull, fake mutex and fake atomics, with the job list served by dxtemplateJobListSelfHandler.
+typedef dxtemplateJobListContainer<dxFakeLull, dxFakeMutex, dxFakeAtomicsProvider> dxSelfThreadedJobListContainer;
+typedef dxtemplateJobListSelfHandler<dxSelfWakeup, dxSelfThreadedJobListContainer> dxSelfThreadedJobListHandler;
+typedef dxtemplateThreadingImplementation<dxSelfThreadedJobListContainer, dxSelfThreadedJobListHandler> dxSelfThreadedThreading;
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+/************************************************************************/
+/* Multi-threaded job list definition                                   */
+/************************************************************************/
+// Multi-threaded variant: event-based lull and wakeup, critical section mutexes and dxOUAtomicsProvider atomics.
+typedef dxtemplateJobListContainer<dxtemplateThreadedLull<dxEventWakeup, dxOUAtomicsProvider, false>, dxCriticalSectionMutex, dxOUAtomicsProvider> dxMultiThreadedJobListContainer;
+typedef dxtemplateJobListThreadedHandler<dxEventWakeup, dxMultiThreadedJobListContainer> dxMultiThreadedJobListHandler;
+typedef dxtemplateThreadingImplementation<dxMultiThreadedJobListContainer, dxMultiThreadedJobListHandler> dxMultiThreadedThreading;
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#endif // #if defined(_WIN32)
+
+
+#endif // #ifndef _ODE_THREADING_IMPL_WIN_H_
diff --git a/libs/ode-0.16.1/ode/src/threading_pool_posix.cpp b/libs/ode-0.16.1/ode/src/threading_pool_posix.cpp
new file mode 100644
index 0000000..39d0d56
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_pool_posix.cpp
@@ -0,0 +1,823 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading POSIX thread pool implementation file.                      *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  POSIX thread pool implementation for built-in threading support provider.
+ */
+
+
+#if !defined(_WIN32)
+
+#include <ode/odeconfig.h>
+#include <ode/error.h>
+#include <ode/threading_impl.h>
+#include <ode/odeinit.h>
+#include "config.h"
+#include "objects.h"
+#include "threading_impl_templates.h"
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+#include <new>
+#include <pthread.h>
+#include <signal.h>
+#include <errno.h>
+
+#if !defined(EOK)
+#define EOK   0
+#endif
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+struct dxEventObject // Event object emulated with a boolean flag guarded by a pthread mutex and a condition variable
+{
+public:
+    dxEventObject(): m_event_allocated(false), m_event_manual(false), m_event_value(false) {} // Starts unallocated; InitializeObject() has to succeed before the event is used
+    ~dxEventObject() { FinalizeObject(); } // Releases the pthread objects if they are still allocated
+
+    bool InitializeObject(bool manual_reset, bool initial_state); // Creates the condvar and the mutex; returns false with errno set on failure
+    void FinalizeObject(); // Destroys the mutex and the condvar; does nothing if the event is not allocated
+
+    // WARNING! To make implementation simpler, event only releases a single thread even for manual mode.
+    bool WaitInfinitely(); // Blocks until the event is signaled; in non-manual mode the waiter clears the flag
+    void SetEvent(); // Marks the event signaled and signals the condition variable if the flag was clear
+    void ResetEvent(); // Marks the event non-signaled
+
+private:
+    bool              m_event_allocated; // true while m_event_mutex and m_event_cond are initialized
+    bool              m_event_manual; // true for manual-reset mode (a wait does not clear m_event_value)
+    bool              m_event_value; // Current signaled state; read and written under m_event_mutex
+    pthread_mutex_t   m_event_mutex; // Guards m_event_value
+    pthread_cond_t    m_event_cond; // Signaled by SetEvent() when the flag turns true
+};
+
+// Creates the condition variable and the mutex that back the event and records
+// the requested reset mode and initial signaled state. Both primitives are
+// created with default attributes.
+//
+//   manual_reset  - when false, a successful wait clears the signaled state;
+//                   when true, the state stays set until ResetEvent() is called
+//   initial_state - signaled state the event starts with
+//
+// Must not be called on an event that is already allocated (asserted below).
+// Returns true on success. On failure returns false with errno set to the
+// pthread error code and leaves neither primitive allocated.
+bool dxEventObject::InitializeObject(bool manual_reset, bool initial_state)
+{
+    dIASSERT(!m_event_allocated);
+
+    // Step 1: the condition variable
+    const int cond_init_status = pthread_cond_init(&m_event_cond, NULL);
+    if (cond_init_status != EOK)
+    {
+        errno = cond_init_status;
+
+        return false;
+    }
+
+    // Step 2: the mutex guarding the event flag
+    const int mutex_init_status = pthread_mutex_init(&m_event_mutex, NULL);
+    if (mutex_init_status != EOK)
+    {
+        errno = mutex_init_status;
+
+        // Roll back step 1 so that nothing is left allocated
+        int cond_destroy_result = pthread_cond_destroy(&m_event_cond);
+        dIVERIFY(cond_destroy_result == EOK);
+
+        return false;
+    }
+
+    // Step 3: both primitives exist, so publish the event state
+    m_event_manual = manual_reset;
+    m_event_value = initial_state;
+    m_event_allocated = true;
+
+    return true;
+}
+
+// Destroys the mutex and the condition variable of an allocated event and
+// marks the event unallocated; does nothing for an unallocated event.
+void dxEventObject::FinalizeObject()
+{
+    if (!m_event_allocated) { return; }
+
+    int mutex_destroy_result = pthread_mutex_destroy(&m_event_mutex);
+    dICHECK(mutex_destroy_result == EOK); // Why would mutex be unable to be destroyed?
+    int cond_destroy_result = pthread_cond_destroy(&m_event_cond);
+    dICHECK(cond_destroy_result == EOK); // Why would condvar be unable to be destroyed?
+
+    m_event_allocated = false;
+}
+
+// Blocks until the event is signaled. Returns true once the signaled state has been observed
+// (an auto-reset event is cleared by the observer), false if the wait itself reported an error.
+bool dxEventObject::WaitInfinitely()
+{
+    bool result = false;
+
+    int lock_result = pthread_mutex_lock(&m_event_mutex);
+    dICHECK(lock_result == EOK);
+
+    // pthread_cond_wait() may wake up spuriously, and a signal may wake more than one
+    // waiter, so the flag is re-checked in a loop rather than tested only once.
+    int wait_result = EOK;
+    while (!m_event_value && wait_result == EOK)
+    {
+        wait_result = pthread_cond_wait(&m_event_cond, &m_event_mutex);
+        dICHECK(wait_result != EINTR); // Would caller be so kind to disable signal handling for thread for duration of the call to ODE at least?
+    }
+
+    if (wait_result == EOK)
+    {
+        dIASSERT(m_event_value);
+        if (!m_event_manual) { m_event_value = false; } // Auto-reset: this waiter consumes the event
+
+        result = true;
+    }
+
+    int unlock_result = pthread_mutex_unlock(&m_event_mutex);
+    dICHECK(unlock_result == EOK);
+
+    return result;
+}
+
+// Puts the event into the signaled state; the condition variable is signaled only when the state actually changes.
+void dxEventObject::SetEvent()
+{
+    int lock_result = pthread_mutex_lock(&m_event_mutex);
+    dICHECK(lock_result == EOK);
+
+    const bool already_signaled = m_event_value;
+    m_event_value = true;
+    if (!already_signaled)
+    {
+        // NOTE! Event only releases a single thread even for manual mode to simplify implementation.
+        int signal_result = pthread_cond_signal(&m_event_cond);
+        dICHECK(signal_result == EOK);
+    }
+    int unlock_result = pthread_mutex_unlock(&m_event_mutex);
+    dICHECK(unlock_result == EOK);
+}
+
+void dxEventObject::ResetEvent() // Clears the signaled state of the event
+{
+    int lock_result = pthread_mutex_lock(&m_event_mutex); // The flag is only modified while the event mutex is held
+    dICHECK(lock_result == EOK);
+
+    m_event_value = false; // The condition variable is not signaled here
+
+    int unlock_result = pthread_mutex_unlock(&m_event_mutex);
+    dICHECK(unlock_result == EOK);
+}
+
+
+struct dxThreadPoolThreadInfo // State of one pool worker thread plus the command/acknowledgement channel used to drive it
+{
+public:
+    dxThreadPoolThreadInfo(); // Creates the object without a thread
+    ~dxThreadPoolThreadInfo(); // Calls Finalize()
+
+    bool Initialize(sizeint stack_size, unsigned int ode_data_allocate_flags); // Creates both events and the thread, then waits for the thread's init report
+
+private:
+    bool InitializeThreadAttributes(pthread_attr_t *thread_attr, sizeint stack_size); // Fills in the attributes used for pthread_create()
+    void FinalizeThreadAttributes(pthread_attr_t *thread_attr); // Destroys the attributes again
+    bool WaitInitStatus(); // Waits for ReportInitStatus() of the new thread and decodes its result
+
+private:
+    void Finalize(); // Sends the exit command, joins the thread and releases the events
+    void WaitAndCloseThreadHandle(pthread_t thread_handle); // Joins the given thread
+
+public:
+    enum dxTHREADCOMMAND // Commands understood by RunCommandHandlingLoop()
+    {
+        dxTHREAD_COMMAND_EXIT, // Acknowledge and leave the command loop
+        dxTHREAD_COMMAND_NOOP, // Acknowledge only
+        dxTHREAD_COMMAND_SERVE_IMPLEMENTATION, // Acknowledge, then serve the implementation given by the parameter block
+    };
+
+    struct dxServeImplementationParams // Parameter block for dxTHREAD_COMMAND_SERVE_IMPLEMENTATION
+    {
+        dxServeImplementationParams(dThreadingImplementationID impl, dxEventObject *ready_wait_event):
+    m_impl(impl), m_ready_wait_event(ready_wait_event)
+    {
+    }
+
+    dThreadingImplementationID m_impl; // Implementation the thread is to serve
+    dxEventObject *m_ready_wait_event; // Event set by ProcessThreadServeReadiness_Callback()
+    };
+
+    void ExecuteThreadCommand(dxTHREADCOMMAND command, void *param, bool wait_response); // Hands a command to the thread, optionally waiting for its acknowledgement
+
+private:
+    static void *ThreadProcedure_Callback(void *thread_param); // pthread entry point; forwards to ThreadProcedure()
+    void ThreadProcedure(); // Thread body: initialization, init report, command loop
+    bool DisableSignalHandlers(); // Blocks all signals (SIG_BLOCK with a full signal set)
+    void ReportInitStatus(bool init_result); // Stores the init error code and sets the acknowledgement event
+    void RunCommandHandlingLoop(); // Processes commands until dxTHREAD_COMMAND_EXIT arrives
+
+    void ThreadedServeImplementation(dThreadingImplementationID impl, dxEventObject *ready_wait_event); // Calls StickToJobsProcessing() on impl
+    static void ProcessThreadServeReadiness_Callback(void *context); // Sets the event passed as context
+
+private:
+    pthread_t   m_thread_handle; // Written by pthread_create(); meaningful while m_thread_allocated is true
+    bool        m_thread_allocated; // true once Initialize() has succeeded, until Finalize()
+
+    unsigned int m_ode_data_allocate_flags; // Passed to dAllocateODEDataForThread() on the worker thread
+    dxTHREADCOMMAND m_command_code; // Command being handed over; written before m_command_event is set
+    dxEventObject m_command_event; // Initialized with manual_reset = false; set to wake the worker with a new command
+    dxEventObject m_acknowledgement_event; // Initialized with manual_reset = true; set by the worker for the init report and for each command
+    void        *m_command_param; // Command parameter; also carries the init error code during startup
+};
+
+
+// Puts the object into the "no thread" state: nothing is allocated until
+// Initialize() succeeds, so the destructor has nothing to release.
+dxThreadPoolThreadInfo::dxThreadPoolThreadInfo()
+    : m_thread_handle(), m_thread_allocated(false)
+    , m_ode_data_allocate_flags(0)
+    , m_command_code(dxTHREAD_COMMAND_EXIT)
+    , m_command_event(), m_acknowledgement_event()
+    , m_command_param(NULL)
+{
+}
+
+dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo() // Stops the worker thread, if one was started
+{
+    Finalize(); // Does nothing unless m_thread_allocated is set
+}
+
+
+bool dxThreadPoolThreadInfo::Initialize(sizeint stack_size, unsigned int ode_data_allocate_flags) // Creates the events and the worker thread; returns false (errno set for pthread failures) if any step fails
+{
+    bool result = false;
+
+    bool command_event_allocated = false, acknowledgement_event_allocated = false; // Track what the failure path below has to release
+
+    do // Single-pass block: "break" jumps to the cleanup code after it
+    {
+        // -- There is no implicit limit on stack size in POSIX implementation
+        // if (stack_size > ...)
+        // {
+        //   errno = EINVAL;
+        //   break;
+        // }
+
+        if (!m_command_event.InitializeObject(false, false)) // manual_reset = false, initially non-signaled
+        {
+            break;
+        }
+
+        command_event_allocated = true;
+
+        if (!m_acknowledgement_event.InitializeObject(true, false)) // manual_reset = true, initially non-signaled
+        {
+            break;
+        }
+
+        acknowledgement_event_allocated = true;
+
+        m_ode_data_allocate_flags = ode_data_allocate_flags; // Stored before the thread starts; ThreadProcedure() reads it
+
+        pthread_attr_t thread_attr;
+        if (!InitializeThreadAttributes(&thread_attr, stack_size))
+        {
+            break;
+        }
+
+        int thread_create_result = pthread_create(&m_thread_handle, &thread_attr, &ThreadProcedure_Callback, (void *)this);
+
+        FinalizeThreadAttributes(&thread_attr); // The attributes are released whether or not the thread was created
+
+        if (thread_create_result != EOK)
+        {
+            errno = thread_create_result;
+            break;
+        }
+
+        bool thread_init_result = WaitInitStatus(); // Blocks until the new thread has called ReportInitStatus()
+        if (!thread_init_result)
+        {
+            WaitAndCloseThreadHandle(m_thread_handle); // ThreadProcedure() returns after a failed init, so the thread is joined here
+            break;
+        }
+
+        m_thread_allocated = true; // From here on Finalize() is responsible for the thread and the events
+        result = true;
+    }
+    while (false);
+
+    if (!result)
+    {
+        if (command_event_allocated) // The acknowledgement event can only exist if the command event does
+        {
+            if (acknowledgement_event_allocated)
+            {
+                m_acknowledgement_event.FinalizeObject();
+            }
+
+            m_command_event.FinalizeObject();
+        }
+    }
+
+    return result;
+}
+
+// Prepares *thread_attr for creating a joinable worker thread.
+//   thread_attr - attribute object to initialize; after a successful call it has
+//                 to be released with FinalizeThreadAttributes()
+//   stack_size  - requested stack size; 0 leaves the default stack size in place
+// Scheduling inheritance and non-lazy stack allocation are requested as well
+// when the corresponding HAVE_PTHREAD_ATTR_* configuration macros are set.
+// Returns false with errno set to the pthread error code on failure, in which
+// case no initialized attribute object is left behind.
+bool dxThreadPoolThreadInfo::InitializeThreadAttributes(pthread_attr_t *thread_attr, sizeint stack_size)
+{
+    const int init_result = pthread_attr_init(thread_attr);
+    if (init_result != EOK)
+    {
+        errno = init_result;
+        return false;
+    }
+
+    // Apply the settings one by one; the first failure skips the remaining ones
+    int set_result = pthread_attr_setdetachstate(thread_attr, PTHREAD_CREATE_JOINABLE);
+#if (HAVE_PTHREAD_ATTR_SETINHERITSCHED)
+    if (set_result == EOK)
+    {
+        set_result = pthread_attr_setinheritsched(thread_attr, PTHREAD_INHERIT_SCHED);
+    }
+#endif
+#if (HAVE_PTHREAD_ATTR_SETSTACKLAZY)
+    if (set_result == EOK)
+    {
+        set_result = pthread_attr_setstacklazy(thread_attr, PTHREAD_STACK_NOTLAZY);
+    }
+#endif
+    if (set_result == EOK && stack_size != 0)
+    {
+        set_result = pthread_attr_setstacksize(thread_attr, stack_size);
+    }
+
+    if (set_result != EOK)
+    {
+        errno = set_result;
+        int destroy_result = pthread_attr_destroy(thread_attr);
+        dIVERIFY(destroy_result == EOK);
+        return false;
+    }
+
+    return true;
+}
+
+void dxThreadPoolThreadInfo::FinalizeThreadAttributes(pthread_attr_t *thread_attr) // Releases attributes prepared by a successful InitializeThreadAttributes()
+{
+    int destroy_result = pthread_attr_destroy(thread_attr);
+    dIVERIFY(destroy_result == EOK); // A failure here is only verified, not reported to the caller
+}
+
+bool dxThreadPoolThreadInfo::WaitInitStatus()
+{
+    bool acknowledgement_wait_result = m_acknowledgement_event.WaitInfinitely();
+    dICHECK(acknowledgement_wait_result);
+
+    int error_code = (int)(sizeint)m_command_param;
+
+    bool init_status = error_code == EOK ? true : ((errno = error_code), false);
+    return init_status;
+}
+
+// Asks a running worker thread to exit, joins it and releases both events;
+// does nothing if no thread is allocated.
+void dxThreadPoolThreadInfo::Finalize()
+{
+    if (!m_thread_allocated) { return; }
+
+    // No response wait is requested here: the join below waits for the thread itself
+    ExecuteThreadCommand(dxTHREAD_COMMAND_EXIT, NULL, false);
+    WaitAndCloseThreadHandle(m_thread_handle);
+    m_thread_allocated = false;
+    m_command_event.FinalizeObject();
+    m_acknowledgement_event.FinalizeObject();
+}
+
+void dxThreadPoolThreadInfo::WaitAndCloseThreadHandle(pthread_t thread_handle) // Blocks until the given thread has terminated
+{
+    int join_result = pthread_join(thread_handle, NULL); // NULL: the thread's return value is not retrieved
+    dICHECK(join_result == EOK);
+}
+
+void dxThreadPoolThreadInfo::ExecuteThreadCommand(dxTHREADCOMMAND command, void *param, bool wait_response) // Hands command/param over to the worker thread; with wait_response it also waits for the acknowledgement
+{
+    bool acknowledgement_wait_result = m_acknowledgement_event.WaitInfinitely(); // Wait until the init report or the previous command has been acknowledged
+    dICHECK(acknowledgement_wait_result);
+
+    m_acknowledgement_event.ResetEvent(); // The event is manual-reset, so it is cleared explicitly before the next command
+
+    m_command_code = command; // Both fields are written before the worker is woken up
+    m_command_param = param;
+
+    m_command_event.SetEvent(); // Wakes RunCommandHandlingLoop()
+
+    if (wait_response)
+    {
+        bool new_acknowledgement_wait_result = m_acknowledgement_event.WaitInfinitely(); // The event stays signaled afterwards (manual-reset)
+        dICHECK(new_acknowledgement_wait_result);
+    }
+}
+
+// pthread entry point: thread_param is the dxThreadPoolThreadInfo that was passed to pthread_create().
+void *dxThreadPoolThreadInfo::ThreadProcedure_Callback(void *thread_param)
+{
+    static_cast<dxThreadPoolThreadInfo *>(thread_param)->ThreadProcedure();
+
+    return NULL; // pthread_join() in this file does not retrieve the value
+}
+
+// Worker thread body: allocates ODE per-thread data, blocks signals, reports the outcome
+// to the creating thread and, on success, serves commands until told to exit.
+void dxThreadPoolThreadInfo::ThreadProcedure()
+{
+    bool init_result = false;
+    if (dAllocateODEDataForThread(m_ode_data_allocate_flags) != 0)
+    {
+        init_result = DisableSignalHandlers();
+    }
+    ReportInitStatus(init_result);
+    if (!init_result) { return; }
+    RunCommandHandlingLoop();
+    // dCleanupODEAllDataForThread(); -- this function can only be called if ODE was initialized for manual cleanup. And that is unknown here...
+}
+
+// Blocks all signals for the calling worker thread; returns false with errno set on failure.
+bool dxThreadPoolThreadInfo::DisableSignalHandlers()
+{
+    sigset_t set;
+    sigfillset( &set );
+
+    // pthread_sigmask() is used because sigprocmask() is unspecified in multi-threaded processes.
+    // It returns the error number directly, so errno is set by hand for ReportInitStatus().
+    int mask_result = pthread_sigmask( SIG_BLOCK, &set, NULL );
+    if (mask_result != EOK) { errno = mask_result; }
+
+    bool result = mask_result == EOK;
+    return result;
+}
+
+void dxThreadPoolThreadInfo::ReportInitStatus(bool init_result) // Publishes the init outcome and releases WaitInitStatus()
+{
+    const int status_code = init_result ? EOK : (errno != EOK ? errno : EFAULT); // EFAULT stands in when errno holds no error
+    m_command_param = (void *)(sizeint)status_code;
+    m_acknowledgement_event.SetEvent();
+}
+
+void dxThreadPoolThreadInfo::RunCommandHandlingLoop() // Worker-side command loop: waits for commands and acknowledges each one until EXIT is received
+{
+    bool exit_requested = false;
+
+    while (!exit_requested)
+    {
+        bool command_wait_result = m_command_event.WaitInfinitely(); // Set by ExecuteThreadCommand() after the command fields were written
+        dICHECK(command_wait_result);
+
+        const dxTHREADCOMMAND command_code = m_command_code;
+        switch (command_code)
+        {
+            case dxTHREAD_COMMAND_EXIT:
+            {
+                m_acknowledgement_event.SetEvent();
+
+                exit_requested = true; // Ends the loop; the thread procedure returns afterwards
+                break;
+            }
+
+            default: // Unknown command code: asserted, then handled like a no-op
+            {
+                dIASSERT(false);
+                // break; -- proceed to case dxTHREAD_COMMAND_NOOP
+            }
+
+            case dxTHREAD_COMMAND_NOOP:
+            {
+                m_acknowledgement_event.SetEvent(); // The acknowledgement is the only effect of this command
+
+                // Do nothing
+                break;
+            }
+
+            case dxTHREAD_COMMAND_SERVE_IMPLEMENTATION:
+            {
+                const dxServeImplementationParams *serve_params = (const dxServeImplementationParams *)m_command_param;
+                dThreadingImplementationID impl = serve_params->m_impl; // The fields are copied out before the command is acknowledged
+                dxEventObject *ready_wait_event = serve_params->m_ready_wait_event;
+
+                m_acknowledgement_event.SetEvent(); // serve_params is not accessed after this point
+
+                ThreadedServeImplementation(impl, ready_wait_event); // The next command is only waited for after this call returns
+                break;
+            }
+        }
+    }
+}
+
+void dxThreadPoolThreadInfo::ThreadedServeImplementation(dThreadingImplementationID impl, dxEventObject *ready_wait_event) // Runs StickToJobsProcessing() of impl on the calling thread
+{
+    ((dxIThreadingImplementation *)impl)->StickToJobsProcessing(&ProcessThreadServeReadiness_Callback, (void *)ready_wait_event); // ready_wait_event is passed along as the callback context
+}
+
+// Readiness callback handed to StickToJobsProcessing(): context is the dxEventObject to signal.
+void dxThreadPoolThreadInfo::ProcessThreadServeReadiness_Callback(void *context)
+{
+    // Wakes the thread waiting on the ready event in ServeThreadingImplementation()
+    static_cast<dxEventObject *>(context)->SetEvent();
+}
+
+
+
+struct dxThreadingThreadPool: // Pool of worker threads, kept as an array of dxThreadPoolThreadInfo
+    public dBase
+{
+public:
+    dxThreadingThreadPool(); // Creates an empty pool
+    ~dxThreadingThreadPool(); // Calls FinalizeThreads()
+
+    bool InitializeThreads(sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags); // Allocates the array and starts thread_count threads
+
+private:
+    void FinalizeThreads(); // Stops all threads and frees the array
+
+    bool InitializeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags); // Constructs and starts every array element
+    void FinalizeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count); // Destroys the first thread_count array elements
+
+    bool InitializeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info, sizeint stack_size, unsigned int ode_data_allocate_flags); // Placement-constructs and initializes one element
+    void FinalizeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info); // Calls the destructor of one element in place
+
+public:
+    void ServeThreadingImplementation(dThreadingImplementationID impl); // Commands every thread to serve impl
+    void WaitIdleState(); // Sends a no-op command to every thread and waits for each acknowledgement
+
+private:
+    dxThreadPoolThreadInfo  *m_thread_infos; // Array obtained from dAlloc(); NULL while the pool has no threads
+    sizeint                  m_thread_count; // Number of elements in m_thread_infos
+    dxEventObject           m_ready_wait_event; // Passed to the threads with the serve command; set by their readiness callback
+};
+
+
+// Creates an empty pool; threads are added later by InitializeThreads().
+dxThreadingThreadPool::dxThreadingThreadPool()
+    : m_thread_infos(NULL), m_thread_count(0)
+    , m_ready_wait_event()
+{
+}
+
+dxThreadingThreadPool::~dxThreadingThreadPool() // Stops the threads and frees the thread info array, if any
+{
+    FinalizeThreads(); // Does nothing when m_thread_infos is NULL
+}
+
+
+// Starts thread_count worker threads for the pool.
+//
+//   thread_count            - number of worker threads to create
+//   stack_size              - stack size passed on to every thread's Initialize()
+//   ode_data_allocate_flags - flags passed on to every thread's Initialize()
+//
+// Must only be called on a pool that has no threads yet (asserted below).
+// Returns true when every thread has been started. On any failure returns
+// false after releasing whatever had been set up by this call.
+//
+// On success the pool keeps the thread info array until FinalizeThreads() runs.
+bool dxThreadingThreadPool::InitializeThreads(sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    dIASSERT(m_thread_infos == NULL);
+
+    // The event that is handed to the worker threads for signaling readiness;
+    // it is created with manual_reset = false and initial_state = false
+    if (!m_ready_wait_event.InitializeObject(false, false))
+    {
+        return false;
+    }
+
+    // Raw storage only: the elements are constructed in place, one by one,
+    // by InitializeIndividualThreadInfos()
+    const sizeint infos_size = thread_count * sizeof(dxThreadPoolThreadInfo);
+
+    dxThreadPoolThreadInfo *thread_infos = (dxThreadPoolThreadInfo *)dAlloc(infos_size);
+
+    if (thread_infos == NULL)
+    {
+        // Nothing but the event exists so far
+        m_ready_wait_event.FinalizeObject();
+
+        return false;
+    }
+
+    if (!InitializeIndividualThreadInfos(thread_infos, thread_count, stack_size, ode_data_allocate_flags))
+    {
+        // The callee leaves no element constructed when it fails, so only
+        // the storage and the event remain to be released
+        dFree(thread_infos, infos_size);
+
+        m_ready_wait_event.FinalizeObject();
+
+        return false;
+    }
+
+    // Success: hand the array over to the pool
+    m_thread_infos = thread_infos;
+    m_thread_count = thread_count;
+
+    bool result = true;
+    return result;
+}
+
+// Stops all worker threads and releases the pool's resources; the members are reset so that a repeated call is a no-op.
+void dxThreadingThreadPool::FinalizeThreads()
+{
+    dxThreadPoolThreadInfo *thread_infos = m_thread_infos;
+    if (thread_infos == NULL) { return; }
+
+    sizeint thread_count = m_thread_count;
+    FinalizeIndividualThreadInfos(thread_infos, thread_count);
+    dFree(thread_infos, thread_count * sizeof(dxThreadPoolThreadInfo));
+    m_thread_infos = NULL; // Forget the freed array, like the other Finalize functions in this file reset their state
+    m_thread_count = 0;
+    m_ready_wait_event.FinalizeObject();
+}
+
+
+// Constructs and starts thread_count elements in the raw storage at thread_infos.
+// If an element fails, the elements started before it are finalized and false is returned.
+bool dxThreadingThreadPool::InitializeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    sizeint started_count = 0;
+    while (started_count != thread_count
+        && InitializeSingleThreadInfo(thread_infos + started_count, stack_size, ode_data_allocate_flags))
+    {
+        ++started_count;
+    }
+    const bool all_started = started_count == thread_count;
+    if (!all_started)
+    {
+        // The failed element has already been destroyed by InitializeSingleThreadInfo()
+        FinalizeIndividualThreadInfos(thread_infos, started_count);
+    }
+
+    return all_started;
+}
+
+// Finalizes the first thread_count elements of thread_infos, in array order.
+void dxThreadingThreadPool::FinalizeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count)
+{
+    for (sizeint info_index = 0; info_index != thread_count; ++info_index)
+    {
+        FinalizeSingleThreadInfo(thread_infos + info_index);
+    }
+}
+
+
+// Constructs a dxThreadPoolThreadInfo in the raw storage at thread_info and starts its thread.
+// On failure the object is destroyed again before false is returned.
+bool dxThreadingThreadPool::InitializeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    // Placement new: the storage is provided by the caller
+    new(thread_info) dxThreadPoolThreadInfo();
+
+    const bool thread_started = thread_info->Initialize(stack_size, ode_data_allocate_flags);
+
+    if (!thread_started)
+    {
+        // Explicit destructor call as the counterpart of the placement new above
+        thread_info->dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo();
+    }
+
+    return thread_started;
+}
+
+// Destroys the object at thread_info in place (the storage itself is not freed); NULL is ignored.
+void dxThreadingThreadPool::FinalizeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info)
+{
+    if (thread_info == NULL) { return; }
+
+    thread_info->dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo();
+}
+
+
+// Commands every pool thread, one after another, to serve impl. For each thread the call waits
+// for the command acknowledgement and then for the shared ready event to be set.
+void dxThreadingThreadPool::ServeThreadingImplementation(dThreadingImplementationID impl)
+{
+    // Lives on this stack frame; the worker copies the fields out before it acknowledges
+    dxThreadPoolThreadInfo::dxServeImplementationParams params(impl, &m_ready_wait_event);
+    for (sizeint info_index = 0; info_index != m_thread_count; ++info_index)
+    {
+        m_thread_infos[info_index].ExecuteThreadCommand(dxThreadPoolThreadInfo::dxTHREAD_COMMAND_SERVE_IMPLEMENTATION, &params, true);
+        bool ready_wait_result = m_ready_wait_event.WaitInfinitely();
+        dICHECK(ready_wait_result);
+    }
+}
+
+// Sends a no-op command to each pool thread in turn and waits for its acknowledgement.
+void dxThreadingThreadPool::WaitIdleState()
+{
+    for (sizeint info_index = 0; info_index != m_thread_count; ++info_index)
+    {
+        m_thread_infos[info_index].ExecuteThreadCommand(dxThreadPoolThreadInfo::dxTHREAD_COMMAND_NOOP, NULL, true);
+    }
+}
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+// Public API: creates a pool of thread_count worker threads (thread_count must not be zero).
+// stack_size and ode_data_allocate_flags are forwarded to InitializeThreads();
+// reserved is not used by this implementation.
+// Returns the pool, or NULL if it could not be created or built-in threading is disabled.
+/*extern */dThreadingThreadPoolID dThreadingAllocateThreadPool(unsigned thread_count, 
+                                                               sizeint stack_size, unsigned int ode_data_allocate_flags, void *reserved/*=NULL*/)
+{
+    dAASSERT(thread_count != 0);
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dxThreadingThreadPool *thread_pool = new dxThreadingThreadPool();
+
+    // A pool whose threads could not be started is deleted and NULL is returned
+    if (thread_pool != NULL
+        && !thread_pool->InitializeThreads(thread_count, stack_size, ode_data_allocate_flags))
+    {
+        delete thread_pool;
+        thread_pool = NULL;
+    }
+#else
+    dThreadingThreadPoolID thread_pool = NULL;
+    (void)stack_size; // unused
+    (void)ode_data_allocate_flags; // unused
+    (void)reserved; // unused
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+    return (dThreadingThreadPoolID)thread_pool;
+}
+
+// Public API: makes the threads of pool serve impl; does nothing when built-in threading is disabled.
+/*extern */void dThreadingThreadPoolServeMultiThreadedImplementation(dThreadingThreadPoolID pool, dThreadingImplementationID impl)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    ((dxThreadingThreadPool *)pool)->ServeThreadingImplementation(impl);
+#else
+    (void)pool; // unused
+    (void)impl; // unused
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+// Public API: forwards to WaitIdleState() of pool; does nothing when built-in threading is disabled.
+/*extern */void dThreadingThreadPoolWaitIdleState(dThreadingThreadPoolID pool)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    ((dxThreadingThreadPool *)pool)->WaitIdleState();
+#else
+    (void)pool; // unused
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+// Public API: deletes pool, which stops its threads; does nothing when built-in threading is disabled.
+/*extern */void dThreadingFreeThreadPool(dThreadingThreadPoolID pool)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    delete (dxThreadingThreadPool *)pool;
+#else
+    (void)pool; // unused
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+
+#endif // #if !defined(_WIN32)
diff --git a/libs/ode-0.16.1/ode/src/threading_pool_win.cpp b/libs/ode-0.16.1/ode/src/threading_pool_win.cpp
new file mode 100644
index 0000000..5c17f10
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threading_pool_win.cpp
@@ -0,0 +1,670 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * Threading Windows thread pool implementation file.                    *
+ * Copyright (C) 2011-2019 Oleh Derevenko. All rights reserved.          *
+ * e-mail: odar@eleks.com (change all "a" to "e")                        *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+ *  Windows thread pool implementation for built-in threading support provider.
+ */
+
+
+#if defined(_WIN32)
+
+#include <ode/odeconfig.h>
+#include <ode/error.h>
+#include <ode/threading_impl.h>
+#include <ode/odeinit.h>
+#include "config.h"
+#include "objects.h"
+#include "threading_impl_templates.h"
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+#include <Windows.h>
+#include <process.h>
+#include <new>
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+
+#define THREAD_STACK_MAX  ((sizeint)(UINT_MAX - 1)) // The absolute maximum would be UINT_MAX but let it be a little bit less to avoid "Comparison is always false" warnings. ;)
+
+
+struct dxEventObject // Thin wrapper around a Win32 event handle
+{
+public:
+    dxEventObject(): m_event_handle(NULL) {} // Starts without a handle; InitializeObject() has to succeed before the event is used
+    ~dxEventObject() { FinalizeObject(); } // Closes the handle if one is held
+
+    bool InitializeObject(bool manual_reset, bool initial_state); // Creates the event with CreateEvent(); returns false on failure
+    void FinalizeObject(); // Closes the handle and resets it to NULL
+
+    bool WaitInfinitely() { return ::WaitForSingleObject(m_event_handle, INFINITE) == WAIT_OBJECT_0; } // true only if the wait ended because the event was signaled
+    void SetEvent(); // Signals the event
+    void ResetEvent(); // Resets the event to the non-signaled state
+
+private:
+    HANDLE        m_event_handle; // NULL while no event is allocated
+};
+
+// Creates the unnamed Win32 event backing this object.
+//
+//   manual_reset  - passed to CreateEvent() as the manual-reset flag
+//   initial_state - passed to CreateEvent() as the initial signaled state
+//
+// Must not be called while a handle is already held (asserted below).
+// Returns false, leaving the object without a handle, if CreateEvent() fails.
+bool dxEventObject::InitializeObject(bool manual_reset, bool initial_state)
+{
+    dIASSERT(m_event_handle == NULL);
+
+    HANDLE created_handle = ::CreateEvent(NULL, manual_reset, initial_state, NULL);
+    if (created_handle == NULL)
+    {
+        return false;
+    }
+
+    m_event_handle = created_handle;
+
+    return true;
+}
+
+// Closes the event handle, if one is held, and forgets it;
+// a call without a handle does nothing.
+void dxEventObject::FinalizeObject()
+{
+    if (m_event_handle == NULL) { return; }
+
+    BOOL close_result = ::CloseHandle(m_event_handle);
+    dICHECK(close_result); // Object destruction should always succeed
+
+    m_event_handle = NULL;
+}
+
+void dxEventObject::SetEvent() // Signals the event
+{
+    BOOL set_result = ::SetEvent(m_event_handle); // Global-scope call: the Win32 function, not this method
+    dICHECK(set_result);
+}
+
+void dxEventObject::ResetEvent() // Puts the event into the non-signaled state
+{
+    BOOL reset_result = ::ResetEvent(m_event_handle); // Global-scope call: the Win32 function, not this method
+    dICHECK(reset_result);
+}
+
+
+
+struct dxThreadPoolThreadInfo // State of one pool worker thread plus the command/acknowledgement channel used to drive it
+{
+public:
+    dxThreadPoolThreadInfo(); // Creates the object without a thread
+    ~dxThreadPoolThreadInfo(); // Calls Finalize()
+
+    bool Initialize(sizeint stack_size, unsigned int ode_data_allocate_flags); // Creates both events and the thread, then waits for the thread's init report
+
+private:
+    bool WaitInitStatus(); // Waits for the init report of the new thread and decodes its result
+
+private:
+    void Finalize(); // Sends the exit command, waits for the thread and releases the events
+    void WaitAndCloseThreadHandle(HANDLE thread_handle); // Waits for the thread to end and closes its handle
+
+public:
+    enum dxTHREADCOMMAND // Command codes handed to the worker thread
+    {
+        dxTHREAD_COMMAND_EXIT, // Sent by Finalize() before the thread is waited for
+        dxTHREAD_COMMAND_NOOP,
+        dxTHREAD_COMMAND_SERVE_IMPLEMENTATION, // Carries a dxServeImplementationParams as its parameter
+    };
+
+    struct dxServeImplementationParams // Parameter block for dxTHREAD_COMMAND_SERVE_IMPLEMENTATION
+    {
+        dxServeImplementationParams(dThreadingImplementationID impl, dxEventObject *ready_wait_event):
+    m_impl(impl), m_ready_wait_event(ready_wait_event)
+    {
+    }
+
+    dThreadingImplementationID m_impl; // Implementation the thread is to serve
+    dxEventObject *m_ready_wait_event; // Event associated with the serve request
+    };
+
+    void ExecuteThreadCommand(dxTHREADCOMMAND command, void *param, bool wait_response); // Hands a command to the thread, optionally waiting for its acknowledgement
+
+private:
+    static unsigned CALLBACK ThreadProcedure_Callback(void *thread_param); // _beginthreadex() entry point; forwards to ThreadProcedure()
+    void ThreadProcedure();
+    void ReportInitStatus(bool init_result);
+    void RunCommandHandlingLoop();
+
+    void ThreadedServeImplementation(dThreadingImplementationID impl, dxEventObject *ready_wait_event);
+    static void ProcessThreadServeReadiness_Callback(void *context);
+
+private:
+    HANDLE      m_thread_handle; // NULL while no thread is allocated
+
+    unsigned int m_ode_data_allocate_flags; // Stored by Initialize() before the thread is created
+    dxTHREADCOMMAND m_command_code; // Command being handed over; written before m_command_event is set
+    dxEventObject m_command_event; // Initialized with manual_reset = false; set to pass a new command
+    dxEventObject m_acknowledgement_event; // Initialized with manual_reset = true; waited on for the init report and command acknowledgements
+    void        *m_command_param; // Command parameter; also carries the init error code during startup
+};
+
+
+// Puts the object into the "no thread" state (NULL handle); nothing is
+// allocated until Initialize() succeeds.
+dxThreadPoolThreadInfo::dxThreadPoolThreadInfo()
+    : m_thread_handle(NULL), m_ode_data_allocate_flags(0)
+    , m_command_code(dxTHREAD_COMMAND_EXIT)
+    , m_command_event(), m_acknowledgement_event()
+    , m_command_param(NULL)
+{
+}
+
+dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo() // Stops the worker thread, if one was started
+{
+    Finalize(); // Does nothing while m_thread_handle is NULL
+}
+
+
+bool dxThreadPoolThreadInfo::Initialize(sizeint stack_size, unsigned int ode_data_allocate_flags) // Creates the events and the worker thread; returns false if any step fails
+{
+    bool result = false;
+
+    bool command_event_allocated = false, acknowledgement_event_allocated = false; // Track what the failure path below has to release
+
+    HANDLE thread_handle = NULL;
+
+    do // Single-pass block: "break" jumps to the cleanup code after it
+    {
+        if (stack_size > THREAD_STACK_MAX) // The size is cast to unsigned for _beginthreadex() below
+        {
+            SetLastError(ERROR_INVALID_PARAMETER);
+            break;
+        }
+
+        if (!m_command_event.InitializeObject(false, false)) // manual_reset = false, initially non-signaled
+        {
+            break;
+        }
+
+        command_event_allocated = true;
+
+        if (!m_acknowledgement_event.InitializeObject(true, false)) // manual_reset = true, initially non-signaled
+        {
+            break;
+        }
+
+        acknowledgement_event_allocated = true;
+
+        m_ode_data_allocate_flags = ode_data_allocate_flags; // Stored before the thread is created
+
+        thread_handle = (HANDLE)_beginthreadex(NULL, (unsigned)stack_size, &ThreadProcedure_Callback, (void *)this, 0, NULL);
+        if (thread_handle == NULL) // Not a bug!!! _beginthreadex() returns NULL on failure
+        {
+            break;
+        }
+
+        // It is OK to alter priority for thread without creating it in suspended state as
+        // it is anyway going to be waited for (waited for its init result) and 
+        // will not be issued commands until after that.
+        int own_priority = GetThreadPriority(GetCurrentThread());
+        if (own_priority != THREAD_PRIORITY_ERROR_RETURN)
+        {
+            if (!SetThreadPriority(thread_handle, own_priority))
+            {
+                // own_priority = THREAD_PRIORITY_ERROR_RETURN; -- Well, if priority inheritance fails - just ignore it :-/
+            }
+        }
+
+        bool thread_init_result = WaitInitStatus(); // Blocks until the new thread has reported its init status
+        if (!thread_init_result)
+        {
+            DWORD error_save = GetLastError(); // Keep the reported error across the wait/close calls below
+            WaitAndCloseThreadHandle(thread_handle);
+            SetLastError(error_save);
+            break;
+        }
+
+        m_thread_handle = thread_handle; // From here on Finalize() is responsible for the thread and the events
+        result = true;
+    }
+    while (false);
+
+    if (!result)
+    {
+        if (command_event_allocated) // The acknowledgement event can only exist if the command event does
+        {
+            if (acknowledgement_event_allocated)
+            {
+                m_acknowledgement_event.FinalizeObject();
+            }
+
+            m_command_event.FinalizeObject();
+        }
+    }
+
+    return result;
+}
+
+// Blocks until the acknowledgement event is signaled and then decodes the
+// initialization status that ReportInitStatus() stored in m_command_param
+// (a Win32 error code carried in a pointer-sized value).
+// Returns true for ERROR_SUCCESS; otherwise publishes the code with
+// SetLastError() and returns false.
+bool dxThreadPoolThreadInfo::WaitInitStatus()
+{
+    const bool wait_succeeded = m_acknowledgement_event.WaitInfinitely();
+    dICHECK(wait_succeeded);
+
+    const DWORD reported_error = (DWORD)(sizeint)m_command_param;
+    if (reported_error == ERROR_SUCCESS)
+    {
+        return true;
+    }
+
+    SetLastError(reported_error);
+    return false;
+}
+
+// Shuts the worker thread down (if one was started): sends the EXIT command,
+// waits for the thread to terminate, closes its handle and releases both events.
+// Does nothing when no thread handle is stored.
+void dxThreadPoolThreadInfo::Finalize()
+{
+    HANDLE const owned_thread = m_thread_handle;
+    if (owned_thread == NULL)
+    {
+        return;
+    }
+
+    // No response wait here: thread termination itself is awaited just below
+    ExecuteThreadCommand(dxTHREAD_COMMAND_EXIT, NULL, false);
+
+    WaitAndCloseThreadHandle(owned_thread);
+    m_thread_handle = NULL;
+
+    m_command_event.FinalizeObject();
+    m_acknowledgement_event.FinalizeObject();
+}
+
+// Waits (without timeout) for the given thread to terminate and then closes its handle.
+void dxThreadPoolThreadInfo::WaitAndCloseThreadHandle(HANDLE thread_handle)
+{
+    const DWORD wait_status = WaitForSingleObject(thread_handle, INFINITE);
+    dICHECK(wait_status == WAIT_OBJECT_0);
+
+    const BOOL handle_closed = CloseHandle(thread_handle);
+    dIVERIFY(handle_closed);
+}
+
+// Hands a command over to the worker thread.
+//   command       - command code the worker will read from m_command_code
+//   param         - command argument the worker will read from m_command_param
+//   wait_response - when true, also wait for the worker to acknowledge the new command
+void dxThreadPoolThreadInfo::ExecuteThreadCommand(dxTHREADCOMMAND command, void *param, bool wait_response)
+{
+    // Wait for the acknowledgement of the previous command (or of thread init) first
+    const bool previous_acknowledged = m_acknowledgement_event.WaitInfinitely();
+    dICHECK(previous_acknowledged);
+
+    m_acknowledgement_event.ResetEvent();
+
+    // The command fields are filled in before the worker is woken up
+    m_command_code = command;
+    m_command_param = param;
+
+    m_command_event.SetEvent();
+
+    if (!wait_response)
+    {
+        return;
+    }
+
+    const bool current_acknowledged = m_acknowledgement_event.WaitInfinitely();
+    dICHECK(current_acknowledged);
+}
+
+// Thread entry point passed to _beginthreadex(); thread_param is the owning
+// dxThreadPoolThreadInfo instance. Always returns 0 as the thread exit code.
+unsigned CALLBACK dxThreadPoolThreadInfo::ThreadProcedure_Callback(void *thread_param)
+{
+    static_cast<dxThreadPoolThreadInfo *>(thread_param)->ThreadProcedure();
+    return 0;
+}
+
+// Body of the worker thread: allocates per-thread ODE data, reports the outcome
+// to the creating thread and, on success, serves commands until exit is requested.
+void dxThreadPoolThreadInfo::ThreadProcedure()
+{
+    const bool data_allocated = dAllocateODEDataForThread(m_ode_data_allocate_flags) != 0;
+
+    ReportInitStatus(data_allocated);
+
+    if (!data_allocated)
+    {
+        return;
+    }
+
+    RunCommandHandlingLoop();
+
+    // dCleanupODEAllDataForThread(); -- this function can only be called if ODE was initialized for manual cleanup. And that is unknown here...
+}
+
+// Publishes the thread initialization outcome through m_command_param and
+// signals the acknowledgement event. Success is reported as ERROR_SUCCESS;
+// failure is reported as GetLastError(), or ERROR_INTERNAL_ERROR if no error code is set.
+void dxThreadPoolThreadInfo::ReportInitStatus(bool init_result)
+{
+    DWORD status_code = ERROR_SUCCESS;
+
+    if (!init_result)
+    {
+        status_code = GetLastError();
+
+        if (status_code == ERROR_SUCCESS)
+        {
+            // A failure must never be mistaken for success by WaitInitStatus()
+            status_code = ERROR_INTERNAL_ERROR;
+        }
+    }
+
+    m_command_param = (void *)(sizeint)status_code;
+
+    m_acknowledgement_event.SetEvent();
+}
+
+// Worker-side command loop: waits on the command event, reads the command code
+// and dispatches on it until dxTHREAD_COMMAND_EXIT is received.
+// Every branch signals the acknowledgement event, which is what
+// ExecuteThreadCommand() waits on.
+void dxThreadPoolThreadInfo::RunCommandHandlingLoop()
+{
+    bool exit_requested = false;
+
+    while (!exit_requested)
+    {
+        bool command_wait_result = m_command_event.WaitInfinitely();
+        dICHECK(command_wait_result);
+
+        const dxTHREADCOMMAND command_code = m_command_code;
+        switch (command_code)
+        {
+            case dxTHREAD_COMMAND_EXIT:
+            {
+                m_acknowledgement_event.SetEvent();
+
+                // Leave the loop; the thread procedure returns afterwards
+                exit_requested = true;
+                break;
+            }
+
+            default:
+            {
+                // Unknown command codes assert and are then handled like a no-op
+                dIASSERT(false);
+                // break; -- proceed to case dxTHREAD_COMMAND_NOOP
+            }
+
+            case dxTHREAD_COMMAND_NOOP:
+            {
+                m_acknowledgement_event.SetEvent();
+
+                // Do nothing
+                break;
+            }
+
+            case dxTHREAD_COMMAND_SERVE_IMPLEMENTATION:
+            {
+                // Copy everything needed out of the parameter block BEFORE acknowledging;
+                // the fields are not touched through serve_params after that point
+                const dxServeImplementationParams *serve_params = (const dxServeImplementationParams *)m_command_param;
+                dThreadingImplementationID impl = serve_params->m_impl;
+                dxEventObject *ready_wait_event = serve_params->m_ready_wait_event;
+
+                m_acknowledgement_event.SetEvent();
+
+                ThreadedServeImplementation(impl, ready_wait_event);
+                break;
+            }
+        }
+    }
+}
+
+// Runs the job processing of the given threading implementation on the calling thread.
+// ready_wait_event is passed through as the context of the readiness callback.
+void dxThreadPoolThreadInfo::ThreadedServeImplementation(dThreadingImplementationID impl, dxEventObject *ready_wait_event)
+{
+    dxIThreadingImplementation *const implementation = (dxIThreadingImplementation *)impl;
+    implementation->StickToJobsProcessing(&ProcessThreadServeReadiness_Callback, (void *)ready_wait_event);
+}
+
+// Readiness callback handed to StickToJobsProcessing(); context is the
+// dxEventObject to be signaled.
+void dxThreadPoolThreadInfo::ProcessThreadServeReadiness_Callback(void *context)
+{
+    static_cast<dxEventObject *>(context)->SetEvent();
+}
+
+
+
+// Pool of worker threads (one dxThreadPoolThreadInfo per thread) that can be
+// attached to a threading implementation to process its jobs.
+struct dxThreadingThreadPool:
+    public dBase
+{
+public:
+    dxThreadingThreadPool();
+    ~dxThreadingThreadPool(); // Finalizes the threads, if any were created
+
+    // Creates thread_count worker threads; returns false (leaving the pool empty) on failure
+    bool InitializeThreads(sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags);
+
+private:
+    void FinalizeThreads();
+
+    // Array-level helpers operating on raw storage for thread infos
+    bool InitializeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags);
+    void FinalizeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count);
+
+    // Element-level helpers: in-place construction/initialization and destruction
+    bool InitializeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info, sizeint stack_size, unsigned int ode_data_allocate_flags);
+    void FinalizeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info);
+
+public:
+    // Commands every pool thread to serve the given threading implementation
+    void ServeThreadingImplementation(dThreadingImplementationID impl);
+    // Returns after every pool thread has acknowledged a no-op command
+    void WaitIdleState();
+
+private:
+    dxThreadPoolThreadInfo  *m_thread_infos;   // Storage obtained with dAlloc(); NULL while no threads exist
+    sizeint                  m_thread_count;   // Number of elements in m_thread_infos
+    dxEventObject           m_ready_wait_event; // Signaled by a worker via the serve readiness callback
+};
+
+
+// Creates an empty pool; threads are only started by InitializeThreads().
+dxThreadingThreadPool::dxThreadingThreadPool():
+    m_thread_infos(NULL), m_thread_count(0), m_ready_wait_event()
+{
+}
+
+// Stops and releases all the pool threads (no-op for an empty pool).
+dxThreadingThreadPool::~dxThreadingThreadPool()
+{
+    FinalizeThreads();
+}
+
+
+// Creates the readiness event, allocates storage for thread_count thread infos
+// and starts all the threads. On any failure everything acquired so far is
+// released again, the members are left untouched and false is returned.
+bool dxThreadingThreadPool::InitializeThreads(sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    dIASSERT(m_thread_infos == NULL);
+
+    if (!m_ready_wait_event.InitializeObject(false, false))
+    {
+        // Nothing has been acquired yet
+        return false;
+    }
+
+    dxThreadPoolThreadInfo *const infos_storage = (dxThreadPoolThreadInfo *)dAlloc(thread_count * sizeof(dxThreadPoolThreadInfo));
+
+    if (infos_storage != NULL)
+    {
+        if (InitializeIndividualThreadInfos(infos_storage, thread_count, stack_size, ode_data_allocate_flags))
+        {
+            m_thread_infos = infos_storage;
+            m_thread_count = thread_count;
+            return true;
+        }
+
+        // The individual infos have already been finalized by the failed call
+        dFree(infos_storage, thread_count * sizeof(dxThreadPoolThreadInfo));
+    }
+
+    m_ready_wait_event.FinalizeObject();
+    return false;
+}
+
+// Stops all the pool threads, destroys their info objects, frees the storage
+// and releases the readiness event. Does nothing if the pool has no threads.
+// The members are reset afterwards (mirroring dxThreadPoolThreadInfo::Finalize(),
+// which clears its thread handle) so that the pool never keeps a pointer to
+// freed memory and a repeated call is harmless.
+void dxThreadingThreadPool::FinalizeThreads()
+{
+    dxThreadPoolThreadInfo *thread_infos = m_thread_infos;
+    if (thread_infos != NULL)
+    {
+        sizeint thread_count = m_thread_count;
+
+        FinalizeIndividualThreadInfos(thread_infos, thread_count);
+        dFree(thread_infos, thread_count * sizeof(dxThreadPoolThreadInfo));
+
+        // Forget the released storage to avoid a dangling pointer / double free
+        m_thread_infos = NULL;
+        m_thread_count = 0;
+
+        m_ready_wait_event.FinalizeObject();
+    }
+}
+
+
+// Constructs and initializes thread_count infos in the given raw storage.
+// If an element fails, the elements initialized before it are finalized and
+// false is returned.
+bool dxThreadingThreadPool::InitializeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    for (sizeint info_index = 0; info_index != thread_count; ++info_index)
+    {
+        if (!InitializeSingleThreadInfo(thread_infos + info_index, stack_size, ode_data_allocate_flags))
+        {
+            // Roll back only the elements that were fully initialized
+            FinalizeIndividualThreadInfos(thread_infos, info_index);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Finalizes the first thread_count infos of the array, in ascending order.
+void dxThreadingThreadPool::FinalizeIndividualThreadInfos(dxThreadPoolThreadInfo *thread_infos, sizeint thread_count)
+{
+    for (sizeint info_index = 0; info_index != thread_count; ++info_index)
+    {
+        FinalizeSingleThreadInfo(thread_infos + info_index);
+    }
+}
+
+
+// Constructs a thread info in place and initializes it (which starts the thread).
+// On initialization failure the object is destroyed again, so the storage is
+// left without a live object, and false is returned.
+bool dxThreadingThreadPool::InitializeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info, sizeint stack_size, unsigned int ode_data_allocate_flags)
+{
+    new(thread_info) dxThreadPoolThreadInfo();
+
+    if (thread_info->Initialize(stack_size, ode_data_allocate_flags))
+    {
+        return true;
+    }
+
+    thread_info->dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo();
+    return false;
+}
+
+// Destroys a thread info that was constructed in place; NULL is ignored.
+void dxThreadingThreadPool::FinalizeSingleThreadInfo(dxThreadPoolThreadInfo *thread_info)
+{
+    if (thread_info == NULL)
+    {
+        return;
+    }
+
+    thread_info->dxThreadPoolThreadInfo::~dxThreadPoolThreadInfo();
+}
+
+
+// Commands each pool thread, one after another, to serve the given threading
+// implementation, waiting for the readiness event after every command.
+// The parameter block lives on this stack frame; that is sufficient because the
+// command is issued with response waiting and the worker copies the fields out
+// before acknowledging.
+void dxThreadingThreadPool::ServeThreadingImplementation(dThreadingImplementationID impl)
+{
+    dxThreadPoolThreadInfo::dxServeImplementationParams params(impl, &m_ready_wait_event);
+
+    const sizeint worker_count = m_thread_count;
+    for (sizeint worker_index = 0; worker_index != worker_count; ++worker_index)
+    {
+        dxThreadPoolThreadInfo *const worker = m_thread_infos + worker_index;
+        worker->ExecuteThreadCommand(dxThreadPoolThreadInfo::dxTHREAD_COMMAND_SERVE_IMPLEMENTATION, &params, true);
+
+        const bool worker_ready = m_ready_wait_event.WaitInfinitely();
+        dICHECK(worker_ready);
+    }
+}
+
+// Sends a no-op command to every pool thread and waits for each acknowledgement,
+// i.e. returns only after every thread has got back to its command loop.
+void dxThreadingThreadPool::WaitIdleState()
+{
+    const sizeint worker_count = m_thread_count;
+    for (sizeint worker_index = 0; worker_index != worker_count; ++worker_index)
+    {
+        m_thread_infos[worker_index].ExecuteThreadCommand(dxThreadPoolThreadInfo::dxTHREAD_COMMAND_NOOP, NULL, true);
+    }
+}
+
+
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+
+// Allocates a thread pool with thread_count worker threads.
+// Returns NULL if the pool could not be allocated or initialized, or if the
+// built-in threading implementation is disabled at compile time.
+// thread_count must be non-zero; the reserved argument is not used.
+/*extern */dThreadingThreadPoolID dThreadingAllocateThreadPool(unsigned thread_count, 
+                                                               sizeint stack_size, unsigned int ode_data_allocate_flags, void *reserved/*=NULL*/)
+{
+    dAASSERT(thread_count != 0);
+
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    dxThreadingThreadPool *thread_pool = new dxThreadingThreadPool();
+    if (thread_pool != NULL && !thread_pool->InitializeThreads(thread_count, stack_size, ode_data_allocate_flags))
+    {
+        // A pool whose threads failed to start is of no use to the caller
+        delete thread_pool;
+        thread_pool = NULL;
+    }
+#else
+    dThreadingThreadPoolID thread_pool = NULL;
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+
+    return (dThreadingThreadPoolID)thread_pool;
+}
+
+// Public entry point: makes the threads of the pool serve the given threading
+// implementation. Compiles to an empty function when the built-in threading
+// implementation is disabled.
+/*extern */void dThreadingThreadPoolServeMultiThreadedImplementation(dThreadingThreadPoolID pool, dThreadingImplementationID impl)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    ((dxThreadingThreadPool *)pool)->ServeThreadingImplementation(impl);
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+// Public entry point: waits until every thread of the pool has acknowledged a
+// no-op command. Compiles to an empty function when the built-in threading
+// implementation is disabled.
+/*extern */void dThreadingThreadPoolWaitIdleState(dThreadingThreadPoolID pool)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    ((dxThreadingThreadPool *)pool)->WaitIdleState();
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+// Public entry point: destroys a pool returned by dThreadingAllocateThreadPool()
+// (its destructor stops the threads). Compiles to an empty function when the
+// built-in threading implementation is disabled.
+/*extern */void dThreadingFreeThreadPool(dThreadingThreadPoolID pool)
+{
+#if dBUILTIN_THREADING_IMPL_ENABLED
+    delete (dxThreadingThreadPool *)pool;
+#endif // #if dBUILTIN_THREADING_IMPL_ENABLED
+}
+
+
+#endif // #if defined(_WIN32)
diff --git a/libs/ode-0.16.1/ode/src/threadingutils.h b/libs/ode-0.16.1/ode/src/threadingutils.h
new file mode 100644
index 0000000..fb67052
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/threadingutils.h
@@ -0,0 +1,157 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_THREADINGUTILS_H_
+#define _ODE_THREADINGUTILS_H_
+
+
+#include "odeou.h"
+
+
+#if !dTHREADING_INTF_DISABLED
+
+// Threading-enabled variant: forwards to AtomicCompareExchange().
+// NOTE(review): presumably returns true when *paoDestination equalled aoComparand
+// and was replaced with aoExchange, as the non-threaded fallback does -- confirm against the atomic API.
+static inline 
+bool ThrsafeCompareExchange(volatile atomicord32 *paoDestination, atomicord32 aoComparand, atomicord32 aoExchange)
+{
+    return AtomicCompareExchange(paoDestination, aoComparand, aoExchange);
+}
+
+// Threading-enabled variant: forwards to AtomicExchange() and returns its result.
+// NOTE(review): presumably the result is the previous value of *paoDestination,
+// as in the non-threaded fallback -- confirm against the atomic API.
+static inline 
+atomicord32 ThrsafeExchange(volatile atomicord32 *paoDestination, atomicord32 aoExchange)
+{
+    return AtomicExchange(paoDestination, aoExchange);
+}
+
+// Threading-enabled variant: forwards to AtomicExchangeAddNoResult();
+// nothing is returned to the caller.
+static inline 
+void ThrsafeAdd(volatile atomicord32 *paoDestination, atomicord32 aoAddend)
+{
+    AtomicExchangeAddNoResult(paoDestination, aoAddend);
+}
+
+// Threading-enabled variant: forwards to AtomicExchangeAdd() and returns its result.
+// NOTE(review): presumably the result is the value of *paoDestination before the
+// addition, as in the non-threaded fallback -- confirm against the atomic API.
+static inline 
+atomicord32 ThrsafeExchangeAdd(volatile atomicord32 *paoDestination, atomicord32 aoAddend)
+{
+    return AtomicExchangeAdd(paoDestination, aoAddend);
+}
+
+// Threading-enabled variant for pointer-sized values: forwards to
+// AtomicCompareExchangePointer() and returns its result.
+static inline 
+bool ThrsafeCompareExchangePointer(volatile atomicptr *papDestination, atomicptr apComparand, atomicptr apExchange)
+{
+    return AtomicCompareExchangePointer(papDestination, apComparand, apExchange);
+}
+
+// Threading-enabled variant for pointer-sized values: forwards to
+// AtomicExchangePointer() and returns its result.
+static inline 
+atomicptr ThrsafeExchangePointer(volatile atomicptr *papDestination, atomicptr apExchange)
+{
+    return AtomicExchangePointer(papDestination, apExchange);
+}
+
+
+#else // #if dTHREADING_INTF_DISABLED
+
+// Non-threaded fallback (threading interface disabled): plain, non-atomic
+// compare-and-store. Returns true and stores aoExchange when the destination
+// equals aoComparand; otherwise leaves the destination alone and returns false.
+static inline 
+bool ThrsafeCompareExchange(volatile atomicord32 *paoDestination, atomicord32 aoComparand, atomicord32 aoExchange)
+{
+    if (*paoDestination != aoComparand)
+    {
+        return false;
+    }
+
+    *paoDestination = aoExchange;
+    return true;
+}
+
+// Non-threaded fallback: stores aoExchange into the destination and returns
+// the value it held before (non-atomically).
+static inline 
+atomicord32 ThrsafeExchange(volatile atomicord32 *paoDestination, atomicord32 aoExchange)
+{
+    const atomicord32 aoPreviousValue = *paoDestination;
+
+    *paoDestination = aoExchange;
+
+    return aoPreviousValue;
+}
+
+// Non-threaded fallback: adds aoAddend to the destination (non-atomically).
+static inline 
+void ThrsafeAdd(volatile atomicord32 *paoDestination, atomicord32 aoAddend)
+{
+    *paoDestination = *paoDestination + aoAddend;
+}
+
+// Non-threaded fallback: adds aoAddend to the destination (non-atomically)
+// and returns the value it held before the addition.
+static inline 
+atomicord32 ThrsafeExchangeAdd(volatile atomicord32 *paoDestination, atomicord32 aoAddend)
+{
+    const atomicord32 aoPreviousValue = *paoDestination;
+
+    *paoDestination = *paoDestination + aoAddend;
+
+    return aoPreviousValue;
+}
+
+// Non-threaded fallback for pointer-sized values: plain, non-atomic
+// compare-and-store. Returns true and stores apExchange when the destination
+// equals apComparand; otherwise leaves the destination alone and returns false.
+static inline 
+bool ThrsafeCompareExchangePointer(volatile atomicptr *papDestination, atomicptr apComparand, atomicptr apExchange)
+{
+    if (*papDestination != apComparand)
+    {
+        return false;
+    }
+
+    *papDestination = apExchange;
+    return true;
+}
+
+// Non-threaded fallback for pointer-sized values: stores apExchange into the
+// destination and returns the value it held before (non-atomically).
+static inline 
+atomicptr ThrsafeExchangePointer(volatile atomicptr *papDestination, atomicptr apExchange)
+{
+    const atomicptr apPreviousValue = *papDestination;
+
+    *papDestination = apExchange;
+
+    return apPreviousValue;
+}
+
+
+#endif // #if dTHREADING_INTF_DISABLED
+
+
+// Increments *storagePointer by one unless it has already reached limitValue.
+// Returns the value observed before the increment, or limitValue if no
+// increment was performed. Retries when the compare-exchange fails.
+static inline 
+unsigned int ThrsafeIncrementIntUpToLimit(volatile atomicord32 *storagePointer, unsigned int limitValue)
+{
+    for (;;) {
+        const unsigned int observedValue = *storagePointer;
+        // The ">=" comparison is used here to allow continuing incrementing the destination 
+        // without waiting for all the threads to pass the barrier of checking its value
+        if (observedValue >= limitValue) {
+            return limitValue;
+        }
+        if (ThrsafeCompareExchange(storagePointer, (atomicord32)observedValue, (atomicord32)(observedValue + 1))) {
+            return observedValue;
+        }
+    }
+}
+
+// Pointer-sized counterpart of ThrsafeIncrementIntUpToLimit(): increments
+// *storagePointer by one unless it has already reached limitValue. Returns the
+// value observed before the increment, or limitValue if no increment was performed.
+static inline 
+sizeint ThrsafeIncrementSizeUpToLimit(volatile sizeint *storagePointer, sizeint limitValue)
+{
+    for (;;) {
+        const sizeint observedValue = *storagePointer;
+        // The ">=" comparison is not required here at present ("==" could be used). 
+        // It is just used this way to match the other function above.
+        if (observedValue >= limitValue) {
+            return limitValue;
+        }
+        // The size value is exchanged through the pointer-sized atomic primitive
+        if (ThrsafeCompareExchangePointer((volatile atomicptr *)storagePointer, (atomicptr)observedValue, (atomicptr)(observedValue + 1))) {
+            return observedValue;
+        }
+    }
+}
+
+
+
+#endif // _ODE_THREADINGUTILS_H_
diff --git a/libs/ode-0.16.1/ode/src/timer.cpp b/libs/ode-0.16.1/ode/src/timer.cpp
new file mode 100644
index 0000000..4f3434a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/timer.cpp
@@ -0,0 +1,424 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+/*
+
+TODO
+----
+
+* gettimeofday() and the pentium time stamp counter return the real time,
+  not the process time. fix this somehow!
+
+*/
+
+#include <ode/common.h>
+#include <ode/timer.h>
+#include "config.h"
+#include "common.h"
+
+
+// misc defines
+#define ALLOCA dALLOCA16
+
+//****************************************************************************
+// implementation for windows based on the multimedia performance counter.
+
+#ifdef WIN32
+
+#include "windows.h"
+
+// Windows: reads the performance counter and splits it into
+// cc[0] (low part) and cc[1] (high part).
+static inline void getClockCount (unsigned long cc[2])
+{
+    LARGE_INTEGER counter;
+    QueryPerformanceCounter (&counter);
+    cc[1] = counter.HighPart;
+    cc[0] = counter.LowPart;
+}
+
+
+// Windows: intentionally empty. It exists so that the platform-independent
+// stopwatch/timer code further below can call serialize() unconditionally.
+static inline void serialize()
+{
+}
+
+
+// Windows: reassembles a counter value stored by getClockCount()
+// (cc[0] = low part, cc[1] = high part) and converts it to double.
+static inline double loadClockCount (unsigned long cc[2])
+{
+    LARGE_INTEGER counter;
+    counter.HighPart = cc[1];
+    counter.LowPart = cc[0];
+    return double(counter.QuadPart);
+}
+
+
+// Windows: the timer resolution in seconds is the reciprocal of the tick rate.
+double dTimerResolution()
+{
+    const double ticks_per_second = dTimerTicksPerSecond();
+    return 1.0/ticks_per_second;
+}
+
+
+// Windows: returns the performance counter frequency. The value is queried on
+// the first call only and cached in function-local statics afterwards.
+double dTimerTicksPerSecond()
+{
+    static int frequency_known=0;
+    static double cached_hz=0.0;
+    if (frequency_known) {
+        return cached_hz;
+    }
+
+    LARGE_INTEGER frequency;
+    QueryPerformanceFrequency (&frequency);
+    cached_hz = double(frequency.QuadPart);
+    frequency_known = 1;
+    return cached_hz;
+}
+
+#endif
+
+//****************************************************************************
+// implementation based on the pentium time stamp counter. the timer functions
+// can be serializing or non-serializing. serializing will ensure that all
+// instructions have executed and data has been written back before the cpu
+// time stamp counter is read. the CPUID instruction is used to serialize.
+
+#if defined(PENTIUM) && !defined(WIN32)
+
+// we need to know the clock rate so that the timing function can report
+// accurate times. this number only needs to be set accurately if we're
+// doing performance tests and care about real-world time numbers - otherwise,
+// just ignore this. i have not worked out how to determine this number
+// automatically yet.
+
+#define PENTIUM_HZ (500e6)
+
+// Pentium: reads the time stamp counter with RDTSC and stores EAX into the
+// 4 bytes at cc and EDX into the 4 bytes at byte offset 4 from cc.
+// The two branches differ only in the register used to address cc (ESI vs. RSI).
+static inline void getClockCount (unsigned long cc[2])
+{
+#ifndef X86_64_SYSTEM	
+    asm volatile (
+        "rdtsc\n"
+        "movl %%eax,(%%esi)\n"
+        "movl %%edx,4(%%esi)\n"
+        : : "S" (cc) : "%eax","%edx","cc","memory");
+#else
+    asm volatile (
+        "rdtsc\n"
+        "movl %%eax,(%%rsi)\n"
+        "movl %%edx,4(%%rsi)\n"
+        : : "S" (cc) : "%eax","%edx","cc","memory");
+#endif  
+}
+
+
+// Pentium: executes CPUID (leaf 0) as a serializing instruction so that the
+// time stamp counter is read only after preceding instructions have completed.
+static inline void serialize()
+{
+#ifndef X86_64_SYSTEM
+    // EBX is saved/restored by hand rather than listed as a clobber
+    // (on 32-bit builds it may be reserved, e.g. as the PIC register).
+    asm volatile (
+        "mov $0,%%eax\n"
+        "push %%ebx\n"
+        "cpuid\n"
+        "pop %%ebx\n"
+        : : : "%eax","%ecx","%edx","cc","memory");
+#else
+    // No push/pop here: on x86-64 the compiler may keep live data in the red
+    // zone below the stack pointer of the (inlining) function, and a push
+    // inside inline asm would overwrite it. Instead, all four registers that
+    // CPUID writes are declared as outputs and the results are discarded.
+    unsigned int reg_eax = 0, reg_ebx, reg_ecx, reg_edx;
+    asm volatile (
+        "cpuid\n"
+        : "+a" (reg_eax), "=b" (reg_ebx), "=c" (reg_ecx), "=d" (reg_edx)
+        : : "cc","memory");
+    (void)reg_eax; (void)reg_ebx; (void)reg_ecx; (void)reg_edx;
+#endif
+}
+
+
+// Pentium: converts the 64-bit integer stored at a[0] (as written by
+// getClockCount()) to double using the x87 FILD/FSTP instruction pair.
+// Note that both preprocessor branches contain exactly the same code.
+static inline double loadClockCount (unsigned long a[2])
+{
+    double ret;
+#ifndef X86_64_SYSTEM
+    asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
+    "cc","memory");
+#else
+    asm volatile ("fildll %1; fstpl %0" : "=m" (ret) : "m" (a[0]) :
+    "cc","memory");
+#endif  
+    return ret;
+}
+
+
+// Pentium: the resolution in seconds is one tick of the assumed clock rate.
+double dTimerResolution()
+{
+    const double assumed_clock_hz = PENTIUM_HZ;
+    return 1.0/assumed_clock_hz;
+}
+
+
+// Pentium: the tick rate is the hard-coded PENTIUM_HZ clock rate.
+double dTimerTicksPerSecond()
+{
+    const double assumed_clock_hz = PENTIUM_HZ;
+    return assumed_clock_hz;
+}
+
+#endif
+
+//****************************************************************************
+// otherwise, do the implementation based on gettimeofday().
+
+#if !defined(PENTIUM) && !defined(WIN32)
+
+#ifndef macintosh
+
+#include <sys/time.h>
+#include <unistd.h>
+
+
+// gettimeofday() based clock: cc[0] receives the microseconds part and
+// cc[1] the seconds part of the current time.
+static inline void getClockCount (unsigned long cc[2])
+{
+    struct timeval now;
+    gettimeofday (&now,0);
+    cc[1] = now.tv_sec;
+    cc[0] = now.tv_usec;
+}
+
+#else // macintosh
+
+#include <CoreServices/CoreServices.h>
+#include <ode/Timer.h>
+
+// macintosh: reads the Microseconds() counter and splits it into
+// cc[1] = whole seconds and cc[0] = remaining microseconds.
+// Both 32-bit halves of the counter are used: with the low half alone the
+// clock would wrap around roughly every 71.6 minutes (2^32 microseconds),
+// which makes intervals spanning the wrap come out negative.
+static inline void getClockCount (unsigned long cc[2])
+{
+    UnsignedWide ms;
+    Microseconds (&ms);
+    const unsigned long long total_us = ((unsigned long long)ms.hi << 32) | (unsigned long long)ms.lo;
+    cc[1] = (unsigned long)(total_us / 1000000);
+    cc[0] = (unsigned long)(total_us % 1000000);
+}
+
+#endif
+
+
+// gettimeofday()/Microseconds() based clock: intentionally empty. It exists so
+// that the platform-independent stopwatch/timer code further below can call
+// serialize() unconditionally.
+static inline void serialize()
+{
+}
+
+
+// Converts a {microseconds, seconds} pair written by getClockCount()
+// into a single microsecond count.
+static inline double loadClockCount (unsigned long a[2])
+{
+    const unsigned long micros = a[0];
+    const unsigned long seconds = a[1];
+    return seconds*1.0e6 + micros;
+}
+
+
+// Measures the clock resolution empirically: spins until the clock reading
+// changes (to align with a tick), then spins again until the next change and
+// returns the distance between the two readings, converted to seconds.
+double dTimerResolution()
+{
+    unsigned long later[2],earlier[2];
+    getClockCount (later);
+    // First pass: wait for any change
+    do {
+        getClockCount (earlier);
+    }
+    while (later[0]==earlier[0] && later[1]==earlier[1]);
+    // Second pass: wait for the change that follows it
+    do {
+        getClockCount (later);
+    }
+    while (later[0]==earlier[0] && later[1]==earlier[1]);
+    const double later_ticks = loadClockCount (later);
+    const double earlier_ticks = loadClockCount (earlier);
+    return (later_ticks-earlier_ticks) / dTimerTicksPerSecond();
+}
+
+
+// The clock counts of this implementation are expressed in microseconds.
+double dTimerTicksPerSecond()
+{
+    return 1000000.0;
+}
+
+#endif
+
+//****************************************************************************
+// stop watches
+
+// Clears the accumulated time and the stored start clock count of a stopwatch.
+void dStopwatchReset (dStopwatch *s)
+{
+    s->cc[0] = 0;
+    s->cc[1] = 0;
+    s->time = 0;
+}
+
+
+// Records the current clock count as the start of a measured interval.
+void dStopwatchStart (dStopwatch *s)
+{
+    // Serialize first so that the reading is not taken early
+    serialize();
+    getClockCount (s->cc);
+}
+
+
+// Ends a measured interval: adds the clock ticks elapsed since the matching
+// dStopwatchStart() call to the accumulated time of the stopwatch.
+void dStopwatchStop  (dStopwatch *s)
+{
+    unsigned long stop_cc[2];
+    serialize();
+    getClockCount (stop_cc);
+    const double started_at = loadClockCount (s->cc);
+    const double stopped_at = loadClockCount (stop_cc);
+    s->time += stopped_at-started_at;
+}
+
+
+// Returns the accumulated stopwatch time converted from clock ticks to seconds.
+double dStopwatchTime (dStopwatch *s)
+{
+    const double accumulated_ticks = s->time;
+    return accumulated_ticks / dTimerTicksPerSecond();
+}
+
+//****************************************************************************
+// code timers
+
+// maximum number of events to record (dTimerNow/dTimerEnd ignore calls once the table is full)
+#define MAXNUM 100
+
+static int num = 0;		// number of entries used in event array; reset to 1 by dTimerStart()
+static struct {
+    unsigned long cc[2];		// clock counts taken when the event was recorded
+    double total_t;		// total clocks used in this slot (accumulated by dTimerReport).
+    double total_p;		// total percentage points used in this slot (accumulated by dTimerReport).
+    int count;			// number of times this slot has been updated.
+    const char *description;		// pointer to static string (the pointer is stored, the text is not copied)
+} event[MAXNUM];
+
+
+// make sure all slot totals and counts reset to 0 at start
+
+// Zeroes the per-slot totals and counts; only the first call does any work.
+static void initSlots()
+{
+    static int slots_ready=0;
+    if (slots_ready) {
+        return;
+    }
+
+    for (int slot=0; slot<MAXNUM; slot++) {
+        event[slot].total_p = 0;
+        event[slot].total_t = 0;
+        event[slot].count = 0;
+    }
+    slots_ready = 1;
+}
+
+
+// Begins a new timing sequence: the event list is restarted with a single
+// entry carrying the given description and the current clock count.
+// The description pointer is stored as is (the string is not copied).
+void dTimerStart (const char *description)
+{
+    initSlots();
+    event[0].description = description;
+    num = 1;
+    // The clock is read last, after a serialization point
+    serialize();
+    getClockCount (event[0].cc);
+}
+
+
+// Appends an event with the current clock count and the given description.
+// The call is silently ignored once all MAXNUM slots are in use.
+void dTimerNow (const char *description)
+{
+    if (num >= MAXNUM) {
+        return;
+    }
+
+    // do not serialize
+    getClockCount (event[num].cc);
+    event[num].description = description;
+    num++;
+}
+
+
+// Appends the closing "TOTAL" event with the current (serialized) clock count.
+// The call is silently ignored once all MAXNUM slots are in use.
+void dTimerEnd()
+{
+    if (num >= MAXNUM) {
+        return;
+    }
+
+    serialize();
+    getClockCount (event[num].cc);
+    event[num].description = "TOTAL";
+    num++;
+}
+
+//****************************************************************************
+// print report
+
+// Prints the value with the given printf format, scaling it by factors of 1000
+// until it reaches (approximately) 1 and appending the matching unit prefix:
+// none, "m", "u", or "n" as the last resort for anything smaller.
+static void fprintDoubleWithPrefix (FILE *f, double a, const char *fmt)
+{
+    // Prefixes for the value as is, then after one and two scalings by 1000
+    static const char *const unit_prefixes[3] = { "", "m", "u" };
+
+    for (int scalings = 0; scalings < 3; scalings++) {
+        if (a >= 0.999999) {
+            fprintf (f,fmt,a);
+            if (scalings != 0) {
+                fprintf (f,"%s",unit_prefixes[scalings]);
+            }
+            return;
+        }
+        a *= 1000.0;
+    }
+
+    // Still below 1 after three scalings: report whatever remains as nano
+    fprintf (f,fmt,a);
+    fprintf (f,"n");
+}
+
+
+// Prints a report of the events recorded since the last dTimerStart() call.
+//   fout    - stream to write the report to
+//   average - if non-zero, also print per-slot averages over all reports made so far
+// Each slot shows the time between its event and the next one; the last
+// recorded event is printed as the total with 100%. Calling this function also
+// updates the running per-slot totals and counts.
+void dTimerReport (FILE *fout, int average)
+{
+    const double ccunit = 1.0/dTimerTicksPerSecond();
+    fprintf (fout,"\nTimer Report (");
+    fprintDoubleWithPrefix (fout,ccunit,"%.2f ");
+    fprintf (fout,"s resolution)\n------------\n");
+    if (num < 1) return;
+
+    const int last = num-1;
+
+    // get maximum description length
+    sizeint maxl = 0;
+    for (int slot=0; slot<num; slot++) {
+        const sizeint length = strlen (event[slot].description);
+        if (length > maxl) maxl = length;
+    }
+
+    // calculate total time (guarding the divisions below against a non-positive total)
+    double total = loadClockCount (event[last].cc) - loadClockCount (event[0].cc);
+    if (total <= 0) total = 1;
+
+    // compute time difference for all slots except the last one. update totals
+    double *times = (double*) ALLOCA (num * sizeof(double));
+    for (int slot=0; slot < last; slot++) {
+        const double slot_begin = loadClockCount (event[slot].cc);
+        const double slot_end = loadClockCount (event[slot+1].cc);
+        times[slot] = slot_end - slot_begin;
+        event[slot].count++;
+        event[slot].total_t += times[slot];
+        event[slot].total_p += times[slot]/total * 100.0;
+    }
+
+    // print the regular slots (with optional averages)
+    for (int slot=0; slot < last; slot++) {
+        const double t = times[slot];
+        const double p = t/total * 100.0;
+        fprintf (fout,"%-*s %7.2fms %6.2f%%",(int)maxl,event[slot].description,
+            t*ccunit * 1000.0, p);
+        if (average) {
+            fprintf (fout,"  (avg %7.2fms %6.2f%%)",
+                (event[slot].total_t / event[slot].count)*ccunit * 1000.0,
+                event[slot].total_p / event[slot].count);
+        }
+        fprintf (fout,"\n");
+    }
+
+    // the last recorded event stands for the whole measured span
+    fprintf (fout,"%-*s %7.2fms %6.2f%%",(int)maxl,event[last].description,
+        total*ccunit * 1000.0, 100.0);
+    fprintf (fout,"\n");
+
+    fprintf (fout,"\n");
+}
diff --git a/libs/ode-0.16.1/ode/src/typedefs.h b/libs/ode-0.16.1/ode/src/typedefs.h
new file mode 100644
index 0000000..c8164c3
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/typedefs.h
@@ -0,0 +1,74 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_TYPEDEFS_H_
+#define _ODE_TYPEDEFS_H_
+
+#include <ode/odeconfig.h>
+
+#include "error.h"
+
+
+/*
+ * Internal typedefs to map public types into more convenient private types
+ */
+
+
+// Fixed-width integer aliases; each one is followed by a compile-time check of its size.
+typedef dint64 int64;
+dSASSERT(sizeof(int64) == 8);
+
+typedef duint64 uint64;
+dSASSERT(sizeof(uint64) == 8);
+
+typedef dint32 int32;
+dSASSERT(sizeof(int32) == 4);
+
+typedef duint32 uint32;
+dSASSERT(sizeof(uint32) == 4);
+
+typedef dint16 int16;
+dSASSERT(sizeof(int16) == 2);
+
+typedef duint16 uint16;
+dSASSERT(sizeof(uint16) == 2);
+
+typedef dint8 int8;
+dSASSERT(sizeof(int8) == 1);
+
+typedef duint8 uint8;
+dSASSERT(sizeof(uint8) == 1);
+
+
+// Pointer-sized integer aliases; all of them are required to be exactly as wide as void *.
+typedef dintptr intptr;
+dSASSERT(sizeof(intptr) == sizeof(void *));
+
+typedef duintptr uintptr;
+dSASSERT(sizeof(uintptr) == sizeof(void *));
+
+typedef ddiffint diffint;
+dSASSERT(sizeof(diffint) == sizeof(void *)); // So far, we choose to not support systems that have accessible memory segment size smaller than the pointer size
+
+typedef dsizeint sizeint;
+dSASSERT(sizeof(sizeint) == sizeof(void *)); // So far, we choose to not support systems that have accessible memory segment size smaller than the pointer size
+
+
+#endif
diff --git a/libs/ode-0.16.1/ode/src/util.cpp b/libs/ode-0.16.1/ode/src/util.cpp
new file mode 100644
index 0000000..17b9e8a
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/util.cpp
@@ -0,0 +1,1231 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#include <ode/ode.h>
+#include "config.h"
+#include "util.h"
+#include "objects.h"
+#include "joints/joint.h"
+#include "threadingutils.h"
+
+#include <new>
+
+
+#define dMIN(A,B)  ((A)>(B) ? (B) : (A)) // smaller of A and B; an argument can be evaluated twice, so avoid side effects
+#define dMAX(A,B)  ((B)>(A) ? (B) : (A)) // larger of A and B; an argument can be evaluated twice, so avoid side effects
+
+
+//****************************************************************************
+// Malloc based world stepping memory manager
+
+/*extern */dxWorldProcessMemoryManager g_WorldProcessMallocMemoryManager(dAlloc, dRealloc, dFree); // memory manager built from the dAlloc/dRealloc/dFree functions
+/*extern */dxWorldProcessMemoryReserveInfo g_WorldProcessDefaultReserveInfo(dWORLDSTEP_RESERVEFACTOR_DEFAULT, dWORLDSTEP_RESERVESIZE_DEFAULT); // reserve settings built from the two dWORLDSTEP_*_DEFAULT constants
+
+
+//****************************************************************************
+// dxWorldProcessContext
+
+const char *const dxWorldProcessContext::m_aszContextMutexNames[dxPCM__MAX] = // names passed to AllocMutexGroup(); one entry per dxPCM_* index
+{
+    "Stepper Arena Obtain Lock" , // dxPCM_STEPPER_ARENA_OBTAIN
+    "Joint addLimot Serialize Lock" , // dxPCM_STEPPER_ADDLIMOT_SERIALIZE
+    "Stepper StepBody Serialize Lock" , // dxPCM_STEPPER_STEPBODY_SERIALIZE
+};
+
+dxWorldProcessContext::dxWorldProcessContext(): // creates an empty context: every pointer member starts as NULL
+    m_pmaIslandsArena(NULL), // arena released with FreeMemArena() in the destructor
+    m_pmaStepperArenas(NULL), // head of the linked list of stepper arenas
+    m_pswObjectsAllocWorld(NULL), // world the sync objects below were allocated from
+    m_pmgStepperMutexGroup(NULL), // mutex group indexed by the dxPCM_* constants
+    m_pcwIslandsSteppingWait(NULL) // stock call wait object obtained from the world
+{
+    // Nothing else to do: sync objects are set up by EnsureStepperSyncObjectsAreAllocated() and arenas by the Reallocate*() methods
+}
+
+dxWorldProcessContext::~dxWorldProcessContext()
+{
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pmgStepperMutexGroup != NULL));
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pcwIslandsSteppingWait != NULL));
+
+    // Sync objects: only the mutex group is handed back to the world that
+    // allocated it -- the stock call wait can not be freed.
+    if (m_pswObjectsAllocWorld != NULL)
+    {
+        m_pswObjectsAllocWorld->FreeMutexGroup(m_pmgStepperMutexGroup);
+    }
+
+    // Memory: drop the whole stepper arenas list first, then the islands arena.
+    if (dxWorldProcessMemArena *pmaListHead = m_pmaStepperArenas)
+    {
+        FreeArenasList(pmaListHead);
+    }
+    if (m_pmaIslandsArena != NULL)
+    {
+        dxWorldProcessMemArena::FreeMemArena(m_pmaIslandsArena);
+    }
+}
+
+void dxWorldProcessContext::CleanupWorldReferences(dxWorld *pswWorldInstance)
+{
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pmgStepperMutexGroup != NULL));
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pcwIslandsSteppingWait != NULL));
+
+    // Sync objects that were allocated from a different world are left untouched.
+    if (m_pswObjectsAllocWorld != pswWorldInstance) return;
+
+    // Give the mutex group back (the stock call wait can not be freed) ...
+    m_pswObjectsAllocWorld->FreeMutexGroup(m_pmgStepperMutexGroup);
+    // ... and forget the world together with both of its sync objects.
+    m_pmgStepperMutexGroup = NULL;
+    m_pcwIslandsSteppingWait = NULL;
+    m_pswObjectsAllocWorld = NULL;
+}
+
+bool dxWorldProcessContext::EnsureStepperSyncObjectsAreAllocated(dxWorld *pswWorldInstance)
+{
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pmgStepperMutexGroup != NULL));
+    dIASSERT((m_pswObjectsAllocWorld != NULL) == (m_pcwIslandsSteppingWait != NULL));
+    // Makes sure the mutex group and the call wait object exist; returns true on success and false if either allocation fails.
+    bool bResult = false;
+
+    dMutexGroupID pmbStepperMutexGroup = NULL;
+    bool bStepperMutexGroupAllocated = false;
+    // Single-pass do/while(false): a 'break' jumps straight to the cleanup code after the loop.
+    do
+    {
+        if (m_pswObjectsAllocWorld == NULL) // nothing has been allocated yet
+        {
+            pmbStepperMutexGroup = pswWorldInstance->AllocMutexGroup(dxPCM__MAX, m_aszContextMutexNames);
+            if (pmbStepperMutexGroup == NULL)
+            {
+                break;
+            }
+
+            bStepperMutexGroupAllocated = true; // from here on a failure has to free the group again
+
+            dCallWaitID pcwIslandsSteppingWait = pswWorldInstance->AllocateOrRetrieveStockCallWaitID();
+            if (pcwIslandsSteppingWait == NULL)
+            {
+                break;
+            }
+            // Both objects are available -- commit them together with the world they came from.
+            m_pswObjectsAllocWorld = pswWorldInstance;
+            m_pmgStepperMutexGroup = pmbStepperMutexGroup;
+            m_pcwIslandsSteppingWait = pcwIslandsSteppingWait;
+        }
+
+        bResult = true;
+    }
+    while (false);
+
+    if (!bResult)
+    {
+        if (bStepperMutexGroupAllocated) // roll back the partial allocation
+        {
+            pswWorldInstance->FreeMutexGroup(pmbStepperMutexGroup);
+        }
+    }
+
+    return bResult;
+}
+
+
+dxWorldProcessMemArena *dxWorldProcessContext::ObtainStepperMemArena()
+{
+    dxWorldProcessMemArena *pmaArenaInstance = NULL;
+    // Takes the head arena off the stepper arenas list; returns NULL once the list is observed to be empty.
+    while (true)
+    {
+        dxWorldProcessMemArena *pmaRawArenasHead = GetStepperArenasHead(); // peek performed before the mutex is taken
+        if (pmaRawArenasHead == NULL)
+        {
+            break;
+        }
+
+        // Extraction must be locked so that other thread does not "steal" head arena,
+        // use it and then reinsert back with a different "next"
+        dxMutexGroupLockHelper lhLockHelper(m_pswObjectsAllocWorld, m_pmgStepperMutexGroup, dxPCM_STEPPER_ARENA_OBTAIN);
+
+        dxWorldProcessMemArena *pmaArenasHead = GetStepperArenasHead(); // Arenas head must be re-extracted after mutex has been locked
+        bool bExchangeResult = pmaArenasHead != NULL && TryExtractingStepperArenasHead(pmaArenasHead);
+
+        lhLockHelper.UnlockMutex(); // the extracted arena is reset outside of the locked region
+
+        if (bExchangeResult) // otherwise go round again and retry with a fresh head
+        {
+            pmaArenasHead->ResetState();
+            pmaArenaInstance = pmaArenasHead;
+            break;
+        }
+    }
+
+    return pmaArenaInstance;
+}
+
+void dxWorldProcessContext::ReturnStepperMemArena(dxWorldProcessMemArena *pmaArenaInstance)
+{
+    // Link the arena in front of the currently observed head, then try to publish it
+    // as the new head; the whole sequence is repeated until the attempt succeeds.
+    bool bInserted;
+    do
+    {
+        dxWorldProcessMemArena *pmaObservedHead = GetStepperArenasHead();
+        pmaArenaInstance->SetNextMemArena(pmaObservedHead);
+        bInserted = TryInsertingStepperArenasHead(pmaArenaInstance, pmaObservedHead);
+    }
+    while (!bInserted);
+}
+
+
+dxWorldProcessMemArena *dxWorldProcessContext::ReallocateIslandsMemArena(sizeint nMemoryRequirement, 
+    const dxWorldProcessMemoryManager *pmmMemortManager, float fReserveFactor, unsigned uiReserveMinimum)
+{
+    // Reallocates the islands arena for nMemoryRequirement (manager and reserve settings are forwarded unchanged) and stores the result.
+    dxWorldProcessMemArena *pmaExistingArena = GetIslandsMemArena();
+    dxWorldProcessMemArena *pmaNewMemArena = dxWorldProcessMemArena::ReallocateMemArena(pmaExistingArena, nMemoryRequirement, pmmMemortManager, fReserveFactor, uiReserveMinimum);
+    SetIslandsMemArena(pmaNewMemArena);
+    // ReallocateMemArena() can return NULL (ReallocateStepperMemArenas tests for it), so only an existing arena is reset.
+    if (pmaNewMemArena != NULL) pmaNewMemArena->ResetState();
+    return pmaNewMemArena; // the reset arena, or NULL if the reallocation did not produce one
+}
+
+bool dxWorldProcessContext::ReallocateStepperMemArenas(
+    dxWorld *world, unsigned nIslandThreadsCount, sizeint nMemoryRequirement, 
+    const dxWorldProcessMemoryManager *pmmMemortManager, float fReserveFactor, unsigned uiReserveMinimum)
+{
+    dxWorldProcessMemArena *pmaRebuiltArenasHead = NULL, *pmaRebuiltArenasTail = NULL;
+    dxWorldProcessMemArena *pmaExistingArenas = GetStepperArenasList();
+    unsigned nArenasToProcess = nIslandThreadsCount;
+    // Rebuilds the stepper arenas list to hold nIslandThreadsCount arenas; returns true only if all of them were obtained.
+    (void)world; // unused
+
+    // NOTE!
+    // The list is reallocated in a way to assure the largest arenas are at end 
+    // and if number of threads decreases they will be freed first of all.
+
+    while (true)
+    {
+        if (nArenasToProcess == 0) // enough arenas rebuilt -- free whatever is left of the old list
+        {
+            FreeArenasList(pmaExistingArenas);
+            break;
+        }
+
+        dxWorldProcessMemArena *pmaOldMemArena = pmaExistingArenas; // NULL once the old list has been used up
+
+        if (pmaExistingArenas != NULL)
+        {
+            pmaExistingArenas = pmaExistingArenas->GetNextMemArena();
+        }
+        else
+        {
+            // If existing arenas ended, terminate and erase tail so that new arenas 
+            // would be inserted at the list head.
+            if (pmaRebuiltArenasTail != NULL)
+            {
+                pmaRebuiltArenasTail->SetNextMemArena(NULL);
+                pmaRebuiltArenasTail = NULL;
+            }
+        }
+
+        dxWorldProcessMemArena *pmaNewMemArena = dxWorldProcessMemArena::ReallocateMemArena(pmaOldMemArena, nMemoryRequirement, pmmMemortManager, fReserveFactor, uiReserveMinimum);
+        // A NULL result is treated as a failed (re)allocation and does not count towards nArenasToProcess.
+        if (pmaNewMemArena != NULL)
+        {
+            // Append reallocated arenas to list tail while old arenas still exist...
+            if (pmaRebuiltArenasTail != NULL)
+            {
+                pmaRebuiltArenasTail->SetNextMemArena(pmaNewMemArena);
+                pmaRebuiltArenasTail = pmaNewMemArena;
+            }
+            else if (pmaRebuiltArenasHead == NULL)
+            {
+                pmaRebuiltArenasHead = pmaNewMemArena;
+                pmaRebuiltArenasTail = pmaNewMemArena;
+            }
+            // ...and insert them at the list head if those are additional arenas created
+            else
+            {
+                pmaNewMemArena->SetNextMemArena(pmaRebuiltArenasHead);
+                pmaRebuiltArenasHead = pmaNewMemArena;
+            }
+
+            --nArenasToProcess;
+        }
+        else if (pmaOldMemArena == NULL) // a brand new arena could not be obtained -- stop trying
+        {
+            break;
+        }
+    }
+
+    if (pmaRebuiltArenasTail != NULL)
+    {
+        pmaRebuiltArenasTail->SetNextMemArena(NULL);
+    }
+
+    SetStepperArenasList(pmaRebuiltArenasHead);
+
+    bool bResult = nArenasToProcess == 0; // false when the loop stopped before all arenas were obtained
+    return bResult;
+}
+
+void dxWorldProcessContext::FreeArenasList(dxWorldProcessMemArena *pmaExistingArenas)
+{
+    // Walk the linked list; the successor is always fetched before an arena is freed.
+    for (dxWorldProcessMemArena *pmaDoomed = pmaExistingArenas; pmaDoomed != NULL; )
+    {
+        dxWorldProcessMemArena *pmaFollowing = pmaDoomed->GetNextMemArena();
+        dxWorldProcessMemArena::FreeMemArena(pmaDoomed);
+        pmaDoomed = pmaFollowing;
+    }
+}
+
+dxWorldProcessMemArena *dxWorldProcessContext::GetStepperArenasHead() const
+{ // Plain read of the current list head; callers re-read it after locking or retry a failed compare-exchange
+    return m_pmaStepperArenas;
+}
+
+bool dxWorldProcessContext::TryExtractingStepperArenasHead(dxWorldProcessMemArena *pmaHeadInstance)
+{
+    // Compare-exchange of the list head: expected value is pmaHeadInstance, replacement is its successor.
+    volatile atomicptr *papListHead = (volatile atomicptr *)&m_pmaStepperArenas;
+    return ThrsafeCompareExchangePointer(papListHead, (atomicptr)pmaHeadInstance, (atomicptr)pmaHeadInstance->GetNextMemArena());
+}
+
+bool dxWorldProcessContext::TryInsertingStepperArenasHead(dxWorldProcessMemArena *pmaArenaInstance, dxWorldProcessMemArena *pmaExistingHead)
+{ // Compare-exchange of the list head: expected value is pmaExistingHead, replacement is pmaArenaInstance
+    return ThrsafeCompareExchangePointer((volatile atomicptr *)&m_pmaStepperArenas, (atomicptr)pmaExistingHead, (atomicptr)pmaArenaInstance);
+}
+
+
+void dxWorldProcessContext::LockForAddLimotSerialization()
+{ // Locks the dxPCM_STEPPER_ADDLIMOT_SERIALIZE mutex of the stepper mutex group through the world it was allocated from
+    m_pswObjectsAllocWorld->LockMutexGroupMutex(m_pmgStepperMutexGroup, dxPCM_STEPPER_ADDLIMOT_SERIALIZE);
+}
+
+void dxWorldProcessContext::UnlockForAddLimotSerialization()
+{ // Unlocks the dxPCM_STEPPER_ADDLIMOT_SERIALIZE mutex; counterpart of LockForAddLimotSerialization()
+    m_pswObjectsAllocWorld->UnlockMutexGroupMutex(m_pmgStepperMutexGroup, dxPCM_STEPPER_ADDLIMOT_SERIALIZE);
+}
+
+
+void dxWorldProcessContext::LockForStepbodySerialization()
+{ // Locks the dxPCM_STEPPER_STEPBODY_SERIALIZE mutex of the stepper mutex group through the world it was allocated from
+    m_pswObjectsAllocWorld->LockMutexGroupMutex(m_pmgStepperMutexGroup, dxPCM_STEPPER_STEPBODY_SERIALIZE);
+}
+
+void dxWorldProcessContext::UnlockForStepbodySerialization()
+{ // Unlocks the dxPCM_STEPPER_STEPBODY_SERIALIZE mutex; counterpart of LockForStepbodySerialization()
+    m_pswObjectsAllocWorld->UnlockMutexGroupMutex(m_pmgStepperMutexGroup, dxPCM_STEPPER_STEPBODY_SERIALIZE);
+}
+
+
+//****************************************************************************
+// Threading call contexts
+
+struct dxSingleIslandCallContext;
+
+struct dxIslandsProcessingCallContext
+{ // State shared by the ThreadedProcess* calls declared below
+    dxIslandsProcessingCallContext(dxWorld *world, const dxWorldProcessIslandsInfo &islandsInfo, dReal stepSize, dstepper_fn_t stepper):
+        m_world(world), m_islandsInfo(islandsInfo), m_stepSize(stepSize), m_stepper(stepper),
+        m_groupReleasee(NULL), m_islandToProcessStorage(0), m_stepperAllowedThreads(0)
+    {
+    }
+
+    void AssignGroupReleasee(dCallReleaseeID groupReleasee) { m_groupReleasee = groupReleasee; }
+    void SetStepperAllowedThreads(unsigned allowedThreadsLimit) { m_stepperAllowedThreads = allowedThreadsLimit; }
+    // Each static *_Callback below is paired with a member function of the matching name.
+    static int ThreadedProcessGroup_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    bool ThreadedProcessGroup();
+
+    static int ThreadedProcessJobStart_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    void ThreadedProcessJobStart();
+
+    static int ThreadedProcessIslandSearch_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    void ThreadedProcessIslandSearch(dxSingleIslandCallContext *stepperCallContext);
+
+    static int ThreadedProcessIslandStepper_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee);
+    void ThreadedProcessIslandStepper(dxSingleIslandCallContext *stepperCallContext);
+
+    sizeint ObtainNextIslandToBeProcessed(sizeint islandsCount);
+
+    dxWorld                         *const m_world; // world passed to the constructor
+    dxWorldProcessIslandsInfo const &m_islandsInfo; // held by reference, so the referenced object must outlive this context
+    dReal                           const m_stepSize; // step size passed to the constructor
+    dstepper_fn_t                   const m_stepper; // stepper function passed to the constructor
+    dCallReleaseeID                 m_groupReleasee; // NULL until AssignGroupReleasee() is called
+    sizeint                          volatile m_islandToProcessStorage; // starts at 0; presumably the counter behind ObtainNextIslandToBeProcessed() -- confirm
+    unsigned                        m_stepperAllowedThreads; // 0 until SetStepperAllowedThreads() is called
+};
+
+
+struct dxSingleIslandCallContext
+{ // Per-island processing state; wraps a dxStepperProcessingCallContext built from the shared islands context
+    dxSingleIslandCallContext(dxIslandsProcessingCallContext *islandsProcessingContext, 
+        dxWorldProcessMemArena *stepperArena, void *arenaInitialState, 
+        dxBody *const *islandBodiesStart, dxJoint *const *islandJointsStart):
+        m_islandsProcessingContext(islandsProcessingContext), m_islandIndex(0), 
+        m_stepperArena(stepperArena), m_arenaInitialState(arenaInitialState), 
+        m_stepperCallContext(islandsProcessingContext->m_world, islandsProcessingContext->m_stepSize, islandsProcessingContext->m_stepperAllowedThreads, stepperArena, islandBodiesStart, islandJointsStart)
+    {
+    }
+
+    void AssignIslandSearchProgress(sizeint islandIndex)
+    {
+        m_islandIndex = islandIndex; 
+    }
+
+    void AssignIslandSelection(dxBody *const *islandBodiesStart, dxJoint *const *islandJointsStart, 
+        unsigned islandBodiesCount, unsigned islandJointsCount)
+    {
+        m_stepperCallContext.AssignIslandSelection(islandBodiesStart, islandJointsStart, islandBodiesCount, islandJointsCount);
+    }
+
+    dxBody *const *GetSelectedIslandBodiesEnd() const { return m_stepperCallContext.GetSelectedIslandBodiesEnd(); }
+    dxJoint *const *GetSelectedIslandJointsEnd() const { return m_stepperCallContext.GetSelectedIslandJointsEnd(); }
+    
+    void RestoreSavedMemArenaStateForStepper()
+    {
+        m_stepperArena->RestoreState(m_arenaInitialState); // back to the state value handed to the constructor
+    }
+
+    void AssignStepperCallFinalReleasee(dCallReleaseeID finalReleasee)
+    {
+        m_stepperCallContext.AssignStepperCallFinalReleasee(finalReleasee);
+    }
+
+    dxIslandsProcessingCallContext  *m_islandsProcessingContext; // shared context this island belongs to
+    sizeint                          m_islandIndex; // 0 initially; updated by AssignIslandSearchProgress()
+    dxWorldProcessMemArena          *m_stepperArena; // arena also passed on to m_stepperCallContext
+    void                            *m_arenaInitialState; // state token used by RestoreSavedMemArenaStateForStepper()
+    dxStepperProcessingCallContext  m_stepperCallContext; // receives the island selection and the final releasee
+};
+
+
+//****************************************************************************
+// Auto disabling
+
+void dInternalHandleAutoDisabling (dxWorld *world, dReal stepsize)
+{ // Updates the auto-disable bookkeeping of every body in world; stepsize is subtracted from the idle time countdown
+    dxBody *bb;
+    for ( bb=world->firstbody; bb; bb=(dxBody*)bb->next )
+    {
+        // don't freeze objects mid-air (patch 1586738)
+        if ( bb->firstjoint == NULL ) continue;
+
+        // nothing to do unless this body is currently enabled and has
+        // the auto-disable flag set
+        if ( (bb->flags & (dxBodyAutoDisable|dxBodyDisabled)) != dxBodyAutoDisable ) continue;
+
+        // if sampling / threshold testing is disabled, we can never sleep.
+        if ( bb->adis.average_samples == 0 ) continue;
+
+        //
+        // see if the body is idle: record the current velocities, then test the averaged samples
+        //
+
+#ifndef dNODEBUG
+        // sanity check
+        if ( bb->average_counter >= bb->adis.average_samples )
+        {
+            dUASSERT( bb->average_counter < bb->adis.average_samples, "buffer overflow" );
+
+            // something is going wrong, reset the average-calculations
+            bb->average_ready = 0; // not ready for average calculation
+            bb->average_counter = 0; // reset the buffer index
+        }
+#endif // dNODEBUG
+
+        // sample the linear and angular velocity
+        bb->average_lvel_buffer[bb->average_counter][0] = bb->lvel[0];
+        bb->average_lvel_buffer[bb->average_counter][1] = bb->lvel[1];
+        bb->average_lvel_buffer[bb->average_counter][2] = bb->lvel[2];
+        bb->average_avel_buffer[bb->average_counter][0] = bb->avel[0];
+        bb->average_avel_buffer[bb->average_counter][1] = bb->avel[1];
+        bb->average_avel_buffer[bb->average_counter][2] = bb->avel[2];
+        bb->average_counter++;
+
+        // buffer ready test
+        if ( bb->average_counter >= bb->adis.average_samples )
+        {
+            bb->average_counter = 0; // fill the buffer from the beginning
+            bb->average_ready = 1; // this body is ready now for average calculation
+        }
+
+        int idle = 0; // Assume it's in motion unless we have samples to disprove it.
+
+        // enough samples?
+        if ( bb->average_ready )
+        {
+            idle = 1; // Initial assumption: IDLE
+
+            // the sample buffers are filled and ready for calculation
+            dVector3 average_lvel, average_avel;
+
+            // Store first velocity samples
+            average_lvel[0] = bb->average_lvel_buffer[0][0];
+            average_avel[0] = bb->average_avel_buffer[0][0];
+            average_lvel[1] = bb->average_lvel_buffer[0][1];
+            average_avel[1] = bb->average_avel_buffer[0][1];
+            average_lvel[2] = bb->average_lvel_buffer[0][2];
+            average_avel[2] = bb->average_avel_buffer[0][2];
+
+            // If we're not in "instantaneous mode"
+            if ( bb->adis.average_samples > 1 )
+            {
+                // add remaining velocities together
+                for ( unsigned int i = 1; i < bb->adis.average_samples; ++i )
+                {
+                    average_lvel[0] += bb->average_lvel_buffer[i][0];
+                    average_avel[0] += bb->average_avel_buffer[i][0];
+                    average_lvel[1] += bb->average_lvel_buffer[i][1];
+                    average_avel[1] += bb->average_avel_buffer[i][1];
+                    average_lvel[2] += bb->average_lvel_buffer[i][2];
+                    average_avel[2] += bb->average_avel_buffer[i][2];
+                }
+
+                // make average
+                dReal r1 = dReal( 1.0 ) / dReal( bb->adis.average_samples );
+
+                average_lvel[0] *= r1;
+                average_avel[0] *= r1;
+                average_lvel[1] *= r1;
+                average_avel[1] *= r1;
+                average_lvel[2] *= r1;
+                average_avel[2] *= r1;
+            }
+
+            // threshold test: the dot product of each averaged vector with itself is compared against the threshold
+            dReal av_lspeed, av_aspeed;
+            av_lspeed = dCalcVectorDot3( average_lvel, average_lvel );
+            if ( av_lspeed > bb->adis.linear_average_threshold )
+            {
+                idle = 0; // average linear velocity is too high for idle
+            }
+            else
+            {
+                av_aspeed = dCalcVectorDot3( average_avel, average_avel );
+                if ( av_aspeed > bb->adis.angular_average_threshold )
+                {
+                    idle = 0; // average angular velocity is too high for idle
+                }
+            }
+        }
+
+        // if it's idle, accumulate steps and time.
+        // these counters won't overflow because this code doesn't run for disabled bodies.
+        if (idle) {
+            bb->adis_stepsleft--;
+            bb->adis_timeleft -= stepsize;
+        }
+        else {
+            // Reset countdowns
+            bb->adis_stepsleft = bb->adis.idle_steps;
+            bb->adis_timeleft = bb->adis.idle_time;
+        }
+
+        // disable the body if it's idle for a long enough time (both countdowns must have run out)
+        if ( bb->adis_stepsleft <= 0 && bb->adis_timeleft <= 0 )
+        {
+            bb->flags |= dxBodyDisabled; // set the disable flag
+
+            // disabling bodies should also include resetting the velocity
+            // should prevent jittering in big "islands"
+            bb->lvel[0] = 0;
+            bb->lvel[1] = 0;
+            bb->lvel[2] = 0;
+            bb->avel[0] = 0;
+            bb->avel[1] = 0;
+            bb->avel[2] = 0;
+        }
+    }
+}
+
+
+//****************************************************************************
+// body rotation
+
+// return sin(x)/x. this has a singularity at 0 so special handling is needed
+// for small arguments.
+
+static inline dReal sinc (dReal x)
+{
+    // Away from the singularity at zero the quotient can be evaluated directly.
+    if (!(dFabs(x) < 1.0e-4)) return dSin(x)/x;
+    // For |x| < 1e-4 use the two term Taylor expansion 1 - x^2/6 instead; it is
+    // accurate to one LS bit within this range when double precision is used.
+    return REAL(1.0) - x*x*REAL(0.166666666666666666667);
+}
+
+
+// given a body b, apply its linear and angular rotation over the time
+// interval h, thereby adjusting its position and orientation.
+
+void dxStepBody (dxBody *b, dReal h)
+{
+    // cap the angular velocity (the squared speed is compared, so dSqrt is only needed when the cap applies)
+    if (b->flags & dxBodyMaxAngularSpeed) {
+        const dReal max_ang_speed = b->max_angular_speed;
+        const dReal aspeed = dCalcVectorDot3( b->avel, b->avel );
+        if (aspeed > max_ang_speed*max_ang_speed) {
+            const dReal coef = max_ang_speed/dSqrt(aspeed);
+            dScaleVector3(b->avel, coef);
+        }
+    }
+    // end of angular velocity cap
+
+
+    // handle linear velocity
+    for (unsigned int j=0; j<3; j++) b->posr.pos[j] += h * b->lvel[j];
+
+    if (b->flags & dxBodyFlagFiniteRotation) {
+        dVector3 irv;	// infinitesimal rotation vector
+        dQuaternion q;	// quaternion for finite rotation
+
+        if (b->flags & dxBodyFlagFiniteRotationAxis) {
+            // split the angular velocity vector into a component along the finite
+            // rotation axis, and a component orthogonal to it.
+            dVector3 frv;		// finite rotation vector
+            dReal k = dCalcVectorDot3 (b->finite_rot_axis,b->avel);
+            frv[0] = b->finite_rot_axis[0] * k;
+            frv[1] = b->finite_rot_axis[1] * k;
+            frv[2] = b->finite_rot_axis[2] * k;
+            irv[0] = b->avel[0] - frv[0];
+            irv[1] = b->avel[1] - frv[1];
+            irv[2] = b->avel[2] - frv[2];
+
+            // make a rotation quaternion q that corresponds to frv * h.
+            // compare this with the full-finite-rotation case below.
+            h *= REAL(0.5); // note: h itself is modified and stays halved for the rest of the function
+            dReal theta = k * h;
+            q[0] = dCos(theta);
+            dReal s = sinc(theta) * h;
+            q[1] = frv[0] * s;
+            q[2] = frv[1] * s;
+            q[3] = frv[2] * s;
+        }
+        else {
+            // make a rotation quaternion q that corresponds to w * h
+            dReal wlen = dSqrt (b->avel[0]*b->avel[0] + b->avel[1]*b->avel[1] +
+                b->avel[2]*b->avel[2]);
+            h *= REAL(0.5); // note: h itself is modified and stays halved for the rest of the function
+            dReal theta = wlen * h;
+            q[0] = dCos(theta);
+            dReal s = sinc(theta) * h;
+            q[1] = b->avel[0] * s;
+            q[2] = b->avel[1] * s;
+            q[3] = b->avel[2] * s;
+        }
+
+        // do the finite rotation
+        dQuaternion q2;
+        dQMultiply0 (q2,q,b->q);
+        for (unsigned int j=0; j<4; j++) b->q[j] = q2[j];
+
+        // do the infinitesimal rotation if required
+        if (b->flags & dxBodyFlagFiniteRotationAxis) {
+            dReal dq[4];
+            dWtoDQ (irv,b->q,dq);
+            for (unsigned int j=0; j<4; j++) b->q[j] += h * dq[j]; // NOTE(review): h was halved above, unlike in the non-finite branch below -- confirm this is intended
+        }
+    }
+    else {
+        // the normal way - do an infinitesimal rotation
+        dReal dq[4];
+        dWtoDQ (b->avel,b->q,dq);
+        for (unsigned int j=0; j<4; j++) b->q[j] += h * dq[j];
+    }
+
+    // normalize the quaternion and convert it to a rotation matrix
+    dNormalize4 (b->q);
+    dQtoR (b->q,b->posr.R);
+
+    // notify all attached geoms that this body has moved (each dGeomMoved call is wrapped in the stepbody serialization lock)
+    dxWorldProcessContext *world_process_context = b->world->unsafeGetWorldProcessingContext(); 
+    for (dxGeom *geom = b->geom; geom; geom = dGeomGetBodyNext (geom)) {
+        world_process_context->LockForStepbodySerialization();
+        dGeomMoved (geom);
+        world_process_context->UnlockForStepbodySerialization();
+    }
+
+    // notify the user
+    if (b->moved_callback != NULL) {
+        b->moved_callback(b);
+    }
+
+    // damping: velocities are scaled by (1 - scale) when the dot product of the velocity with itself exceeds the threshold
+    if (b->flags & dxBodyLinearDamping) {
+        const dReal lin_threshold = b->dampingp.linear_threshold;
+        const dReal lin_speed = dCalcVectorDot3( b->lvel, b->lvel );
+        if ( lin_speed > lin_threshold) {
+            const dReal k = 1 - b->dampingp.linear_scale;
+            dScaleVector3(b->lvel, k);
+        }
+    }
+    if (b->flags & dxBodyAngularDamping) {
+        const dReal ang_threshold = b->dampingp.angular_threshold;
+        const dReal ang_speed = dCalcVectorDot3( b->avel, b->avel );
+        if ( ang_speed > ang_threshold) {
+            const dReal k = 1 - b->dampingp.angular_scale;
+            dScaleVector3(b->avel, k);
+        }
+    }
+}
+
+
+//****************************************************************************
+// island processing
+
+enum dxISLANDSIZESELEMENT
+{ // Indices into one per-island record of the island sizes array
+    dxISE_BODIES_COUNT, // slot holding the number of bodies in the island
+    dxISE_JOINTS_COUNT, // slot holding the number of joints in the island
+
+    dxISE__MAX // number of slots per island record
+};
+
+// This estimates dynamic memory requirements for dxProcessIslands
+static sizeint EstimateIslandProcessingMemoryRequirements(dxWorld *world)
+{
+    // World totals, widened to sizeint before any multiplication takes place.
+    const sizeint bodyCount = (sizeint)(unsigned)world->nb;
+    const sizeint jointCount = (sizeint)(unsigned)world->nj;
+
+    // Island size records: room for two counters per body.
+    const sizeint sizesBytes = dEFFICIENT_SIZE(bodyCount * 2 * sizeof(int));
+    // Pointer arrays able to reference every body and every joint of the world.
+    const sizeint bodiesBytes = dEFFICIENT_SIZE(bodyCount * sizeof(dxBody*));
+    const sizeint jointsBytes = dEFFICIENT_SIZE(jointCount * sizeof(dxJoint*));
+    // One extra block as large as the smaller of the two pointer arrays.
+    const sizeint smallerBytes = (jointsBytes > bodiesBytes) ? bodiesBytes : jointsBytes;
+
+    return sizesBytes + bodiesBytes + jointsBytes + smallerBytes;
+}
+
+static sizeint BuildIslandsAndEstimateStepperMemoryRequirements(
+    dxWorldProcessIslandsInfo &islandsinfo, dxWorldProcessMemArena *memarena, 
+    dxWorld *world, dReal stepsize, dmemestimate_fn_t stepperestimate)
+{ // Groups enabled bodies and joints into islands, records them in islandsinfo and returns the largest stepperestimate() result
+    sizeint maxreq = 0;
+
+    // handle auto-disabling of bodies
+    dInternalHandleAutoDisabling (world,stepsize);
+
+    unsigned int nb = world->nb, nj = world->nj;
+    // Make array for island body/joint counts
+    unsigned int *islandsizes = memarena->AllocateArray<unsigned int>(2 * (sizeint)nb);
+    unsigned int *sizescurr;
+
+    // make arrays for body and joint lists (for a single island) to go into
+    dxBody **body = memarena->AllocateArray<dxBody *>(nb);
+    dxJoint **joint = memarena->AllocateArray<dxJoint *>(nj);
+
+    BEGIN_STATE_SAVE(memarena, stackstate) {
+        // allocate a stack of unvisited bodies in the island. the maximum size of
+        // the stack can be the lesser of the number of bodies or joints, because
+        // new bodies are only ever added to the stack by going through untagged
+        // joints. all the bodies in the stack must be tagged!
+        unsigned int stackalloc = (nj < nb) ? nj : nb;
+        dxBody **stack = memarena->AllocateArray<dxBody *>(stackalloc);
+
+        {
+            // set all body/joint tags to 0
+            for (dxBody *b=world->firstbody; b; b=(dxBody*)b->next) b->tag = 0;
+            for (dxJoint *j=world->firstjoint; j; j=(dxJoint*)j->next) j->tag = 0;
+        }
+
+        sizescurr = islandsizes;
+        dxBody **bodystart = body; // start of the current island within the shared body array
+        dxJoint **jointstart = joint; // start of the current island within the shared joint array
+        for (dxBody *bb=world->firstbody; bb; bb=(dxBody*)bb->next) {
+            // get bb = the next enabled, untagged body, and tag it
+            if (!bb->tag) {
+                if (!(bb->flags & dxBodyDisabled)) {
+                    bb->tag = 1;
+
+                    dxBody **bodycurr = bodystart;
+                    dxJoint **jointcurr = jointstart;
+
+                    // tag all bodies and joints starting from bb.
+                    *bodycurr++ = bb;
+
+                    unsigned int stacksize = 0;
+                    dxBody *b = bb;
+
+                    while (true) {
+                        // traverse and tag all body's joints, add untagged connected bodies
+                        // to stack
+                        for (dxJointNode *n=b->firstjoint; n; n=n->next) {
+                            dxJoint *njoint = n->joint;
+                            if (!njoint->tag) {
+                                if (njoint->isEnabled()) {
+                                    njoint->tag = 1;
+                                    *jointcurr++ = njoint;
+
+                                    dxBody *nbody = n->body;
+                                    // Body disabled flag is not checked here. This is how auto-enable works.
+                                    if (nbody && nbody->tag <= 0) {
+                                        nbody->tag = 1;
+                                        // Make sure all bodies are in the enabled state.
+                                        nbody->flags &= ~dxBodyDisabled;
+                                        stack[stacksize++] = nbody;
+                                    }
+                                } else {
+                                    njoint->tag = -1; // Used in Step to prevent search over disabled joints (not needed for QuickStep so far)
+                                }
+                            }
+                        }
+                        dIASSERT(stacksize <= (unsigned int)world->nb);
+                        dIASSERT(stacksize <= (unsigned int)world->nj);
+
+                        if (stacksize == 0) {
+                            break;
+                        }
+
+                        b = stack[--stacksize];	// pop body off stack
+                        *bodycurr++ = b;	// put body on body list
+                    }
+
+                    unsigned int bcount = (unsigned int)(bodycurr - bodystart);
+                    unsigned int jcount = (unsigned int)(jointcurr - jointstart);
+                    dIASSERT((sizeint)(bodycurr - bodystart) <= (sizeint)UINT_MAX);
+                    dIASSERT((sizeint)(jointcurr - jointstart) <= (sizeint)UINT_MAX);
+                    // store this island's counts as one record of dxISE__MAX entries
+                    sizescurr[dxISE_BODIES_COUNT] = bcount;
+                    sizescurr[dxISE_JOINTS_COUNT] = jcount;
+                    sizescurr += dxISE__MAX;
+
+                    sizeint islandreq = stepperestimate(bodystart, bcount, jointstart, jcount);
+                    maxreq = (maxreq > islandreq) ? maxreq : islandreq;
+
+                    bodystart = bodycurr;
+                    jointstart = jointcurr;
+                } else {
+                    bb->tag = -1; // Not used so far (assigned to retain consistency with joints)
+                }
+            }
+        }
+    } END_STATE_SAVE(memarena, stackstate); // presumably rewinds the arena so that the traversal stack is discarded -- confirm
+
+# ifndef dNODEBUG
+    // if debugging, check that all objects (except for disabled bodies,
+    // unconnected joints, and joints that are connected to disabled bodies)
+    // were tagged.
+    {
+        for (dxBody *b=world->firstbody; b; b=(dxBody*)b->next) {
+            if (b->flags & dxBodyDisabled) {
+                if (b->tag > 0) dDebug (0,"disabled body tagged");
+            }
+            else {
+                if (b->tag <= 0) dDebug (0,"enabled body not tagged");
+            }
+        }
+        for (dxJoint *j=world->firstjoint; j; j=(dxJoint*)j->next) {
+            if ( (( j->node[0].body && (j->node[0].body->flags & dxBodyDisabled)==0 ) ||
+                (j->node[1].body && (j->node[1].body->flags & dxBodyDisabled)==0) )
+                && 
+                j->isEnabled() ) {
+                    if (j->tag <= 0) dDebug (0,"attached enabled joint not tagged");
+            }
+            else {
+                if (j->tag > 0) dDebug (0,"unattached or disabled joint tagged");
+            }
+        }
+    }
+# endif
+    // number of island records written above
+    sizeint islandcount = ((sizeint)(sizescurr - islandsizes) / dxISE__MAX);
+    islandsinfo.AssignInfo(islandcount, islandsizes, body, joint);
+
+    return maxreq;
+}
+
+// Estimates the maximum number of threaded calls that can be pending simultaneously during island
+// processing; dxProcessIslands() passes the result to PreallocateResourcesForThreadedCalls().
+static unsigned EstimateIslandProcessingSimultaneousCallsMaximumCount(unsigned activeThreadCount, unsigned islandsAllowedThreadCount, 
+    unsigned stepperAllowedThreadCount, dmaxcallcountestimate_fn_t maxCallCountEstimator)
+{
+    // ThreadedProcessIslandSearch_Callback + (ThreadedProcessIslandStepper_Callback && ThreadedProcessIslandSearch_Callback)
+    const unsigned searchChainCallsMaximum = (1 + 2);
+    const unsigned stepperCallsMaximum = maxCallCountEstimator(activeThreadCount, stepperAllowedThreadCount);
+    const unsigned perThreadCallsMaximum = dMAX(stepperCallsMaximum, searchChainCallsMaximum);
+    // Share of ThreadedProcessJobStart_Callback calls
+    const unsigned jobStartCallsMaximum = dMIN(islandsAllowedThreadCount, (unsigned)(activeThreadCount - islandsAllowedThreadCount));
+    return 1 /* ThreadedProcessGroup_Callback */ + islandsAllowedThreadCount * perThreadCallsMaximum + jobStartCallsMaximum;
+}
+
+// this groups all joints and bodies in a world into islands. all objects
+// in an island are reachable by going through connected bodies and joints.
+// each island can be simulated separately.
+// note that joints that are not attached to anything will not be included
+// in any island, and so they do not affect the simulation.
+//
+// this function starts a new island from unvisited bodies. however, it will
+// never start a new island from a disabled body. thus islands of disabled
+// bodies will not be included in the simulation. disabled bodies are
+// re-enabled if they are found to be part of an active island.
+bool dxProcessIslands (dxWorld *world, const dxWorldProcessIslandsInfo &islandsInfo, 
+    dReal stepSize, dstepper_fn_t stepper, dmaxcallcountestimate_fn_t maxCallCountEstimator)
+{
+    // Posts one island processing job per allowed thread to the world's threaded-call interface
+    // and waits until the group call depending on those jobs has completed.
+    //
+    // Returns false if resources for the threaded calls could not be preallocated or if a fault
+    // was recorded for the group call; returns true otherwise.
+
+    dxIslandsProcessingCallContext callContext(world, islandsInfo, stepSize, stepper);
+
+    dxStepWorkingMemory *wmem = world->wmem;
+    dIASSERT(wmem != NULL);
+    dxWorldProcessContext *context = wmem->GetWorldProcessingContext(); 
+    dIASSERT(context != NULL);
+    dCallWaitID groupCallWait = context->GetIslandsSteppingWait();
+
+    unsigned activeThreadCount;
+    const unsigned islandsAllowedThreadCount = world->calculateIslandProcessingMaxThreadCount(&activeThreadCount);
+    dIASSERT(islandsAllowedThreadCount != 0);
+    dIASSERT(activeThreadCount >= islandsAllowedThreadCount);
+
+    // For now, the stepper is allowed as many threads as island stepping itself
+    unsigned stepperAllowedThreadCount = islandsAllowedThreadCount;
+
+    // Reserve resources for the estimated maximum of simultaneously pending calls
+    unsigned expectedCallsCount = EstimateIslandProcessingSimultaneousCallsMaximumCount(
+        activeThreadCount, islandsAllowedThreadCount, stepperAllowedThreadCount, maxCallCountEstimator);
+    if (!world->PreallocateResourcesForThreadedCalls(expectedCallsCount)) {
+        return false;
+    }
+
+    // Post the group call first, with its dependency count set to the number of jobs expected
+    int groupFault = 0;
+    dCallReleaseeID groupReleasee;
+    world->PostThreadedCall(&groupFault, &groupReleasee, islandsAllowedThreadCount, NULL, groupCallWait, 
+        &dxIslandsProcessingCallContext::ThreadedProcessGroup_Callback, (void *)&callContext, 0, "World Islands Stepping Group");
+
+    callContext.AssignGroupReleasee(groupReleasee);
+    callContext.SetStepperAllowedThreads(stepperAllowedThreadCount);
+
+    // No fault flag is passed for the jobs: any failure automatically propagates to the
+    // dependent releasee (i.e. to groupReleasee)
+    world->PostThreadedCallsGroup(NULL, islandsAllowedThreadCount, groupReleasee, 
+        &dxIslandsProcessingCallContext::ThreadedProcessJobStart_Callback, (void *)&callContext, "World Islands Stepping Start");
+
+    // Wait for the group call; the jobs are its dependencies, so it can only complete after
+    // all of them have ended
+    world->WaitThreadedCallExclusively(NULL, groupCallWait, NULL, "World Islands Stepping Wait");
+
+    // A non-zero fault value means the processing did not succeed
+    return groupFault == 0;
+}
+
+
+int dxIslandsProcessingCallContext::ThreadedProcessGroup_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; (void)callThisReleasee; // neither is needed by this adapter
+    dxIslandsProcessingCallContext *self = static_cast<dxIslandsProcessingCallContext *>(callContext);
+    return self->ThreadedProcessGroup(); // the bool result is converted to the int this callback returns
+}
+// Body of the group call that dxProcessIslands() posts and then waits on; it has no work of its own.
+bool dxIslandsProcessingCallContext::ThreadedProcessGroup()
+{
+    // Do nothing - it's just a wrapper call
+    return true;
+}
+
+int dxIslandsProcessingCallContext::ThreadedProcessJobStart_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; (void)callThisReleasee; // neither is needed by this adapter
+    dxIslandsProcessingCallContext *self = static_cast<dxIslandsProcessingCallContext *>(callContext);
+    self->ThreadedProcessJobStart();
+    return true; // the job start has no result of its own, so success is always reported
+}
+
+void dxIslandsProcessingCallContext::ThreadedProcessJobStart()
+{
+    // Prepares one island processing job: obtains a stepper arena, constructs the per-job
+    // dxSingleIslandCallContext inside it and posts the first island search call.
+    dxWorldProcessContext *context = m_world->unsafeGetWorldProcessingContext(); 
+    dxWorldProcessMemArena *stepperArena = context->ObtainStepperMemArena();
+    dIASSERT(stepperArena != NULL && stepperArena->IsStructureValid());
+
+    // The job context is carved out of the arena itself. The arena state is captured right
+    // after that allocation so that it can be restored for the stepper later without
+    // giving up the context's own storage.
+    void *contextStorage = stepperArena->AllocateBlock(sizeof(dxSingleIslandCallContext));
+    void *arenaStateForStepper = stepperArena->SaveState();
+    dxSingleIslandCallContext *jobContext = new(contextStorage) dxSingleIslandCallContext(
+        this, stepperArena, arenaStateForStepper, m_islandsInfo.GetBodiesArray(), m_islandsInfo.GetJointsArray());
+
+    // Summary fault flag may be omitted as any failures will automatically propagate to dependent releasee (i.e. to m_groupReleasee)
+    m_world->PostThreadedCallForUnawareReleasee(NULL, NULL, 0, m_groupReleasee, NULL, 
+        &dxIslandsProcessingCallContext::ThreadedProcessIslandSearch_Callback, (void *)jobContext, 0, "World Islands Stepping Selection");
+}
+
+int dxIslandsProcessingCallContext::ThreadedProcessIslandSearch_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; (void)callThisReleasee; // neither is needed by this adapter
+    // The opaque context of a search call is the per-job dxSingleIslandCallContext
+    dxSingleIslandCallContext *jobContext = static_cast<dxSingleIslandCallContext *>(callContext);
+    jobContext->m_islandsProcessingContext->ThreadedProcessIslandSearch(jobContext);
+    return true;
+}
+
+void dxIslandsProcessingCallContext::ThreadedProcessIslandSearch(dxSingleIslandCallContext *stepperCallContext)
+{
+    // Obtains the index of the next island to process for this job. If there is one, the job
+    // context is pointed at it and two calls are posted: the stepper for that island and a
+    // further search that is released by the stepper call. If all islands have been handed
+    // out already, the job context is destroyed and its arena is returned instead.
+
+    const sizeint islandsCount = m_islandsInfo.GetIslandsCount();
+    const sizeint islandToProcess = ObtainNextIslandToBeProcessed(islandsCount);
+
+    if (islandToProcess == islandsCount) {
+        // Finalize the job. The context is destroyed with an explicit destructor call;
+        // the arena pointer has to be read out of it beforehand.
+        dxWorldProcessMemArena *stepperArena = stepperCallContext->m_stepperArena;
+        stepperCallContext->dxSingleIslandCallContext::~dxSingleIslandCallContext();
+
+        dxWorldProcessContext *context = m_world->unsafeGetWorldProcessingContext(); 
+        context->ReturnStepperMemArena(stepperArena);
+        return;
+    }
+
+    unsigned int const *islandSizes = m_islandsInfo.GetIslandSizes();
+
+    // First time, the counts are zeros and on next passes, adding counts will skip island that has just been processed by stepper
+    dxBody *const *bodiesCursor = stepperCallContext->GetSelectedIslandBodiesEnd();
+    dxJoint *const *jointsCursor = stepperCallContext->GetSelectedIslandJointsEnd();
+
+    // Step over the islands lying between the previous search position and the obtained index
+    sizeint islandIndex = stepperCallContext->m_islandIndex;
+    while (islandIndex != islandToProcess) {
+        unsigned int const *skippedSizes = islandSizes + islandIndex * dxISE__MAX;
+        bodiesCursor += skippedSizes[dxISE_BODIES_COUNT];
+        jointsCursor += skippedSizes[dxISE_JOINTS_COUNT];
+        ++islandIndex;
+    }
+
+    // Store selected island details
+    unsigned int const *selectedSizes = islandSizes + islandIndex * dxISE__MAX;
+    stepperCallContext->AssignIslandSelection(bodiesCursor, jointsCursor, 
+        selectedSizes[dxISE_BODIES_COUNT], selectedSizes[dxISE_JOINTS_COUNT]);
+
+    // Store next island index to continue search from
+    stepperCallContext->AssignIslandSearchProgress(islandIndex + 1);
+
+    // Restore saved stepper memory arena position
+    stepperCallContext->RestoreSavedMemArenaStateForStepper();
+
+    // The follow-up search is posted before the stepper so that its releasee can be handed to
+    // the stepper call as the object to release on completion.
+    // Summary fault flags may be omitted as any failures will automatically propagate to
+    // dependent releasee (i.e. to m_groupReleasee)
+    dCallReleaseeID nextSearchReleasee;
+    m_world->PostThreadedCallForUnawareReleasee(NULL, &nextSearchReleasee, 1, m_groupReleasee, NULL, 
+        &dxIslandsProcessingCallContext::ThreadedProcessIslandSearch_Callback, (void *)stepperCallContext, 0, "World Islands Stepping Selection");
+
+    stepperCallContext->AssignStepperCallFinalReleasee(nextSearchReleasee);
+
+    // Finally post the stepper call for the selected island
+    m_world->PostThreadedCall(NULL, NULL, 0, nextSearchReleasee, NULL, 
+        &dxIslandsProcessingCallContext::ThreadedProcessIslandStepper_Callback, (void *)stepperCallContext, 0, "Island Stepping Job Start");
+}
+
+int dxIslandsProcessingCallContext::ThreadedProcessIslandStepper_Callback(void *callContext, dcallindex_t callInstanceIndex, dCallReleaseeID callThisReleasee)
+{
+    (void)callInstanceIndex; (void)callThisReleasee; // neither is needed by this adapter
+    // The opaque context of a stepper call is the per-job dxSingleIslandCallContext
+    dxSingleIslandCallContext *jobContext = static_cast<dxSingleIslandCallContext *>(callContext);
+    jobContext->m_islandsProcessingContext->ThreadedProcessIslandStepper(jobContext);
+    return true;
+}
+// Invokes the stepper function stored in m_stepper on the stepper call context embedded in the given job context.
+void dxIslandsProcessingCallContext::ThreadedProcessIslandStepper(dxSingleIslandCallContext *stepperCallContext)
+{
+    m_stepper(&stepperCallContext->m_stepperCallContext);
+}
+// Hands out island indices to the jobs; ThreadedProcessIslandSearch() treats a result equal to islandsCount as "no islands left".
+sizeint dxIslandsProcessingCallContext::ObtainNextIslandToBeProcessed(sizeint islandsCount)
+{
+    return ThrsafeIncrementSizeUpToLimit(&m_islandToProcessStorage, islandsCount); // NOTE(review): presumably an atomic increment capped at the limit that returns the previous value - confirm
+}
+
+
+//****************************************************************************
+// World processing context management
+
+dxWorldProcessMemArena *dxWorldProcessMemArena::ReallocateMemArena (
+    dxWorldProcessMemArena *oldarena, sizeint memreq, 
+    const dxWorldProcessMemoryManager *memmgr, float rsrvfactor, unsigned rsrvminimum)
+{
+    // Makes sure an arena with at least memreq bytes of usable memory exists.
+    //
+    //   oldarena    - existing arena to reuse or replace; may be NULL
+    //   memreq      - required usable memory size
+    //   memmgr      - memory manager used to allocate a new arena buffer
+    //   rsrvfactor,
+    //   rsrvminimum - reserve parameters forwarded to AdjustArenaSizeForReserveRequirements()
+    //
+    // Returns oldarena unchanged if it is already large enough, otherwise a newly allocated
+    // arena. Returns NULL if the request is too large to be represented or if the allocation
+    // fails. Whenever oldarena is not returned, it has been freed (through the memory manager
+    // stored in it) and must not be used by the caller any more - this includes the failure
+    // cases.
+
+    // Nothing to do if the existing arena already satisfies the request
+    if (oldarena != NULL && oldarena->GetMemorySize() >= memreq) {
+        return oldarena;
+    }
+
+    // From here on the old arena is given up, whatever happens next. Its buffer size is not
+    // stored and is reconstructed from the usable memory size.
+    if (oldarena != NULL) {
+        void *pOldArenaBuffer = oldarena->m_pArenaBegin;
+        sizeint nOldArenaSize = dxWorldProcessMemArena::MakeArenaSize(oldarena->GetMemorySize());
+        oldarena->m_pArenaMemMgr->m_fnFree(pOldArenaBuffer, nOldArenaSize);
+    }
+
+    // Reject requests for which adding the arena overhead would overflow
+    if (!dxWorldProcessMemArena::IsArenaPossible(memreq)) {
+        return NULL;
+    }
+
+    // Size of the buffer to allocate. Whatever the reserve adjustment adds to the buffer is
+    // added to the usable memory size as well.
+    const sizeint arenareq = dxWorldProcessMemArena::MakeArenaSize(memreq);
+    const sizeint arenareq_with_reserve = AdjustArenaSizeForReserveRequirements(arenareq, rsrvfactor, rsrvminimum);
+    const sizeint memreq_with_reserve = memreq + (arenareq_with_reserve - arenareq);
+
+    // Allocate new arena
+    void *pNewArenaBuffer = memmgr->m_fnAlloc(arenareq_with_reserve);
+    if (pNewArenaBuffer == NULL) {
+        return NULL;
+    }
+
+    // The arena header is placed at the dEFFICIENT_PTR-adjusted start of the buffer and the
+    // usable block follows it
+    dxWorldProcessMemArena *arena = (dxWorldProcessMemArena *)dEFFICIENT_PTR(pNewArenaBuffer);
+
+    void *blockbegin = dEFFICIENT_PTR(arena + 1);
+    void *blockend = dOFFSET_EFFICIENTLY(blockbegin, memreq_with_reserve);
+
+    // Fill in the header. The original buffer pointer and the memory manager are kept so that
+    // the buffer can be freed later.
+    arena->m_pArenaBegin = pNewArenaBuffer;
+    arena->m_pArenaMemMgr = memmgr;
+    arena->m_pAllocBegin = blockbegin;
+    arena->m_pAllocEnd = blockend;
+    arena->m_pAllocCurrentOrNextArena = NULL;
+
+    return arena;
+}
+
+void dxWorldProcessMemArena::FreeMemArena (dxWorldProcessMemArena *arena)
+{
+    // Releases the buffer the arena lives in through the memory manager recorded in the arena.
+    // The full buffer size is not stored, so it is rebuilt from the usable memory size.
+    const dxWorldProcessMemoryManager *arenamemmgr = arena->m_pArenaMemMgr;
+    const sizeint fullsize = dxWorldProcessMemArena::MakeArenaSize(arena->GetMemorySize());
+    arenamemmgr->m_fnFree(arena->m_pArenaBegin, fullsize);
+}
+// Applies the reserve factor and the reserve minimum to an arena size request and rounds the result with dEFFICIENT_SIZE; the value is never below arenareq.
+sizeint dxWorldProcessMemArena::AdjustArenaSizeForReserveRequirements(sizeint arenareq, float rsrvfactor, unsigned rsrvminimum)
+{
+    float scaledarena = arenareq * rsrvfactor;
+    sizeint adjustedarena = (scaledarena < SIZE_MAX) ? (sizeint)scaledarena : SIZE_MAX;
+    sizeint flooredarena = (adjustedarena > arenareq) ? adjustedarena : arenareq; // float rounding (24-bit significand) or a factor below 1 must not shrink the request: ReallocateMemArena subtracts arenareq from the result
+    sizeint boundedarena = (flooredarena > rsrvminimum) ? flooredarena : (sizeint)rsrvminimum;
+    return dEFFICIENT_SIZE(boundedarena);
+}
+
+
+bool dxReallocateWorldProcessContext (dxWorld *world, dxWorldProcessIslandsInfo &islandsInfo, 
+    dReal stepSize, dmemestimate_fn_t stepperEstimate)
+{
+    // Prepares the world's processing context for a step:
+    //  - creates the working memory object and the processing context on demand,
+    //  - makes sure the stepper sync objects are allocated,
+    //  - (re)allocates the islands arena and builds the islands in it, filling in islandsInfo,
+    //  - (re)allocates the stepper arenas for the largest stepper memory estimate.
+    //
+    // stepperEstimate is the callback used to estimate the stepper memory needed by an island.
+    // Returns true on success and false as soon as any of the steps fails.
+
+    // Create the working memory object on first use
+    dxStepWorkingMemory *wmem = AllocateOnDemand(world->wmem);
+    if (wmem == NULL) {
+        return false;
+    }
+
+    // Same for the processing context kept inside it
+    dxWorldProcessContext *context = wmem->SureGetWorldProcessingContext();
+    if (context == NULL) {
+        return false;
+    }
+
+    if (!context->EnsureStepperSyncObjectsAreAllocated(world)) {
+        return false;
+    }
+
+    // Either the values configured on the working memory object or the global defaults
+    const dxWorldProcessMemoryManager *memmgr = wmem->SureGetMemoryManager();
+    const dxWorldProcessMemoryReserveInfo *reserveInfo = wmem->SureGetMemoryReserveInfo();
+
+    sizeint islandsReq = EstimateIslandProcessingMemoryRequirements(world);
+    dIASSERT(islandsReq == dEFFICIENT_SIZE(islandsReq));
+
+    // The islands arena is requested with a reserve factor of 1
+    dxWorldProcessMemArena *islandsArena = context->ReallocateIslandsMemArena(islandsReq, memmgr, 1.0f, reserveInfo->m_uiReserveMinimum);
+    if (islandsArena == NULL) {
+        return false;
+    }
+    dIASSERT(islandsArena->IsStructureValid());
+
+    // Building the islands also yields the largest stepper memory requirement found
+    // among them
+    sizeint stepperReq = BuildIslandsAndEstimateStepperMemoryRequirements(islandsInfo, islandsArena, world, stepSize, stepperEstimate);
+    dIASSERT(stepperReq == dEFFICIENT_SIZE(stepperReq));
+
+    // Each stepper arena additionally hosts the dxSingleIslandCallContext of the job
+    // that uses it
+    sizeint stepperReqWithCallContext = stepperReq + dEFFICIENT_SIZE(sizeof(dxSingleIslandCallContext));
+
+    unsigned islandThreadsCount = world->calculateIslandProcessingMaxThreadCount();
+    return context->ReallocateStepperMemArenas(world, islandThreadsCount, stepperReqWithCallContext, 
+        memmgr, reserveInfo->m_fReserveFactor, reserveInfo->m_uiReserveMinimum);
+}
+
+dxWorldProcessMemArena *dxAllocateTemporaryWorldProcessMemArena(
+    sizeint memreq, const dxWorldProcessMemoryManager *memmgr/*=NULL*/, const dxWorldProcessMemoryReserveInfo *reserveinfo/*=NULL*/)
+{
+    // Allocates a fresh arena sized for memreq; NULL arguments fall back to g_WorldProcessMallocMemoryManager / g_WorldProcessDefaultReserveInfo
+    if (memmgr == NULL) { memmgr = &g_WorldProcessMallocMemoryManager; }
+    if (reserveinfo == NULL) { reserveinfo = &g_WorldProcessDefaultReserveInfo; }
+    return dxWorldProcessMemArena::ReallocateMemArena(NULL, memreq, memmgr, reserveinfo->m_fReserveFactor, reserveinfo->m_uiReserveMinimum);
+}
+// Frees an arena by forwarding to dxWorldProcessMemArena::FreeMemArena(); arena is dereferenced there, so it must not be NULL.
+void dxFreeTemporaryWorldProcessMemArena(dxWorldProcessMemArena *arena)
+{
+    dxWorldProcessMemArena::FreeMemArena(arena);
+}
+
diff --git a/libs/ode-0.16.1/ode/src/util.h b/libs/ode-0.16.1/ode/src/util.h
new file mode 100644
index 0000000..ca222ac
--- /dev/null
+++ b/libs/ode-0.16.1/ode/src/util.h
@@ -0,0 +1,440 @@
+/*************************************************************************
+ *                                                                       *
+ * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith.       *
+ * All rights reserved.  Email: russ@q12.org   Web: www.q12.org          *
+ *                                                                       *
+ * This library is free software; you can redistribute it and/or         *
+ * modify it under the terms of EITHER:                                  *
+ *   (1) The GNU Lesser General Public License as published by the Free  *
+ *       Software Foundation; either version 2.1 of the License, or (at  *
+ *       your option) any later version. The text of the GNU Lesser      *
+ *       General Public License is included with this library in the     *
+ *       file LICENSE.TXT.                                               *
+ *   (2) The BSD-style license that is included with this library in     *
+ *       the file LICENSE-BSD.TXT.                                       *
+ *                                                                       *
+ * This library is distributed in the hope that it will be useful,       *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files    *
+ * LICENSE.TXT and LICENSE-BSD.TXT for more details.                     *
+ *                                                                       *
+ *************************************************************************/
+
+#ifndef _ODE_UTIL_H_
+#define _ODE_UTIL_H_
+
+#include "objects.h"
+#include "common.h"
+
+
+/* utility */
+// Prototypes only - both functions are defined outside this header.
+void dInternalHandleAutoDisabling (dxWorld *world, dReal stepsize);
+void dxStepBody (dxBody *b, dReal h);
+
+// Bundle of callbacks through which world processing arenas obtain and release their raw memory blocks.
+struct dxWorldProcessMemoryManager:
+    public dBase
+{
+    typedef void *(*alloc_block_fn_t)(sizeint block_size);
+    typedef void *(*shrink_block_fn_t)(void *block_pointer, sizeint block_current_size, sizeint block_smaller_size);
+    typedef void (*free_block_fn_t)(void *block_pointer, sizeint block_current_size);
+
+    dxWorldProcessMemoryManager(alloc_block_fn_t fnAlloc, shrink_block_fn_t fnShrink, free_block_fn_t fnFree):
+        m_fnAlloc(fnAlloc), m_fnShrink(fnShrink), m_fnFree(fnFree)
+    {
+    }
+    // Replaces all three callbacks at once
+    void Assign(alloc_block_fn_t fnAlloc, shrink_block_fn_t fnShrink, free_block_fn_t fnFree)
+    {
+        m_fnFree = fnFree;
+        m_fnShrink = fnShrink;
+        m_fnAlloc = fnAlloc;
+    }
+
+    alloc_block_fn_t m_fnAlloc;
+    shrink_block_fn_t m_fnShrink;
+    free_block_fn_t m_fnFree;
+};
+// Manager that is used whenever no other memory manager has been supplied
+extern dxWorldProcessMemoryManager g_WorldProcessMallocMemoryManager;
+// Parameters controlling how much extra memory is reserved when an arena gets (re)allocated.
+struct dxWorldProcessMemoryReserveInfo:
+    public dBase
+{
+    dxWorldProcessMemoryReserveInfo(float fReserveFactor, unsigned uiReserveMinimum):
+        m_fReserveFactor(fReserveFactor), m_uiReserveMinimum(uiReserveMinimum)
+    {
+    }
+    // Overwrites both reserve parameters
+    void Assign(float fReserveFactor, unsigned uiReserveMinimum)
+    {
+        m_uiReserveMinimum = uiReserveMinimum;
+        m_fReserveFactor = fReserveFactor;
+    }
+
+    float m_fReserveFactor; // Use float as precision does not matter here
+    unsigned m_uiReserveMinimum; // lower bound applied to the arena size (see AdjustArenaSizeForReserveRequirements)
+};
+// Reserve parameters that are used whenever none have been supplied explicitly
+extern dxWorldProcessMemoryReserveInfo g_WorldProcessDefaultReserveInfo;
+
+// Linear memory arena used during world processing: blocks are handed out sequentially from a single buffer and released by restoring a previously saved position.
+class dxWorldProcessMemArena:
+    private dBase // new/delete must not be called for this class
+{
+public:
+#define BUFFER_TO_ARENA_EXTRA (EFFICIENT_ALIGNMENT + dEFFICIENT_SIZE(sizeof(dxWorldProcessMemArena)))
+    static bool IsArenaPossible(sizeint nBufferSize)
+    {
+        return nBufferSize <= SIZE_MAX - BUFFER_TO_ARENA_EXTRA; // This ensures there will be no overflow
+    }
+    // Inverse of MakeArenaSize(); the subtraction is not guarded against underflow
+    static sizeint MakeBufferSize(sizeint nArenaSize)
+    {
+        return nArenaSize - BUFFER_TO_ARENA_EXTRA;
+    }
+    // Size to allocate for nBufferSize bytes of usable memory (alignment slack and arena header included)
+    static sizeint MakeArenaSize(sizeint nBufferSize)
+    {
+        return nBufferSize + BUFFER_TO_ARENA_EXTRA;
+    }
+#undef BUFFER_TO_ARENA_EXTRA
+    // Sanity check of the stored pointers; the current position has to be either NULL or at the start of the block
+    bool IsStructureValid() const
+    {
+        const bool bBlockRangeValid = m_pAllocBegin != NULL && m_pAllocEnd != NULL && m_pAllocBegin <= m_pAllocEnd;
+        const bool bPositionValid = m_pAllocCurrentOrNextArena == NULL || m_pAllocCurrentOrNextArena == m_pAllocBegin;
+        return bBlockRangeValid && bPositionValid && m_pArenaBegin != NULL && m_pArenaBegin <= m_pAllocBegin;
+    }
+    // Size of the usable memory block in bytes
+    sizeint GetMemorySize() const
+    {
+        return (sizeint)m_pAllocEnd - (sizeint)m_pAllocBegin;
+    }
+    // Returns the current allocation position so that it can be passed to RestoreState() later
+    void *SaveState() const
+    {
+        return m_pAllocCurrentOrNextArena;
+    }
+    // Sets the allocation position to a value obtained from SaveState()
+    void RestoreState(void *state)
+    {
+        m_pAllocCurrentOrNextArena = state;
+    }
+    // Puts the allocation position at the start of the block, making all of it available
+    void ResetState()
+    {
+        m_pAllocCurrentOrNextArena = m_pAllocBegin;
+    }
+    // Returns the current allocation position without allocating anything
+    void *PeekBufferRemainder() const
+    {
+        return m_pAllocCurrentOrNextArena;
+    }
+    // Hands out the memory at the current position and advances the position via dOFFSET_EFFICIENTLY
+    void *AllocateBlock(sizeint size)
+    {
+        void *arena = m_pAllocCurrentOrNextArena;
+        m_pAllocCurrentOrNextArena = dOFFSET_EFFICIENTLY(arena, size);
+        dIASSERT(m_pAllocCurrentOrNextArena <= m_pAllocEnd);
+        dIASSERT(dEFFICIENT_PTR(arena) == arena);
+        // The bounds are verified by the assertions only; there is no failure return
+        return arena;
+    }
+    // Same as AllocateBlock() for blocks that need a custom alignment
+    void *AllocateOveralignedBlock(sizeint size, unsigned alignment)
+    {
+        void *arena = m_pAllocCurrentOrNextArena;
+        m_pAllocCurrentOrNextArena = dOFFSET_OVERALIGNEDLY(arena, size, alignment);
+        dIASSERT(m_pAllocCurrentOrNextArena <= m_pAllocEnd);
+        // The position advances by the dOFFSET_OVERALIGNEDLY amount, while the block returned
+        // is the old position adjusted with dOVERALIGNED_PTR
+        return dOVERALIGNED_PTR(arena, alignment);
+    }
+    // Typed convenience wrapper over AllocateBlock(); the elements are not constructed
+    template<typename ElementType>
+    ElementType *AllocateArray(sizeint count)
+    {
+        return (ElementType *)AllocateBlock(sizeof(ElementType) * count);
+    }
+    // Typed convenience wrapper over AllocateOveralignedBlock(); the elements are not constructed
+    template<typename ElementType>
+    ElementType *AllocateOveralignedArray(sizeint count, unsigned alignment)
+    {
+        return (ElementType *)AllocateOveralignedBlock(sizeof(ElementType) * count, alignment);
+    }
+    // Gives back the tail of the most recently allocated array (asserted to end at the current position)
+    template<typename ElementType>
+    void ShrinkArray(ElementType *arr, sizeint oldcount, sizeint newcount)
+    {
+        dIASSERT(newcount <= oldcount);
+        dIASSERT(dOFFSET_EFFICIENTLY(arr, oldcount * sizeof(ElementType)) == m_pAllocCurrentOrNextArena);
+        m_pAllocCurrentOrNextArena = dOFFSET_EFFICIENTLY(arr, newcount * sizeof(ElementType));
+    }
+
+public:
+    static dxWorldProcessMemArena *ReallocateMemArena (
+        dxWorldProcessMemArena *oldarena, sizeint memreq, 
+        const dxWorldProcessMemoryManager *memmgr, float rsrvfactor, unsigned rsrvminimum);
+    static void FreeMemArena (dxWorldProcessMemArena *arena);
+    // The position field doubles as a "next" link, which allows arenas to be chained into a list
+    dxWorldProcessMemArena *GetNextMemArena() const { return static_cast<dxWorldProcessMemArena *>(m_pAllocCurrentOrNextArena); }
+    void SetNextMemArena(dxWorldProcessMemArena *pArenaInstance) { m_pAllocCurrentOrNextArena = pArenaInstance; }
+
+private:
+    static sizeint AdjustArenaSizeForReserveRequirements(sizeint arenareq, float rsrvfactor, unsigned rsrvminimum);
+
+private:
+    void *m_pAllocCurrentOrNextArena; // current allocation position, or the next arena link
+    void *m_pAllocBegin; // start of the usable block
+    void *m_pAllocEnd; // end of the usable block
+    void *m_pArenaBegin; // buffer as returned by the memory manager; this is what gets freed
+    // Memory manager the buffer is released through
+    const dxWorldProcessMemoryManager *m_pArenaMemMgr;
+};
+// Per-world processing state: holds the islands arena, the list of stepper arenas and the threading objects used while stepping (see the members at the end).
+class dxWorldProcessContext:
+    public dBase
+{
+public:
+    dxWorldProcessContext();
+    ~dxWorldProcessContext();
+    // NOTE(review): presumably releases objects that were allocated through the given world - confirm in the implementation
+    void CleanupWorldReferences(dxWorld *pswWorldInstance);
+
+public:
+    bool EnsureStepperSyncObjectsAreAllocated(dxWorld *pswWorldInstance); // a false result makes dxReallocateWorldProcessContext() fail
+    dCallWaitID GetIslandsSteppingWait() const { return m_pcwIslandsSteppingWait; }
+
+public:
+    dxWorldProcessMemArena *ObtainStepperMemArena(); // to be paired with ReturnStepperMemArena()
+    void ReturnStepperMemArena(dxWorldProcessMemArena *pmaArenaInstance);
+    // Both functions below take the required memory size, the memory manager and the reserve parameters
+    dxWorldProcessMemArena *ReallocateIslandsMemArena(sizeint nMemoryRequirement, 
+        const dxWorldProcessMemoryManager *pmmMemortManager, float fReserveFactor, unsigned uiReserveMinimum);
+    bool ReallocateStepperMemArenas(dxWorld *world, unsigned nIslandThreadsCount, sizeint nMemoryRequirement, 
+        const dxWorldProcessMemoryManager *pmmMemortManager, float fReserveFactor, unsigned uiReserveMinimum);
+
+private:
+    static void FreeArenasList(dxWorldProcessMemArena *pmaExistingArenas);
+
+private:
+    void SetIslandsMemArena(dxWorldProcessMemArena *pmaInstance) { m_pmaIslandsArena = pmaInstance; }
+    dxWorldProcessMemArena *GetIslandsMemArena() const { return m_pmaIslandsArena; }
+
+    void SetStepperArenasList(dxWorldProcessMemArena *pmaInstance) { m_pmaStepperArenas = pmaInstance; }
+    dxWorldProcessMemArena *GetStepperArenasList() const { return m_pmaStepperArenas; }
+    // NOTE(review): these look like the primitives for taking/putting the head of the stepper arenas list from several threads - confirm
+    inline dxWorldProcessMemArena *GetStepperArenasHead() const;
+    inline bool TryExtractingStepperArenasHead(dxWorldProcessMemArena *pmaHeadInstance);
+    inline bool TryInsertingStepperArenasHead(dxWorldProcessMemArena *pmaArenaInstance, dxWorldProcessMemArena *pmaExistingHead);
+
+public:
+    void LockForAddLimotSerialization();
+    void UnlockForAddLimotSerialization();
+    void LockForStepbodySerialization();
+    void UnlockForStepbodySerialization();
+
+private:
+    enum dxProcessContextMutex
+    {
+        dxPCM_STEPPER_ARENA_OBTAIN,
+        dxPCM_STEPPER_ADDLIMOT_SERIALIZE,
+        dxPCM_STEPPER_STEPBODY_SERIALIZE,
+
+        dxPCM__MAX // number of entries above; used as the size of m_aszContextMutexNames
+    };
+
+    static const char *const m_aszContextMutexNames[dxPCM__MAX];
+
+private:
+    dxWorldProcessMemArena  *m_pmaIslandsArena;
+    dxWorldProcessMemArena  *volatile m_pmaStepperArenas;
+    dxWorld                 *m_pswObjectsAllocWorld;
+    dMutexGroupID           m_pmgStepperMutexGroup;
+    dCallWaitID             m_pcwIslandsSteppingWait; // returned by GetIslandsSteppingWait()
+};
+// Description of the islands built for a step: the per-island sizes plus the shared body and joint arrays.
+struct dxWorldProcessIslandsInfo
+{
+    void AssignInfo(sizeint islandcount, unsigned int const *islandsizes, dxBody *const *bodies, dxJoint *const *joints)
+    {
+        m_pJoints = joints;
+        m_pBodies = bodies;
+        m_pIslandSizes = islandsizes;
+        m_IslandCount = islandcount;
+    }
+    // The arrays are referenced, not copied; no member is initialized before AssignInfo() has been called
+    sizeint GetIslandsCount() const { return m_IslandCount; }
+    unsigned int const *GetIslandSizes() const { return m_pIslandSizes; } // dxISE__MAX entries per island
+    dxBody *const *GetBodiesArray() const { return m_pBodies; }
+    dxJoint *const *GetJointsArray() const { return m_pJoints; }
+
+private:
+    sizeint                  m_IslandCount;
+    unsigned int const      *m_pIslandSizes;
+    dxBody *const           *m_pBodies;
+    dxJoint *const          *m_pJoints;
+};
+// Everything a stepper function receives for one island: world, step size, memory arena and the selected island's bodies and joints.
+struct dxStepperProcessingCallContext
+{
+    dxStepperProcessingCallContext(dxWorld *world, dReal stepSize, unsigned stepperAllowedThreads, 
+        dxWorldProcessMemArena *stepperArena, dxBody *const *islandBodiesStart, dxJoint *const *islandJointsStart): 
+        m_world(world), m_stepSize(stepSize), 
+        m_stepperArena(stepperArena), m_finalReleasee(NULL), 
+        m_islandBodiesStart(islandBodiesStart), m_islandJointsStart(islandJointsStart), 
+        m_islandBodiesCount(0), m_islandJointsCount(0), m_stepperAllowedThreads(stepperAllowedThreads)
+    {}
+    // Points the context at a particular island (start pointers into the shared arrays plus element counts)
+    void AssignIslandSelection(dxBody *const *islandBodiesStart, dxJoint *const *islandJointsStart, 
+        unsigned islandBodiesCount, unsigned islandJointsCount)
+    {
+        m_islandBodiesCount = islandBodiesCount;
+        m_islandJointsCount = islandJointsCount;
+        m_islandBodiesStart = islandBodiesStart;
+        m_islandJointsStart = islandJointsStart;
+    }
+    // One-past-the-end pointers of the current selection; with the initial zero counts they equal the start pointers
+    dxBody *const *GetSelectedIslandBodiesEnd() const { return m_islandBodiesStart + m_islandBodiesCount; }
+    dxJoint *const *GetSelectedIslandJointsEnd() const { return m_islandJointsStart + m_islandJointsCount; }
+    // Stores the releasee associated with the end of the stepper call
+    void AssignStepperCallFinalReleasee(dCallReleaseeID finalReleasee)
+    {
+        m_finalReleasee = finalReleasee;
+    }
+
+    dxWorld                 *const m_world;
+    dReal                   const m_stepSize;
+    dxWorldProcessMemArena  *m_stepperArena;
+    dCallReleaseeID         m_finalReleasee;
+    dxBody *const           *m_islandBodiesStart;
+    dxJoint *const          *m_islandJointsStart;
+    unsigned                m_islandBodiesCount;
+    unsigned                m_islandJointsCount;
+    unsigned                m_stepperAllowedThreads;
+};
+// Arena state bracket: BEGIN declares `state` holding the saved arena position (note the built-in trailing ';'), END restores that position.
+#define BEGIN_STATE_SAVE(memarena, state) void *state = (memarena)->SaveState();
+#define END_STATE_SAVE(memarena, state) (memarena)->RestoreState(state)
+// Function types supplied by a concrete stepper implementation
+typedef void (*dstepper_fn_t) (const dxStepperProcessingCallContext *callContext); // invoked for the island selected in callContext
+typedef unsigned (*dmaxcallcountestimate_fn_t) (unsigned activeThreadCount, unsigned allowedThreadCount);
+// Runs the stepper for the islands in islandsInfo; returns true on success
+bool dxProcessIslands (dxWorld *world, const dxWorldProcessIslandsInfo &islandsInfo, 
+                       dReal stepSize, dstepper_fn_t stepper, dmaxcallcountestimate_fn_t maxCallCountEstimator);
+
+// Memory estimate callback: receives the body and joint arrays of an island together with their counts
+typedef sizeint (*dmemestimate_fn_t) (dxBody * const *body, unsigned int nb, 
+                                     dxJoint * const *_joint, unsigned int _nj);
+// Prepares the processing memory of the world and fills in islandsinfo; returns true on success
+bool dxReallocateWorldProcessContext (dxWorld *world, dxWorldProcessIslandsInfo &islandsinfo, 
+                                      dReal stepsize, dmemestimate_fn_t stepperestimate);
+// Temporary arenas: NULL for memmgr/reserveinfo selects the global defaults; free the result with dxFreeTemporaryWorldProcessMemArena()
+dxWorldProcessMemArena *dxAllocateTemporaryWorldProcessMemArena(
+    sizeint memreq, const dxWorldProcessMemoryManager *memmgr/*=NULL*/, const dxWorldProcessMemoryReserveInfo *reserveinfo/*=NULL*/);
+void dxFreeTemporaryWorldProcessMemArena(dxWorldProcessMemArena *arena);
+
+// Returns the object pctStorage points to. If the storage is still empty, a ClassType is
+// default-constructed with `new`, stored into pctStorage and returned.
+template<class ClassType>
+inline ClassType *AllocateOnDemand(ClassType *&pctStorage)
+{
+    ClassType *pctExisting = pctStorage;
+    if (pctExisting)
+    {
+        return pctExisting;
+    }
+    ClassType *pctFresh = new ClassType();
+    pctStorage = pctFresh;
+    return pctFresh;
+}
+
+
+// World stepping working memory object: reference-counted owner of a lazily
+// created dxWorldProcessContext and of two optional overrides (memory reserve
+// info, memory manager); a NULL override means the global default is used.
+// Starts with one reference; Release() deletes the object on the last one.
+class dxStepWorkingMemory:
+    public dBase
+{
+public:
+    dxStepWorkingMemory():
+        m_uiRefCount(1), m_ppcProcessingContext(NULL), m_priReserveInfo(NULL), m_pmmMemoryManager(NULL) {}
+
+private:
+    friend struct dBase; // To avoid GCC warning regarding private destructor
+    ~dxStepWorkingMemory() // Private on purpose: destroy via Release()
+    {
+        delete m_ppcProcessingContext;
+        delete m_priReserveInfo;
+        delete m_pmmMemoryManager;
+    }
+
+public:
+    void Addref()
+    {
+        dIASSERT(~m_uiRefCount != 0); // the counter must not already be at its maximum
+        m_uiRefCount += 1;
+    }
+
+    void Release()
+    {
+        dIASSERT(m_uiRefCount != 0);
+        m_uiRefCount -= 1;
+        if (m_uiRefCount == 0) { delete this; }
+    }
+
+    // Destroys the processing context only; both overrides are left untouched.
+    void CleanupMemory()
+    {
+        delete m_ppcProcessingContext;
+        m_ppcProcessingContext = NULL;
+    }
+
+    void CleanupWorldReferences(dxWorld *world)
+    {
+        if (m_ppcProcessingContext == NULL) { return; } // no context yet, nothing to forward to
+        m_ppcProcessingContext->CleanupWorldReferences(world);
+    }
+
+    // SureGet...() creates the context on first use; Get...() may return NULL.
+    dxWorldProcessContext *SureGetWorldProcessingContext() { return AllocateOnDemand(m_ppcProcessingContext); }
+    dxWorldProcessContext *GetWorldProcessingContext() const { return m_ppcProcessingContext; }
+
+    // Get...() returns the raw override (NULL if unset); SureGet...() substitutes the global default.
+    const dxWorldProcessMemoryReserveInfo *GetMemoryReserveInfo() const { return m_priReserveInfo; }
+    const dxWorldProcessMemoryReserveInfo *SureGetMemoryReserveInfo() const { return m_priReserveInfo == NULL ? &g_WorldProcessDefaultReserveInfo : m_priReserveInfo; }
+    void SetMemoryReserveInfo(float fReserveFactor, unsigned uiReserveMinimum)
+    {
+        if (m_priReserveInfo == NULL) { m_priReserveInfo = new dxWorldProcessMemoryReserveInfo(fReserveFactor, uiReserveMinimum); }
+        else { m_priReserveInfo->Assign(fReserveFactor, uiReserveMinimum); }
+    }
+    void ResetMemoryReserveInfoToDefault()
+    {
+        if (m_priReserveInfo != NULL) { delete m_priReserveInfo; m_priReserveInfo = NULL; }
+    }
+
+    // Same accessor scheme for the memory manager override.
+    const dxWorldProcessMemoryManager *GetMemoryManager() const { return m_pmmMemoryManager; }
+    const dxWorldProcessMemoryManager *SureGetMemoryManager() const { return m_pmmMemoryManager == NULL ? &g_WorldProcessMallocMemoryManager : m_pmmMemoryManager; }
+    void SetMemoryManager(dxWorldProcessMemoryManager::alloc_block_fn_t fnAlloc, dxWorldProcessMemoryManager::shrink_block_fn_t fnShrink, dxWorldProcessMemoryManager::free_block_fn_t fnFree)
+    {
+        if (m_pmmMemoryManager == NULL) { m_pmmMemoryManager = new dxWorldProcessMemoryManager(fnAlloc, fnShrink, fnFree); }
+        else { m_pmmMemoryManager->Assign(fnAlloc, fnShrink, fnFree); }
+    }
+    void ResetMemoryManagerToDefault()
+    {
+        if (m_pmmMemoryManager != NULL) { delete m_pmmMemoryManager; m_pmmMemoryManager = NULL; }
+    }
+
+private:
+    unsigned m_uiRefCount;
+    dxWorldProcessContext *m_ppcProcessingContext;
+    dxWorldProcessMemoryReserveInfo *m_priReserveInfo;
+    dxWorldProcessMemoryManager *m_pmmMemoryManager;
+};
+
+
+#endif