[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Getfem-commits] (no subject)
From: |
Yves Renard |
Subject: |
[Getfem-commits] (no subject) |
Date: |
Sat, 23 Sep 2017 14:10:22 -0400 (EDT) |
branch: master
commit a06ee069fa8ac246842f698d89006bdb8ec28e0d
Author: Yves Renard <address@hidden>
Date: Sat Sep 23 18:01:53 2017 +0200
Another adaptation for 64 bit integer blas/lapack, ipvt interface
structure. Change a bit the call of some factorization functions
---
configure.ac | 7 ++--
src/getfem_mesher.cc | 2 +-
src/gmm/gmm_dense_lu.h | 66 ++++++++++++++++++++++++++++++--------
src/gmm/gmm_lapack_interface.h | 72 ++++++++++++++++++++++++++++++++----------
src/gmm/gmm_opt.h | 9 ++++--
5 files changed, 118 insertions(+), 38 deletions(-)
diff --git a/configure.ac b/configure.ac
index ac0014d..9d641be 100644
--- a/configure.ac
+++ b/configure.ac
@@ -870,18 +870,19 @@ if test "$usematlab" != NO; then
if $(echo "" | $MEX 2>&1 | grep 'This is .*TeX'); then
AC_MSG_ERROR([the mex binary which is in the PATH appears to be part
of LaTeX, not matlab !! run ./configure MEX=/path/to/matlab/mex]);
fi;
- AC_CHECK_PROGS(MEXEXT, mexext)
+
# MATLAB_ROOT=`$MEX -v 2>&1 | grep "MATLAB " | awk '{print $4}'|sed -e
'2,$d'`
MEX_EXE=`which $MEX`
MEX_EXE=`readlink -e $MEX_EXE`
- MATLAB_ROOT=`echo $MEX_EXE | sed -e 's/bin.*$//'`
+ MATLAB_ROOT=`echo $MEX_EXE | sed -e 's/bin.*$//'`
+ MEXEXT=$MATLAB_ROOT/bin/mexext
Matlab_INC_DIR=$MATLAB_ROOT/extern/include
echo "checking for matlab path... " $MATLAB_ROOT
MATLAB_COM_EXT=.`$MEXEXT`
echo "checking for mex extension... " $MATLAB_COM_EXT
# MATLAB_RELEASE=`grep "MATLAB R" $MATLAB_ROOT/extern/src/mexversion.c |
awk '{print $4}' | sed -e 's/R//'`
# MATLAB_RELEASE=`grep "full_ver="$(which $MEX) | sed 's/[[^0-9]]//g'` #
double brackets are for escaping reasons.
- MATLAB_RELEASE=`echo $MATLAB_ROOT | sed 's/^.*R//g' | sed 's/\/$//'`
+ MATLAB_RELEASE=`matlab -nosplash -nojvm -r "exit" | grep "(R2" | sed
's/^.*R//g' | sed 's/).*$//'`
echo "Matlab release is : R$MATLAB_RELEASE"
fi
fi
diff --git a/src/getfem_mesher.cc b/src/getfem_mesher.cc
index b0693f8..5e6fb24 100644
--- a/src/getfem_mesher.cc
+++ b/src/getfem_mesher.cc
@@ -109,7 +109,7 @@ namespace getfem {
// cout << "nbco = " << nbco << endl;
std::vector<const mesher_signed_distance*> ls(nbco);
std::vector<scalar_type> d(nbco), v(nbco);
- std::vector<long> ipvt(nbco);
+ gmm::lapack_ipvt ipvt(nbco);
gmm::col_matrix<base_node> G(N, nbco);
gmm::dense_matrix<scalar_type> H(nbco, nbco);
base_small_vector dd(N);
diff --git a/src/gmm/gmm_dense_lu.h b/src/gmm/gmm_dense_lu.h
index cb53771..1c6269a 100644
--- a/src/gmm/gmm_dense_lu.h
+++ b/src/gmm/gmm_dense_lu.h
@@ -70,10 +70,50 @@
#define GMM_DENSE_LU_H
#include "gmm_dense_Householder.h"
-#include "gmm_opt.h"
namespace gmm {
+ /* ********************************************************************** */
+ /* IPVT structure. */
+ /* ********************************************************************** */
+ // For compatibility with lapack version with 64 or 32 bit integer.
+ // Should be replaced by std::vector<size_type> if 32 bit integer version
+ // of lapack is not used anymore (and lapack_ipvt_int set to size_type)
+
+ // Do not use iterators of this interface container
+ class lapack_ipvt : public std::vector<size_type> {
+ bool is_int64;
+ size_type &operator[](size_type i)
+ { return std::vector<size_type>::operator[](i); }
+ size_type operator[] (size_type i) const
+ { return std::vector<size_type>::operator[](i); }
+ void begin(void) const {}
+ void begin(void) {}
+ void end(void) const {}
+ void end(void) {}
+
+ public:
+ void set_to_int32() { is_int64 = false; }
+ const size_type *pfirst() const
+ { return &(*(std::vector<size_type>::begin())); }
+ size_type *pfirst() { return &(*(std::vector<size_type>::begin())); }
+
+ lapack_ipvt(size_type n) : std::vector<size_type>(n), is_int64(true) {}
+
+ size_type get(size_type i) const {
+ const size_type *p = pfirst();
+ return is_int64 ? p[i] : size_type(((const int *)(p))[i]);
+ }
+ void set(size_type i, size_type val) {
+ size_type *p = pfirst();
+ if (is_int64) p[i] = val; else ((int *)(p))[i] = int(val);
+ }
+ };
+}
+
+#include "gmm_opt.h"
+
+namespace gmm {
/** LU Factorization of a general (dense) matrix (real or complex).
@@ -85,24 +125,23 @@ namespace gmm {
The pivot indices in ipvt are indexed starting from 1
so that this is compatible with LAPACK (Fortran).
*/
- template <typename DenseMatrix, typename Pvector>
- size_type lu_factor(DenseMatrix& A, Pvector& ipvt) {
+ template <typename DenseMatrix>
+ size_type lu_factor(DenseMatrix& A, lapack_ipvt& ipvt) {
typedef typename linalg_traits<DenseMatrix>::value_type T;
- typedef typename linalg_traits<Pvector>::value_type int_T;
typedef typename number_traits<T>::magnitude_type R;
size_type info(0), i, j, jp, M(mat_nrows(A)), N(mat_ncols(A));
size_type NN = std::min(M, N);
std::vector<T> c(M), r(N);
GMM_ASSERT2(ipvt.size()+1 >= NN, "IPVT too small");
- for (i = 0; i+1 < NN; ++i) ipvt[i] = int_T(i);
+ for (i = 0; i+1 < NN; ++i) ipvt.set(i, i);
if (M || N) {
for (j = 0; j+1 < NN; ++j) {
R max = gmm::abs(A(j,j)); jp = j;
for (i = j+1; i < M; ++i) /* find pivot. */
if (gmm::abs(A(i,j)) > max) { jp = i; max = gmm::abs(A(i,j)); }
- ipvt[j] = int_T(jp + 1);
+ ipvt.set(j, jp + 1);
if (max == R(0)) { info = j + 1; break; }
if (jp != j) for (i = 0; i < N; ++i) std::swap(A(jp, i), A(j, i));
@@ -112,7 +151,7 @@ namespace gmm {
rank_one_update(sub_matrix(A, sub_interval(j+1, M-j-1),
sub_interval(j+1, N-j-1)), c, conjugated(r));
}
- ipvt[NN-1] = int_T(NN);
+ ipvt.set(NN-1, NN);
}
return info;
}
@@ -126,7 +165,7 @@ namespace gmm {
typedef typename linalg_traits<DenseMatrix>::value_type T;
copy(b, x);
for(size_type i = 0; i < pvector.size(); ++i) {
- size_type perm = pvector[i]-1; // permutations stored in 1's offset
+ size_type perm = pvector.get(i)-1; // permutations stored in 1's offset
if(i != perm) { T aux = x[i]; x[i] = x[perm]; x[perm] = aux; }
}
/* solve Ax = b -> LUx = b -> Ux = L^-1 b. */
@@ -138,7 +177,7 @@ namespace gmm {
void lu_solve(const DenseMatrix &A, VectorX &x, const VectorB &b) {
typedef typename linalg_traits<DenseMatrix>::value_type T;
dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
- std::vector<long> ipvt(mat_nrows(A));
+ lapack_ipvt ipvt(mat_nrows(A));
gmm::copy(A, B);
size_type info = lu_factor(B, ipvt);
GMM_ASSERT1(!info, "Singular system, pivot = " << info);
@@ -154,10 +193,9 @@ namespace gmm {
lower_tri_solve(transposed(LU), x, false);
upper_tri_solve(transposed(LU), x, true);
for(size_type i = pvector.size(); i > 0; --i) {
- size_type perm = pvector[i-1]-1; // permutations stored in 1's offset
+ size_type perm = pvector.get(i-1)-1; // permutations stored in 1's offset
if(i-1 != perm) { T aux = x[i-1]; x[i-1] = x[perm]; x[perm] = aux; }
}
-
}
@@ -212,7 +250,7 @@ namespace gmm {
typedef typename linalg_traits<DenseMatrix>::value_type T;
DenseMatrix& A = const_cast<DenseMatrix&>(A_);
dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
- std::vector<long> ipvt(mat_nrows(A));
+ lapack_ipvt ipvt(mat_nrows(A));
gmm::copy(A, B);
size_type info = lu_factor(B, ipvt);
if (doassert) GMM_ASSERT1(!info, "Non invertible matrix, pivot = "<<info);
@@ -229,7 +267,7 @@ namespace gmm {
for (size_type j = 0; j < std::min(mat_nrows(LU), mat_ncols(LU)); ++j)
det *= LU(j,j);
for(size_type i = 0; i < pvector.size(); ++i)
- if (i != size_type(pvector[i]-1)) { det = -det; }
+ if (i != size_type(pvector.get(i)-1)) { det = -det; }
return det;
}
@@ -238,7 +276,7 @@ namespace gmm {
lu_det(const DenseMatrix& A) {
typedef typename linalg_traits<DenseMatrix>::value_type T;
dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
- std::vector<long> ipvt(mat_nrows(A));
+ lapack_ipvt ipvt(mat_nrows(A));
gmm::copy(A, B);
lu_factor(B, ipvt);
return lu_det(B, ipvt);
diff --git a/src/gmm/gmm_lapack_interface.h b/src/gmm/gmm_lapack_interface.h
index 148f664..24cd78c 100644
--- a/src/gmm/gmm_lapack_interface.h
+++ b/src/gmm/gmm_lapack_interface.h
@@ -96,33 +96,19 @@ namespace gmm {
void sgeesx_(...); void dgeesx_(...); void cgeesx_(...); void zgeesx_(...);
void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...);
}
-
- // For compatibility with lapack version with 32 bit integer.
-# define int_to_long_ipvt(ipvt) \
- { \
- for (size_type ii = ipvt.size(); ii != 0; --ii) \
- ipvt[ii-1] = long(((int *)(&ipvt[0]))[ii-1]); \
- }
-
-// # define long_to_int_ipvt(ipvt)
-// {
-// long *ipvt_ = const_cast<long *>(&ipvt[0]);
-// for (size_type ii = 0; ii < ipvt.size(); ++ii)
-// ((int *)(&ipvt_[0]))[ii] = int(ipvt_[ii]);
-// }
/* ********************************************************************** */
/* LU decomposition. */
/* ********************************************************************** */
# define getrf_interface(lapack_name, base_type) inline \
- size_type lu_factor(dense_matrix<base_type > &A, std::vector<long> &ipvt){ \
+ size_type lu_factor(dense_matrix<base_type > &A, lapack_ipvt &ipvt){ \
GMMLAPACK_TRACE("getrf_interface"); \
long m = long(mat_nrows(A)), n = long(mat_ncols(A)), lda(m), info(-1L); \
- if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info); \
+ if (m && n) lapack_name(&m, &n, &A(0,0), &lda, ipvt.pfirst(), &info); \
if ((info & 0xFFFFFFFF00000000L) && !(info & 0x00000000FFFFFFFFL)) \
/* For compatibility with lapack version with 32 bit integer. */ \
- int_to_long_ipvt(ipvt); \
+ ipvt.set_to_int32(); \
return size_type(int(info & 0x00000000FFFFFFFFL)); \
}
@@ -131,6 +117,58 @@ namespace gmm {
getrf_interface(cgetrf_, BLAS_C)
getrf_interface(zgetrf_, BLAS_Z)
+ /* ********************************************************************* */
+ /* LU solve. */
+ /* ********************************************************************* */
+
+# define getrs_interface(f_name, trans1, lapack_name, base_type) inline \
+ void f_name(const dense_matrix<base_type > &A, \
+ const lapack_ipvt &ipvt, std::vector<base_type > &x, \
+ const std::vector<base_type > &b) { \
+ GMMLAPACK_TRACE("getrs_interface"); \
+ long n = long(mat_nrows(A)), info(0), nrhs(1); \
+ gmm::copy(b, x); trans1; \
+ if (n) \
+ lapack_name(&t,&n,&nrhs,&(A(0,0)),&n,ipvt.pfirst(),&x[0],&n,&info); \
+ }
+
+# define getrs_trans_n const char t = 'N'
+# define getrs_trans_t const char t = 'T'
+
+ getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
+ getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
+ getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
+ getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
+ getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
+ getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
+ getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
+ getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
+
+ /* ********************************************************************* */
+ /* LU inverse. */
+ /* ********************************************************************* */
+
+# define getri_interface(lapack_name, base_type) inline \
+ void lu_inverse(const dense_matrix<base_type > &LU, \
+ const lapack_ipvt &ipvt, const dense_matrix<base_type > &A_) { \
+ GMMLAPACK_TRACE("getri_interface"); \
+ dense_matrix<base_type >& \
+ A = const_cast<dense_matrix<base_type > &>(A_); \
+ long n = int(mat_nrows(A)), info(0), lwork(-1); base_type work1; \
+ if (n) { \
+ gmm::copy(LU, A); \
+ lapack_name(&n, &A(0,0), &n, ipvt.pfirst(), &work1, &lwork, &info); \
+ lwork = int(gmm::real(work1)); \
+ std::vector<base_type > work(lwork); \
+ lapack_name(&n, &A(0,0), &n, ipvt.pfirst(), &work[0], &lwork,&info); \
+ } \
+ }
+
+ getri_interface(sgetri_, BLAS_S)
+ getri_interface(dgetri_, BLAS_D)
+ getri_interface(cgetri_, BLAS_C)
+ getri_interface(zgetri_, BLAS_Z)
+
/* ********************************************************************** */
/* QR factorization. */
/* ********************************************************************** */
diff --git a/src/gmm/gmm_opt.h b/src/gmm/gmm_opt.h
index 31e830e..87b1929 100644
--- a/src/gmm/gmm_opt.h
+++ b/src/gmm/gmm_opt.h
@@ -37,6 +37,8 @@
#ifndef GMM_OPT_H__
#define GMM_OPT_H__
+#include <gmm/gmm_dense_lu.h>
+
namespace gmm {
/* ********************************************************************* */
@@ -58,7 +60,7 @@ namespace gmm {
default :
{
dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
- std::vector<size_type> ipvt(mat_nrows(A));
+ lapack_ipvt ipvt(mat_nrows(A));
gmm::copy(A, B);
lu_factor(B, ipvt);
return lu_det(B, ipvt);
@@ -69,7 +71,8 @@ namespace gmm {
}
- template <typename T> T lu_inverse(const dense_matrix<T> &A_, bool doassert
= true) {
+ template <typename T> T lu_inverse(const dense_matrix<T> &A_,
+ bool doassert = true) {
dense_matrix<T>& A = const_cast<dense_matrix<T> &>(A_);
size_type N = mat_nrows(A);
T det(1);
@@ -110,7 +113,7 @@ namespace gmm {
}
default : {
dense_matrix<T> B(mat_nrows(A), mat_ncols(A));
- std::vector<long> ipvt(mat_nrows(A));
+ lapack_ipvt ipvt(mat_nrows(A));
gmm::copy(A, B);
size_type info = lu_factor(B, ipvt);
GMM_ASSERT1(!info, "non invertible matrix");