///////////////////////////////////////////////////////////////////////////////// // // Solution of linear systems involved in the Levenberg - Marquardt // minimization algorithm // Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr) // Institute of Computer Science, Foundation for Research & Technology - Hellas // Heraklion, Crete, Greece. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // ///////////////////////////////////////////////////////////////////////////////// /* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */ #ifndef LM_REAL // not included by Axb.c #error This file should not be compiled directly! #endif #ifdef LINSOLVERS_RETAIN_MEMORY #define __STATIC__ static #else #define __STATIC__ // empty #endif /* LINSOLVERS_RETAIN_MEMORY */ #ifdef HAVE_LAPACK /* prototypes of LAPACK routines */ #define GEQRF LM_MK_LAPACK_NAME(geqrf) #define ORGQR LM_MK_LAPACK_NAME(orgqr) #define TRTRS LM_MK_LAPACK_NAME(trtrs) #define POTF2 LM_MK_LAPACK_NAME(potf2) #define POTRF LM_MK_LAPACK_NAME(potrf) #define POTRS LM_MK_LAPACK_NAME(potrs) #define GETRF LM_MK_LAPACK_NAME(getrf) #define GETRS LM_MK_LAPACK_NAME(getrs) #define GESVD LM_MK_LAPACK_NAME(gesvd) #define GESDD LM_MK_LAPACK_NAME(gesdd) #define SYTRF LM_MK_LAPACK_NAME(sytrf) #define SYTRS LM_MK_LAPACK_NAME(sytrs) #define PLASMA_POSV LM_CAT_(PLASMA_, LM_ADD_PREFIX(posv)) #ifdef __cplusplus extern "C" { #endif /* QR decomposition */ extern int GEQRF(int *m, int *n, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info); /* solution of triangular systems */ extern int TRTRS(char *uplo, char *trans, char *diag, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info); /* Cholesky decomposition and systems solution */ extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info); extern int POTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *info); /* block version of dpotf2 */ extern int POTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info); /* LU decomposition and systems solution */ extern int GETRF(int *m, int *n, LM_REAL *a, int *lda, int *ipiv, int *info); extern int GETRS(char *trans, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info); /* Singular Value Decomposition (SVD) */ extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info); /* lapack 3.0 new SVD routine, faster than xgesvd(). * In case that your version of LAPACK does not include them, use the above two older routines */ extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *iwork, int *info); /* LDLt/UDUt factorization and systems solution */ extern int SYTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *ipiv, LM_REAL *work, int *lwork, int *info); extern int SYTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info); #ifdef __cplusplus } #endif /* precision-specific definitions */ #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR) #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS) #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol) #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU) #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD) #define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK) #define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol) /* * This function returns the solution of Ax = b * * The function is based on QR decomposition with explicit computation of Q: * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes * Q R x = b or R x = Q^T b. * The last equation can be solved directly. * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; static int nb=0; /* no __STATIC__ decl. here! */ LM_REAL *a, *tau, *r, *work; int a_sz, tau_sz, r_sz, tot_sz; register int i, j; int info, worksz, nrhs=1; register LM_REAL sum; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ a_sz=m*m; tau_sz=m; r_sz=m*m; /* only the upper triangular part really needed */ if(!nb){ LM_REAL tmp; worksz=-1; // workspace query; optimal size is returned in tmp GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb } worksz=nb*m; tot_sz=a_sz + tau_sz + r_sz + worksz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; tau=a+a_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; tau=a+a_sz; r=tau+tau_sz; work=r+r_sz; /* store A (column major!) into a */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; /* store A into a and B into x. A is assumed symmetric, * hence no transposition is needed */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* Cholesky decomposition of A */ //POTF2("L", (int *)&m, a, (int *)&m, (int *)&info); POTRF("L", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve using the computed Cholesky in one lapack call */ POTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } #if 0 /* alternative: solve the linear system L y = b ... */ TRTRS("L", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* ... solve the linear system L^T x = y */ TRTRS("L", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #endif /* 0 */ #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; } #ifdef HAVE_PLASMA /* Linear algebra using PLASMA parallel library for multicore CPUs. * http://icl.cs.utk.edu/plasma/ * * WARNING: BLAS multithreading should be disabled, e.g. setenv MKL_NUM_THREADS 1 */ #ifndef _LM_PLASMA_MISC_ /* avoid multiple inclusion of helper code */ #define _LM_PLASMA_MISC_ #include #include #include #include #include /* programmatically determine the number of cores on the current machine */ #ifdef _WIN32 #include #elif __linux #include #endif static int getnbcores() { #ifdef _WIN32 SYSTEM_INFO sysinfo; GetSystemInfo(&sysinfo); return sysinfo.dwNumberOfProcessors; #elif __linux return sysconf(_SC_NPROCESSORS_ONLN); #else // unknown system return 2<<1; // will be halved by right shift below #endif } static int PLASMA_ncores=-(getnbcores()>>1); // >0 if PLASMA initialized, <0 otherwise /* user-specified number of cores */ void levmar_PLASMA_setnbcores(int cores) { PLASMA_ncores=(cores>0)? -cores : ((cores)? cores : -2); } #endif /* _LM_PLASMA_MISC_ */ /* * This function returns the solution of Ax=b * * The function assumes that A is symmetric & positive definite and employs the * Cholesky decomposition implemented by PLASMA for homogeneous multicore processors. * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_PLASMA_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; LM_REAL *a; int a_sz, tot_sz; int info, nrhs=1; if(A==NULL){ #ifdef LINSOLVERS_RETAIN_MEMORY if(buf) free(buf); buf=NULL; buf_sz=0; #endif /* LINSOLVERS_RETAIN_MEMORY */ PLASMA_Finalize(); PLASMA_ncores=-PLASMA_ncores; return 1; } /* calculate required memory size */ a_sz=m*m; tot_sz=a_sz; #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; /* store A into a and B into x; A is assumed to be symmetric, * hence no transposition is needed */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* initialize PLASMA */ if(PLASMA_ncores<0){ PLASMA_ncores=-PLASMA_ncores; PLASMA_Init(PLASMA_ncores); fprintf(stderr, RCAT("\n", AX_EQ_B_PLASMA_CHOL) "(): PLASMA is running on %d cores.\n\n", PLASMA_ncores); } /* Solve the linear system */ info=PLASMA_POSV(PlasmaLower, m, 1, a, m, x, m); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", PLASMA_POSV) " in ", AX_EQ_B_PLASMA_CHOL) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\n" "the factorization could not be completed for ", PLASMA_POSV) " in ", AX_EQ_B_CHOL) "()\n", info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; } #endif /* HAVE_PLASMA */ /* * This function returns the solution of Ax = b * * The function employs LU decomposition: * If A=L U with L lower and U upper triangular, then the original system * amounts to solving * L y = b, U x = y * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ LM_REAL *buf=NULL; __STATIC__ int buf_sz=0; int a_sz, ipiv_sz, tot_sz; register int i, j; int info, *ipiv, nrhs=1; LM_REAL *a; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ ipiv_sz=m; a_sz=m*m; tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; ipiv=(int *)(a+a_sz); /* store A (column major!) into a and B into x */ for(i=0; ibuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; u=a+a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; iwork=(int *)(work+worksz); /* store A (column major!) into a */ for(i=0; i0.0; eps*=LM_CNST(0.5)) ; eps*=LM_CNST(2.0); } /* compute the pseudoinverse in a */ for(i=0; ithresh; rank++){ one_over_denom=LM_CNST(1.0)/s[rank]; for(j=0; jbuf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; work=a+a_sz; ipiv=(int *)(work+work_sz); /* store A into a and B into x; A is assumed to be symmetric, hence * the column and row major order representations are the same */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* LDLt factorization for A */ SYTRF("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRF) " in ", AX_EQ_B_BK) "()\n", -info); exit(1); } else{ fprintf(stderr, RCAT(RCAT("LAPACK error: singular block diagonal matrix D for", SYTRF) " in ", AX_EQ_B_BK)"() [D(%d, %d) is zero]\n", info, info); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } } /* solve the system with the computed factorization */ SYTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info); if(info<0){ fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRS) " in ", AX_EQ_B_BK) "()\n", -info); exit(1); } #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 1; } /* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */ #undef AX_EQ_B_QR #undef AX_EQ_B_QRLS #undef AX_EQ_B_CHOL #undef AX_EQ_B_LU #undef AX_EQ_B_SVD #undef AX_EQ_B_BK #undef AX_EQ_B_PLASMA_CHOL #undef GEQRF #undef ORGQR #undef TRTRS #undef POTF2 #undef POTRF #undef POTRS #undef GETRF #undef GETRS #undef GESVD #undef GESDD #undef SYTRF #undef SYTRS #undef PLASMA_POSV #else // no LAPACK /* precision-specific definitions */ #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack) /* * This function returns the solution of Ax = b * * The function employs LU decomposition followed by forward/back substitution (see * also the LAPACK-based LU solver above) * * A is mxm, b is mx1 * * The function returns 0 in case of error, 1 if successful * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */ int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m) { __STATIC__ void *buf=NULL; __STATIC__ int buf_sz=0; register int i, j, k; int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz; LM_REAL *a, *work, max, sum, tmp; if(!A) #ifdef LINSOLVERS_RETAIN_MEMORY { if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } #else return 1; /* NOP */ #endif /* LINSOLVERS_RETAIN_MEMORY */ /* calculate required memory size */ idx_sz=m; a_sz=m*m; work_sz=m; tot_sz=(a_sz+work_sz)*sizeof(LM_REAL) + idx_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ #ifdef LINSOLVERS_RETAIN_MEMORY if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } } #else buf_sz=tot_sz; buf=(void *)malloc(tot_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n"); exit(1); } #endif /* LINSOLVERS_RETAIN_MEMORY */ a=buf; work=a+a_sz; idx=(int *)(work+work_sz); /* avoid destroying A, B by copying them to a, x resp. */ memcpy(a, A, a_sz*sizeof(LM_REAL)); memcpy(x, B, m*sizeof(LM_REAL)); /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */ for(i=0; imax) max=tmp; if(max==0.0){ fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n"); #ifndef LINSOLVERS_RETAIN_MEMORY free(buf); #endif return 0; } work[i]=LM_CNST(1.0)/max; } for(j=0; j=max){ max=tmp; maxi=i; } } if(j!=maxi){ for(k=0; k=0; --i){ sum=x[i]; for(j=i+1; j