source: pcp/src/wannier.c@ 08e223

Last change on this file since 08e223 was 08e223, checked in by Frederik Heber <heber@…>, 17 years ago

ChangeWannierCentres can now be called from outside ComputeMLWF()

Wannier centres and spread are exchanged via MPI to allow for external calling (necessary if read src densities shall be perturbed with different CommonWannier values!)

  • Property mode set to 100644
File size: 99.1 KB
Line 
1/** \file wannier.c
2 * Maximally Localized Wannier Functions.
3 *
4 * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
5 * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
6 * calculate the spread of a specific orbital, which may be useful in checking on the change of
7 * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
8 *
9 Project: ParallelCarParrinello
10 \author Frederik Heber
11 \date 2006
12
13 File: wannier.c
14 $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
15*/
16
17#include <math.h>
18#include <gsl/gsl_math.h>
19#include <gsl/gsl_eigen.h>
20#include <gsl/gsl_matrix.h>
21#include <gsl/gsl_vector.h>
22#include <gsl/gsl_complex.h>
23#include <gsl/gsl_complex_math.h>
24#include <gsl/gsl_sort_vector.h>
25#include <gsl/gsl_heapsort.h>
26#include <gsl/gsl_blas.h>
27#include <string.h>
28
29#include "data.h"
30#include "density.h"
31#include "errors.h"
32#include "gramsch.h"
33#include "helpers.h"
34#include "init.h"
35#include "myfft.h"
36#include "mymath.h"
37#include "output.h"
38#include "perturbed.h"
39#include "wannier.h"
40
41
42#define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
43#define type Occupied
44
45
46/** Converts type fftw_complex to gsl_complex.
47 * \param a complex number
48 * \return b complex number
49 */
50gsl_complex convertComplex (fftw_complex a) {
51 return gsl_complex_rect(c_re(a),c_im(a));
52}
53
54/** "merry go round" implementation for parallel index ordering.
55 * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
56 * performed which generates once each possible pairing.
57 * \param *top index array 1
58 * \param *bot index array 2
59 * \param m N/2, where N is the matrix row/column dimension
60 * \note taken from [Golub, Matrix computations, 1989, p451]
61 */
62void MerryGoRoundIndices(int *top, int *bot, int m)
63{
64 int *old_top, *old_bot;
65 int k;
66 old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
67 old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
68/* fprintf(stderr,"oldtop\t");
69 for (k=0;k<m;k++)
70 fprintf(stderr,"%i\t", top[k]);
71 fprintf(stderr,"\n");
72 fprintf(stderr,"oldbot\t");
73 for (k=0;k<m;k++)
74 fprintf(stderr,"%i\t", bot[k]);
75 fprintf(stderr,"\n");*/
76 // first copy arrays
77 for (k=0;k<m;k++) {
78 old_top[k] = top[k];
79 old_bot[k] = bot[k];
80 }
81 // then let the music play
82 for (k=0;k<m;k++) {
83 if (k==1)
84 top[k] = old_bot[0];
85 else if (k > 1)
86 top[k] = old_top[k-1];
87 if (k==m-1)
88 bot[k] = old_top[k];
89 else
90 bot[k] = old_bot[k+1];
91 }
92/* fprintf(stderr,"top\t");
93 for (k=0;k<m;k++)
94 fprintf(stderr,"%i\t", top[k]);
95 fprintf(stderr,"\n");
96 fprintf(stderr,"bot\t");
97 for (k=0;k<m;k++)
98 fprintf(stderr,"%i\t", bot[k]);
99 fprintf(stderr,"\n");*/
100 // and finito
101 Free(old_top, "MerryGoRoundIndices: old_top");
102 Free(old_bot, "MerryGoRoundIndices: old_bot");
103}
104
105/** merry-go-round for matrix columns.
106 * The trick here is that we must be aware of multiple rotations per process, thus only some of the
107 * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
108 * pointers to the matrix columns within the local array.
109 * \param comm communicator for circulation
110 * \param *Aloc local array of columns
111 * \param Num entries per column
112 * \param max_rounds number of column pairs in \a *Around
113 * \param k offset for tag
114 * \param tagS0 MPI tag for sending left column
115 * \param tagS1 MPI tag for sending right column
116 * \param tagR0 MPI tag for receiving left column
117 * \param tagR1 MPI tag for receiving right column
118 */
119void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
120 //double *A_locS1, *A_locS2; // local columns of A[k]
121 //double *A_locR1, *A_locR2; // local columns of A[k]
122 MPI_Request requestS0, requestS1, requestR0, requestR1;
123 MPI_Status status;
124 int ProcRank, ProcNum;
125 int l;
126 MPI_Comm_size (comm, &ProcNum);
127 MPI_Comm_rank (comm, &ProcRank);
128 double *Abuffer1, *Abuffer2; // mark the columns that are circulated
129
130 //fprintf(stderr,"shifting...");
131 if (ProcRank == 0) {
132 if (max_rounds > 1) {
133 // get last left column
134 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
135 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
136 } else {
137 // get right column
138 Abuffer1 = Aloc[1]; // note down the free column
139 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
140 }
141
142 //fprintf(stderr,"...left columns...");
143 for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
144 Aloc[l] = Aloc[l-2];
145
146 if (max_rounds > 1) {
147 //fprintf(stderr,"...first right...");
148 Aloc[2] = Aloc[1]; // get first right column
149 }
150
151 //fprintf(stderr,"...right columns...");
152 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
153 Aloc[l] = Aloc[l+2];
154
155 //fprintf(stderr,"...last right...");
156 Aloc[(2*max_rounds-1)] = Abuffer1;
157 MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
158
159 } else if (ProcRank == ProcNum-1) {
160 //fprintf(stderr,"...first right...");
161 // get first right column
162 Abuffer2 = Aloc[1]; // note down the free column
163 MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
164
165 //fprintf(stderr,"...right columns...");
166 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
167 Aloc[(l)] = Aloc[(l+2)];
168
169 //fprintf(stderr,"...last right...");
170 Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
171
172 //fprintf(stderr,"...left columns...");
173 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
174 Aloc[(l)] = Aloc[(l-2)];
175
176 //fprintf(stderr,"...first left...");
177// if (max_rounds > 1)
178 Aloc[0] = Abuffer2; // get first left column
179 MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
180
181 } else {
182 // get last left column
183 MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
184 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
185
186 //fprintf(stderr,"...first right...");
187 // get first right column
188 MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
189 Abuffer2 = Aloc[1]; // note down the free column
190
191 //fprintf(stderr,"...left columns...");
192 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
193 Aloc[(l)] = Aloc[(l-2)];
194
195 //fprintf(stderr,"...right columns...");
196 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
197 Aloc[(l)] = Aloc[(l+2)];
198
199 //fprintf(stderr,"...first left...");
200 Aloc[0] = Abuffer1; // get first left column
201 MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
202
203 //fprintf(stderr,"...last right...");
204 Aloc[(2*max_rounds-1)] = Abuffer2;
205 MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
206 }
207
208 //fprintf(stderr,"...waiting...");
209 if (ProcRank != ProcNum-1)
210 MPI_Wait(&requestS0, &status);
211 if (ProcRank != 0) // first left column
212 MPI_Wait(&requestR0, &status);
213 if (ProcRank != 0)
214 MPI_Wait(&requestS1, &status);
215 if (ProcRank != ProcNum-1)
216 MPI_Wait(&requestR1, &status);
217 //fprintf(stderr,"...done\n");
218}
219
220/** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
221 * \param *P Problem at hand
222 * \param AllocNum number of rows in matrix
223 * \return address of MPI communicator
224 */
225#ifdef HAVE_INLINE
226inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
227#else
228MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
229#endif
230{
231 MPI_Comm *comm = &P->Par.comm_ST;
232
233 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
234 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
235 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
236 comm = &P->Par.comm_ST;
237 //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");
238 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
239 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
240 comm = &P->Par.comm_ST_Psi;
241 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
242 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
243 comm = &P->Par.comm_ST_PsiT;
244 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
245 }
246 } else {
247 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
248 comm = &P->Par.comm_ST_PsiT;
249 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
250 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
251 comm = &P->Par.comm_ST_Psi;
252 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
253 }
254 }
255 return comm;
256}
257
258/** Allocates and fills Lookup table for sin/cos values at each grid node.
259 * \param ***cos_table pointer to two-dimensional lookup table for cosine values
260 * \param ***sin_table pointer to two-dimensional lookup table for sine values
261 * \param *N array with number of nodes per \a NDIM axis
262 */
263void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N)
264{
265 int i, j;
266 double argument;
267 double **cos_lookup, **sin_lookup;
268
269 // create lookup table for sin/cos values
270 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
271 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
272 for (i=0;i<NDIM;i++) {
273 // allocate memory
274 cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup");
275 sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup");
276
277 // reset arrays
278 SetArrayToDouble0(cos_lookup[i],N[i]);
279 SetArrayToDouble0(sin_lookup[i],N[i]);
280
281 // create lookup values
282 for (j=0;j<N[i];j++) {
283 argument = 2*PI/(double)N[i]*(double)j;
284 cos_lookup[i][j] = cos(argument);
285 sin_lookup[i][j] = sin(argument);
286 }
287 }
288 *cos_table = cos_lookup;
289 *sin_table = sin_lookup;
290}
291
292/** Frees memory allocated during CreateSinCosLookupTable().
293 * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
294 * \param ***sin_lookup pointer to two-dimensional lookup table for sine values
295 */
296void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup)
297{
298 int i;
299 // free lookups
300 for (i=0;i<NDIM;i++) {
301 Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
302 Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
303 }
304 Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
305 Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
306}
307
308/** Fills the entries of the six variance matrices.
309 * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
310 * wave function and by diagonalization one obtains the unitary transformation with which the Psis are
311 * treated afterwards.
312 * \param *P Problem at hand
313 * \param AllocNum number of rows/columns
314 * \param **A pointer to variance matrices
315 * \sa ComputeMLWF() - master function.
316 */
317void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A)
318{
319 struct Lattice *Lat = &P->Lat;
320 struct RunStruct *R = &P->R;
321 struct Psis *Psi = &Lat->Psi;
322 struct LatticeLevel *Lev0 = R->Lev0;
323 struct LatticeLevel *LevS = R->LevS;
324 struct Density *Dens0 = Lev0->Dens;
325 struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB;
326 struct fft_plan_3d *plan = Lat->plan;
327 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
328 fftw_real *PsiCR = (fftw_real *)PsiC;
329 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
330 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
331 fftw_complex **HGcRC = (fftw_complex**)HGcR;
332 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
333 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
334 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
335 MPI_Status status;
336 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
337 int n[NDIM],n0,i0,iS, Index;
338 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
339 int N0 = LevS->Plan0.plan->local_nx;
340 int *N = LevS->Plan0.plan->N;
341 const int NUpx = LevS->NUp[0];
342 const int NUpy = LevS->NUp[1];
343 const int NUpz = LevS->NUp[2];
344 double argument, PsiAtNode;
345 int e,g,i,j,k,l,m,p,u;
346 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
347 double **cos_lookup = NULL,**sin_lookup = NULL;
348
349 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
350
351 debug(P, "Creating Lookup Table");
352 CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
353
354 debug(P, "Calculating each entry of variance matrices");
355 l=-1; // to access U matrix element (0..Num-1)
356 // fill the matrices
357 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
358 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
359 if (OnePsiA->PsiType == type) { // drop all but occupied ones
360 l++; // increase l if it is non-extra wave function
361 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
362 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
363 else
364 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
365
366 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
367 if (LPsiDatA != NULL) {
368 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
369 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
370 for (n0=0;n0<N0;n0++)
371 for (n[1]=0;n[1]<N[1];n[1]++)
372 for (n[2]=0;n[2]<N[2];n[2]++) {
373 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
374 iS = n[2]+N[2]*(n[1]+N[1]*n0);
375 n[0] = n0 + LevS->Plan0.plan->start_nx;
376 for (k=0;k<max_operators;k+=2) {
377 e = k/2;
378 argument = 2.*PI/(double)(N[e])*(double)(n[e]);
379 PsiAtNode = PsiCR[i0] /LevS->MaxN;
380 // check lookup
381 if (!l) // perform check on first wave function only
382 if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
383 Error(SomeError, "Lookup table does not match real value!");
384 }
385 HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
386 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
387 HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
388 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
389 }
390 }
391 for (u=0;u<max_operators;u++) {
392 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
393 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
394 }
395 }
396 m = -1; // to access U matrix element (0..Num-1)
397 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
398 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
399 if (OnePsiB->PsiType == type) { // drop all but occupied ones
400 m++; // increase m if it is non-extra wave function
401 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
402 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
403 else
404 LOnePsiB = NULL;
405 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
406 RecvSource = OnePsiB->my_color_comm_ST_Psi;
407 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
408 LPsiDatB=LevS->LPsi->TempPsi;
409 } else { // .. otherwise send it to all other processes (Max_me... - 1)
410 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
411 if (p != OnePsiB->my_color_comm_ST_Psi)
412 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
413 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
414 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
415
416 for (u=0;u<max_operators;u++) {
417 a_ij = 0;
418 b_ij = 0;
419 if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
420 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
421 g=0;
422 if (LevS->GArray[0].GSq == 0.0) {
423 Index = LevS->GArray[g].Index;
424 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
425 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
426 g++;
427 }
428 for (; g < LevS->MaxG; g++) {
429 Index = LevS->GArray[g].Index;
430 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
431 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
432 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
433 // sum up elements from all coefficients sharing processes
434 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
435 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
436 a_ij = A_ij;
437 b_ij = B_ij;
438 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
439 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
440 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
441 } else { // receive ...
442 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
443 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
444 }
445 // ... and store
446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
447 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
448 gsl_matrix_set(A[u], l, m, A_ij);
449 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
450 }
451 }
452 }
453 }
454 }
455 // reset extra entries
456 for (u=0;u<=max_operators;u++) {
457 for (i=Num;i<AllocNum;i++)
458 for (j=0;j<AllocNum;j++)
459 gsl_matrix_set(A[u], i,j, 0.);
460 for (i=Num;i<AllocNum;i++)
461 for (j=0;j<AllocNum;j++)
462 gsl_matrix_set(A[u], j,i, 0.);
463 }
464 FreeSinCosLookupTable(cos_lookup, sin_lookup);
465
466/*// print A matrices for debug
467 if (P->Par.me == 0)
468 for (u=0;u<max_operators+1;u++) {
469 fprintf(stderr, "A[%i] = \n",u);
470 for (i=0;i<Num;i++) {
471 for (j=0;j<Num;j++)
472 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
473 fprintf(stderr, "\n");
474 }
475 }
476*/
477}
478
479/** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
480 * First order is zero due to wave function being real (symmetry condition).
481 * \param *P Problem at hand
482 */
483void CalculateSecondOrderReciprocalMoment(struct Problem *P)
484{
485 struct Lattice *Lat = &P->Lat;
486 struct RunStruct *R = &P->R;
487 struct FileData *F = &P->Files;
488 struct Psis *Psi = &Lat->Psi;
489 struct LatticeLevel *LevS = R->LevS;
490 double result, Result;
491 fftw_complex *LPsiDatA=NULL;
492 struct OnePsiElement *OnePsiA;
493 int i,j,l,g;
494 char spin[12], suffix[18];
495
496 switch (Lat->Psi.PsiST) {
497 case SpinDouble:
498 strcpy(suffix,".recispread.csv");
499 strcpy(spin,"SpinDouble");
500 break;
501 case SpinUp:
502 strcpy(suffix,".recispread_up.csv");
503 strcpy(spin,"SpinUp");
504 break;
505 case SpinDown:
506 strcpy(suffix,".recispread_down.csv");
507 strcpy(spin,"SpinDown");
508 break;
509 }
510 if(P->Par.me_comm_ST == 0) {
511 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
512 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
513 OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
514 else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
515 OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
516 if (F->ReciSpreadFile == NULL) {
517 Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
518 } else {
519 fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
520 }
521 }
522
523 // integrate second order moment
524 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
525 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
526 if (OnePsiA->PsiType == type) { // drop all but occupied ones
527 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
528 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
529 else
530 LPsiDatA = NULL;
531
532 if (LPsiDatA != NULL) {
533 if (P->Par.me_comm_ST == 0)
534 fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo);
535 for (i=0;i<NDIM;i++) {
536 for (j=0;j<NDIM;j++) {
537 result = 0.;
538 g = 0;
539 if (LevS->GArray[0].GSq == 0.0) {
540 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re);
541 g++;
542 }
543 for (;g<LevS->MaxG;g++)
544 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
545 //result *= Lat->Volume/LevS->MaxG;
546 MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
547 if (P->Par.me_comm_ST == 0)
548 fprintf(F->ReciSpreadFile,"%2.5lg ", Result);
549 }
550 }
551 if (P->Par.me_comm_ST == 0)
552 fprintf(F->ReciSpreadFile,"\n");
553 }
554 }
555 }
556 if(P->Par.me_comm_ST == 0) {
557 fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
558 fflush(F->ReciSpreadFile);
559 }
560}
561
562/** Given the unitary matrix the transformation is performed on the Psis.
563 * \param *P Problem at hand
564 * \param *U gsl matrix containing the transformation matrix
565 * \param Num dimension parameter for the matrix, i.e. number of wave functions
566 */
567void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num)
568{
569 struct Lattice *Lat = &P->Lat;
570 struct RunStruct *R = &P->R;
571 struct Psis *Psi = &Lat->Psi;
572 struct LatticeLevel *LevS = R->LevS;
573 MPI_Status status;
574 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
575 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
576 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
577 int g,i,j,l,k,m,p;
578
579 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me);
580
581 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
582 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
583
584 for (l=0;l<Num;l++) // allocate for each local wave function
585 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
586
587 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
588 l=-1; // to access U matrix element (0..Num-1)
589 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
590 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
591 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
592 if (OnePsiA->PsiType == type) { // drop all but occupied ones
593 l++; // increase l if it is occupied wave function
594 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
595 k++; // increase k only if it is a local, non-extra orbital wave function
596 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
597 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part
598 } else
599 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
600
601 m = -1; // to access U matrix element (0..Num-1)
602 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
603 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
604 if (OnePsiB->PsiType == type) { // drop all but occupied ones
605 m++; // increase m if it is occupied wave function
606 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
607 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
608 else
609 LOnePsiB = NULL;
610 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
611 RecvSource = OnePsiB->my_color_comm_ST_Psi;
612 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
613 LPsiDatB=LevS->LPsi->TempPsi;
614 } else { // .. otherwise send it to all other processes (Max_me... - 1)
615 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
616 if (p != OnePsiB->my_color_comm_ST_Psi)
617 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
618 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
619 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
620
621 if (LPsiDatA != NULL) {
622 double tmp = gsl_matrix_get(U,l,m);
623 g=0;
624 if (LevS->GArray[0].GSq == 0.0) {
625 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
626 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
627 g++;
628 }
629 for (; g < LevS->MaxG; g++) {
630 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
631 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
632 }
633 }
634 }
635 }
636 }
637 }
638
639 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
640 // now, as all wave functions are updated, swap the buffer
641 l = -1;
642 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
643 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
644 l++;
645 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
646 LPsiDatA = (fftw_complex *)coeffs_buffer[l];
647 LPsiDatB = LevS->LPsi->LocalPsi[l];
648 for (g=0;g<LevS->MaxG;g++) {
649 LPsiDatB[g].re = LPsiDatA[g].re;
650 LPsiDatB[g].im = LPsiDatA[g].im;
651 }
652 // recalculating non-local form factors which are coefficient dependent!
653 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
654 }
655 }
656 // and free allocated buffer memory
657 Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
658}
659
660/** Changes Wannier Centres according to RunStruct#CommonWannier.
661 * \param *P Problem at hand.
662 */
663void ChangeWannierCentres(struct Problem *P)
664{
665 struct RunStruct *R = &P->R;
666 struct Lattice *Lat = &P->Lat;
667 struct LatticeLevel *LevS = R->LevS;
668 struct Psis *Psi = &Lat->Psi;
669 struct OnePsiElement *OnePsiA;
670 int *marker, **group;
671 int Num = Psi->NoOfPsis;
672 double **WannierCentre = NULL;
673 double *WannierSpread = NULL;
674 int partner[Num];
675 int i,j,l,k;
676 int totalflag, flag;
677 double q[NDIM], center[NDIM];
678 double Spread;
679 int *N = LevS->Plan0.plan->N;
680
681 int MPISignal = 0, mpisignal = 0;
682 unsigned int membersize, maxsize;
683 int msgsize;
684 double *buffer = NULL;
685 int position;
686 MPI_Status status;
687
688 // determine necessary pack/buffer sizes for the wannier stuff
689 MPI_Pack_size(3, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
690 maxsize = membersize;
691 MPI_Pack_size(1, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
692 maxsize += membersize;
693
694 // allocate memory
695 buffer = (double *) Malloc(sizeof(double)*maxsize, "ChangeWannierCentres: *buffer");
696 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ChangeWannierCentres: *WannierSpread");
697 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ChangeWannierCentres: **WannierCentre");
698 for (j=0;j<Num;j++)
699 WannierCentre[j] = (double *) Malloc(sizeof(double)*(NDIM+1), "ChangeWannierCentres: *WannierCentre[]");
700
701 // exchange wannier centers and spread among processes by going over all wave functions
702 i=-1;
703 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {
704 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
705 if (OnePsiA->PsiType == type) { // drop all but occupied ones
706 i++; // increase l if it is occupied wave function
707
708 // send and receive centres and spread (we use the MPI_Pack'ing scheme to preserve correct order of data)
709 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
710 position = 0;
711 MPI_Pack(Psi->AddData[OnePsiA->MyLocalNo].WannierCentre, 3, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
712 MPI_Pack(&Psi->AddData[OnePsiA->MyLocalNo].WannierSpread, 1, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
713 for (k=0;k<P->Par.Max_me_comm_ST_PsiT;k++)
714 if (k != P->Par.my_color_comm_ST_Psi) { // send to all other processes
715 fprintf(stderr, "(%i) Send goes from %d to %d\n", P->Par.me, k, P->Par.my_color_comm_ST_Psi);
716 if (MPI_Send(buffer, position, MPI_PACKED, k, WannierTag5, P->Par.comm_ST_PsiT) != MPI_SUCCESS) {
717 mpisignal = 1;
718 }
719 } else { // "send" to us
720 for(j=0;j<NDIM;j++) // copy to own array as well
721 WannierCentre[i][j] = Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j];
722 WannierSpread[i] = Psi->AddData[OnePsiA->MyLocalNo].WannierSpread;
723 }
724 } else {
725 fprintf(stderr, "(%i) Receive is from %d to %d\n", P->Par.me, OnePsiA->my_color_comm_ST_Psi, P->Par.my_color_comm_ST_Psi);
726 if (MPI_Recv(buffer, maxsize, MPI_PACKED, OnePsiA->my_color_comm_ST_Psi, WannierTag5, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
727 mpisignal = 1;
728 position = 0;
729 MPI_Get_count(&status, MPI_PACKED, &msgsize);
730 MPI_Unpack (buffer, msgsize, &position, WannierCentre[i], 3, MPI_DOUBLE, P->Par.comm_ST_PsiT);
731 MPI_Unpack (buffer, msgsize, &position, &WannierSpread[i], 1, MPI_DOUBLE, P->Par.comm_ST_PsiT);
732 }
733
734 // check for MPI errors
735 if (MPI_Allreduce(&mpisignal, &MPISignal, 1, MPI_INT, MPI_SUM, P->Par.comm_ST_PsiT) != MPI_SUCCESS) // allreduce signals
736 Error(SomeError,"ChangeWannierCentres: Bcast of signal failed\n");
737 if (MPISignal) { // greater 0 means there was at least one hickup somewhere
738 Error(SomeError,"ChangeWannierCentres: Scattering of Wanniers failed\n");
739 }
740 }
741 }
742 // free the buffer memory
743 Free(buffer, "ChangeWannierCentres: *buffer");
744
745 if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
746 fprintf(stderr, "(%i) Printing gathered Wannier centres:\n", P->Par.me);
747 for(l=0;l<Num;l++) {
748 fprintf(stderr, "(%i) ", P->Par.me);
749 for(j=0;j<NDIM;j++)
750 fprintf(stderr, "%lg\t", WannierCentre[l][j]);
751 fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
752 }
753 }
754
755 // change centres
756 switch (R->CommonWannier) {
757 case 4:
758 debug(P,"Shifting each Wannier centers to cell center");
759 for (j=0;j<NDIM;j++) // center point in [0,1]^3
760 center[j] = 0.5;
761 RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
762 for (i=0; i < Num; i++) { // go through all occupied wave functions
763 for (j=0;j<NDIM;j++) // put into Wannier centres
764 WannierCentre[i][j] = q[j];
765 }
766 break;
767 case 3:
768 debug(P,"Shifting Wannier centers individually to nearest grid point");
769 for (i=0;i < Num; i++) { // go through all wave functions
770 RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
771 for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI
772 q[j] *= (double)N[j]/(2.*PI);
773
774 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
775 if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
776 q[j] = floor(q[j])/(double)N[j];
777 else
778 q[j] = ceil(q[j])/(double)N[j];
779 }
780 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
781 }
782 break;
783 case 2:
784 debug(P,"Combining individual orbitals according to spread.");
785 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
786 debug(P,"finding partners");
787 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
788 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
789 for (l=0;l<Num;l++) {
790 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
791 for (k=0;k<=Num;k++)
792 group[l][k] = -1; // reset partner group
793 }
794 for (k=0;k<Num;k++)
795 partner[k] = 0;
796 debug(P,"mem allocated");
797 // go for each orbital through every other, check distance against the sum of both spreads
798 // if smaller add to group of this orbital
799 for (l=0;l<Num;l++) {
800 j=0; // index for partner group
801 for (k=0;k<Num;k++) { // check this against l
802 Spread = 0.;
803 for (i=0;i<NDIM;i++) {
804 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
805 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
806 }
807 //Spread = sqrt(Spread); // distance in Spread
808 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
809 if (Spread < 5*(WannierSpread[l]+WannierSpread[k])*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread times two plus safety
810 group[l][j++] = k; // add k to group of l
811 partner[l]++;
812 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
813 }
814 }
815 }
816
817 // consistency, for each orbital check if this orbital is also in the group of each referred orbital
818 debug(P,"checking consistency");
819 totalflag = 1;
820 for (l=0;l<Num;l++) // checking l's group
821 for (k=0;k<Num;k++) { // k is partner index
822 if (group[l][k] != -1) { // if current index k is a partner
823 flag = 0;
824 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
825 if ((group[ group[l][k] ][j] == l))
826 flag = 1;
827 }
828 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
829 if (totalflag == 1) totalflag = flag;
830 }
831 }
832 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
833 debug(P,"weight and calculate new centers for partner groups");
834 for (l=0;l<=Num;l++)
835 marker[l] = 1;
836 if (totalflag) {
837 for (l=0;l<Num;l++) { // go through each orbital
838 if (marker[l] != 0) { // if it hasn't been reweighted
839 marker[l] = 0;
840 for (i=0;i<NDIM;i++)
841 q[i] = 0.;
842 j = 0;
843 while (group[l][j] != -1) {
844 marker[group[l][j]] = 0;
845 for (i=0;i<NDIM;i++) {
846 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
847 q[i] += WannierCentre[ group[l][j] ][i];
848 }
849 j++;
850 }
851 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
852 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
853 q[i] /= (double)(j);
854 }
855
856 // put WannierCentre into own and all partners'
857 for (i=0;i<NDIM;i++)
858 WannierCentre[l][i] = q[i];
859 j = 0;
860 while (group[l][j] != -1) {
861 for (i=0;i<NDIM;i++)
862 WannierCentre[group[l][j]][i] = q[i];
863 j++;
864 }
865 }
866 }
867 }
868 if (P->Call.out[StepLeaderOut]) {
869 fprintf(stderr,"Summary:\n");
870 fprintf(stderr,"========\n");
871 for (i=0;i<Num;i++)
872 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
873 }
874 debug(P,"done");
875
876 Free(marker, "ChangeWannierCentres: marker");
877 for (l=0;l<Num;l++)
878 Free(group[l], "ChangeWannierCentres: group[l]");
879 Free(group, "ChangeWannierCentres: group");
880 break;
881 case 1:
882 debug(P,"Individual orbitals are changed to center of all.");
883 for (i=0;i<NDIM;i++) // zero center of weight
884 q[i] = 0.;
885 for (k=0;k<Num;k++)
886 for (i=0;i<NDIM;i++) { // sum up all orbitals each component
887 q[i] += WannierCentre[k][i];
888 }
889 for (i=0;i<NDIM;i++) // divide by number
890 q[i] /= Num;
891 for (k=0;k<Num;k++)
892 for (i=0;i<NDIM;i++) { // put into this function's array
893 WannierCentre[k][i] = q[i];
894 }
895 break;
896 case 0:
897 default:
898 break;
899 }
900
901 if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
902 fprintf(stderr, "(%i) Printing changed Wannier centres:\n", P->Par.me);
903 for(l=0;l<Num;l++) {
904 fprintf(stderr, "(%i) ", P->Par.me);
905 for(j=0;j<NDIM;j++)
906 fprintf(stderr, "%lg\t", WannierCentre[l][j]);
907 fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
908 }
909 }
910
911 // store info into local arrays again
912 i=-1;
913 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
914 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
915 if (OnePsiA->PsiType == type) { // drop all but occupied ones
916 i++; // increase l if it is occupied wave function
917 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local
918 for(j=0;j<NDIM;j++)
919 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
920 }
921 }
922 }
923
924 // show locally stored info for debugging
925 if (P->Call.out[NormalOut]) {
926 i=-1;
927 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
928 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
929 if (OnePsiA->PsiType == type) { // drop all but occupied ones
930 i++; // increase l if it is occupied wave function
931 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?
932 fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2], Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
933 }
934 }
935 }
936
937 // free memory
938 Free(WannierSpread, "ChangeWannierCentres: *WannierSpread");
939 for (j=0;j<Num;j++)
940 Free(WannierCentre[j], "ChangeWannierCentres: *WannierCentre[]");
941 Free(WannierCentre, "ChangeWannierCentres: **WannierCentre");
942}
943
944/** From the entries of the variance matrices the spread is calculated.
945 * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$
946 * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$
947 * \param *P Problem at hand
948 * \param *A variance matrices
949 * \param old_spread first term of wannier spread
950 * \param spread second term of wannier spread
951 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
952 * \param *WannierSpread array with wannier spread per wave function
953 */
954void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread)
955{
956 struct Lattice *Lat = &P->Lat;
957 struct Psis *Psi = &Lat->Psi;
958 struct OnePsiElement *OnePsiA;
959 int i,j,k,l;
960 double tmp, q[NDIM];
961
962 *old_spread = 0;
963 *spread = 0;
964
965 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
966 i=-1;
967 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
968 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
969 if (OnePsiA->PsiType == type) { // drop all but occupied ones
970 i++; // increase l if it is occupied wave function
971 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
972
973 // calculate Wannier Centre
974 for (j=0;j<NDIM;j++) {
975 q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
976 if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
977 q[j] += 1.;
978 }
979 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
980
981 // store orbital spread and centre in file
982 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
983 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
984 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
985 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
986 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
987 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
988 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
989 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
990 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
991
992 // gather all spreads
993 *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
994 for (k=0;k<max_operators;k++)
995 *spread += pow(gsl_matrix_get(A[k],i,i),2);
996 }
997 }
998}
999
1000/** Prints gsl_matrix nicely to screen.
1001 * \param *P Problem at hand
1002 * \param *U gsl matrix
1003 * \param Num number of rows/columns
1004 * \param *msg name of matrix, prepended before entries
1005 */
1006void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg)
1007{
1008 int k,l;
1009 fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
1010 for (k=0;k<Num;k++) {
1011 for (l=0;l<Num;l++)
1012 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
1013 fprintf(stderr,"\n");
1014 }
1015}
1016
1017/** Allocates memory for the DiagonalizationData structure
1018 * \param *P Problem at hand
1019 * \param DiagData pointer to structure
1020 * \param Num number of rows and columns in matrix
1021 * \param NumMatrices number of matrices to be simultaneously diagonalized
1022 * \param extra number of extra matrices to be passively also diagonalized
1023 */
1024void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra)
1025{
1026 int i;
1027
1028 // store integer values
1029 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
1030 DiagData->Num = Num;
1031 DiagData->AllocNum = ceil((double)Num / 2. ) *2;
1032 DiagData->NumMatrices = NumMatrices;
1033 DiagData->extra = extra;
1034
1035 // determine communicator and our rank and its size
1036 DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
1037 MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
1038 MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
1039
1040 // allocate memory
1041 DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
1042 gsl_matrix_set_identity(DiagData->U);
1043
1044 DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A");
1045 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
1046 DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
1047 gsl_matrix_set_zero(DiagData->A[i]);
1048 }
1049
1050 // init merry-go-round array for diagonilzation
1051 DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top");
1052 DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot");
1053 for (i=0;i<DiagData->AllocNum/2;i++) {
1054 DiagData->top[i] = 2*i;
1055 DiagData->bot[i] = 2*i+1;
1056 }
1057/* // print starting values of index generation tables top and bot
1058 fprintf(stderr,"top\t");
1059 for (k=0;k<AllocNum/2;k++)
1060 fprintf(stderr,"%i\t", top[k]);
1061 fprintf(stderr,"\n");
1062 fprintf(stderr,"bot\t");
1063 for (k=0;k<AllocNum/2;k++)
1064 fprintf(stderr,"%i\t", bot[k]);
1065 fprintf(stderr,"\n");*/
1066}
1067
1068/** Frees allocated memory for the DiagonalizationData structure.
1069 * \param DiagData pointer to structure
1070 */
1071void FreeDiagonalization(struct DiagonalizationData *DiagData)
1072{
1073 int i;
1074
1075 Free(DiagData->top, "FreeDiagonalization: DiagData->top");
1076 Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
1077 gsl_matrix_free(DiagData->U);
1078 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
1079 gsl_matrix_free(DiagData->A[i]);
1080 Free(DiagData->A, "FreeDiagonalization: DiagData->A");
1081}
1082
1083/** Orthogonalizing PsiType#Occupied wave functions for MD.
1084 * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
1085 * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi
1086 * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
1087 * \param *P Problem at hand
1088 * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
1089 * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
1090 * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
1091 * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
1092 * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
1093 * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
1094 * final orthogonalisation of all Psis, after the CG minimisation.
1095 */
1096void OrthogonalizePsis(struct Problem *P)
1097{
1098 struct Lattice *Lat = &P->Lat;
1099 struct Psis *Psi = &Lat->Psi;
1100 struct LatticeLevel *LevS = P->R.LevS;
1101 int Num = Psi->NoOfPsis;
1102 int i,j,l;
1103 struct DiagonalizationData DiagData;
1104 double PsiSP;
1105
1106 InitDiagonalization(P, &DiagData, Num, 1, 0);
1107
1108 // Calculate Overlap matrix
1109 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1110 CalculateOverlap(P, l, Occupied);
1111 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1112 for (j=0;j<Num;j++)
1113 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1114 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)");
1115
1116 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
1117
1118 // diagonalize overlap matrix
1119 Diagonalize(P, &DiagData);
1120
1121 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
1122
1123 // apply found unitary transformation to wave functions
1124 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1125
1126 // update GramSch stati
1127 for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
1128 GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given...
1129 PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
1130 //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
1131 Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
1132 }
1133 UpdateGramSchAllPsiStatus(P,Psi);
1134/*
1135 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1136 CalculateOverlap(P, l, Occupied);
1137 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1138 for (j=0;j<Num;j++)
1139 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1140 if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)");
1141 */
1142 // free memory
1143 FreeDiagonalization(&DiagData);
1144}
1145
1146/** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
1147 * Ensures that two orthonormality condition are met for Psis:
1148 * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$
1149 * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$
1150 * \param *P Problem at hand
1151 * \note [Payne] states that GramSchmidt is not well-suited, and this stronger
1152 * condition is needed for numerical stability
1153 * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
1154 */
1155void StrongOrthogonalizePsis(struct Problem *P)
1156{
1157 struct Lattice *Lat = &P->Lat;
1158 struct Psis *Psi = &Lat->Psi;
1159 int Num = Psi->NoOfPsis;
1160 int i,j,l;
1161 struct DiagonalizationData DiagData;
1162
1163 InitDiagonalization(P, &DiagData, Num, 2, 0);
1164
1165 // Calculate Overlap matrix
1166 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
1167 CalculateOverlap(P, l, Occupied);
1168 //Calculate1stOverlap(P, l, Occupied);
1169 }
1170 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1171 for (j=0;j<Num;j++) {
1172 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1173 //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
1174 }
1175
1176 // diagonalize overlap matrix
1177 Diagonalize(P, &DiagData);
1178
1179 // apply found unitary transformation to wave functions
1180 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1181
1182 // free memory
1183 FreeDiagonalization(&DiagData);
1184}
1185
1186/** Uses either serial or parallel diagonalization.
1187 * \param *P Problem at hand
1188 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1189 * \sa ParallelDiagonalization(), SerialDiagonalization()
1190 */
1191void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData)
1192{
1193 if (DiagData->Num != 1) { // one- or multi-process case?
1194 if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
1195 /*
1196 debug(P,"Testing with silly matrix");
1197 ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
1198 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1199 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1200 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1201 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1202 */
1203 debug(P,"ParallelDiagonalization");
1204 ParallelDiagonalization(P, DiagData);
1205 } else {/*
1206 debug(P,"Testing with silly matrix");
1207 SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
1208 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1209 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1210 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1211 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1212 */
1213 debug(P,"SerialDiagonalization");
1214 SerialDiagonalization(P, DiagData);
1215 }
1216
1217 //if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1218 //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
1219 }
1220}
1221
1222/** Computation of Maximally Localized Wannier Functions.
1223 * Maximally localized functions are prime when evulating a Hamiltonian with
1224 * magnetic fields under periodic boundary conditions, as the common position
1225 * operator is no longer valid. These can be obtained by orbital rotations, which
1226 * are looked for iteratively and gathered in one transformation matrix, to be
1227 * later applied to the set of orbital wave functions.
1228 *
1229 * In order to obtain these, the following algorithm is applied:
1230 * -# Initialize U (identity) as the sought-for transformation matrix
1231 * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
1232 * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
1233 * \f$A^{(k)}\f$
1234 * -# For each pair of indices (i,j) (i<j) do the following:
1235 * -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
1236 * where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
1237 * -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
1238 * corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
1239 * -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
1240 * identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
1241 * \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
1242 * -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
1243 * -# Gather the rotations in \f$U = U R\f$
1244 * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
1245 * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
1246 * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
1247 * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
1248 * OnePsiElementAddData#WannierCentre
1249 *
1250 * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
1251 * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
1252 * time.
1253 *
1254 * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
1255 * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
1256 * may be diagonalized simultaneously and parallely. We use the implementation explained in
1257 * [Golub, Matrix computations, 1989, p451].
1258 *
1259 * \param *P Problem at hand
1260 */
1261void ComputeMLWF(struct Problem *P) {
1262 // variables and allocation
1263 struct Lattice *Lat = &P->Lat;
1264 struct Psis *Psi = &Lat->Psi;
1265 int i;
1266 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
1267 double **WannierCentre;
1268 double *WannierSpread;
1269 double spread, spreadSQ;
1270 struct DiagonalizationData DiagData;
1271
1272 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
1273
1274 InitDiagonalization(P, &DiagData, Num, max_operators, 1);
1275
1276 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
1277 //debug (P,"Calculatung Variance matrices");
1278 FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
1279
1280 //debug (P,"Diagonalizing");
1281 Diagonalize(P, &DiagData);
1282
1283 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
1284 //debug (P,"Performing Unitary transformation");
1285 UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
1286
1287 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
1288 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre");
1289 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread");
1290 for (i=0;i<Num;i++)
1291 WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre");
1292
1293 //debug (P,"Computing Wannier centres from variance matrix");
1294 ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
1295
1296 // write to file
1297 //debug (P,"Writing spread file");
1298 WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);
1299
1300 debug(P,"Free'ing memory");
1301 // free all remaining memory
1302 FreeDiagonalization(&DiagData);
1303 for (i=0;i<Num;i++)
1304 Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
1305 Free(WannierCentre, "ComputeMLWF: WannierCentre");
1306 Free(WannierSpread, "ComputeMLWF: WannierSpread");
1307}
1308
1309/** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
1310 * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and
1311 * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
1312 *
1313 * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
1314 * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is
1315 * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
1316 * derive the other by solving the equation system. Finally, the eigenvector is normalized.
1317 * \param *P Problem at hand
1318 * \param *A matrix whose eigenvalues/-vectors are to be found
1319 * \param *eval vector with eigenvalues
1320 * \param *evec matrix with corresponding eigenvectors in columns
1321 */
1322#ifdef HAVE_INLINE
1323inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1324#else
1325void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1326#endif
1327{
1328 double a11,a12,a21,a22;
1329 double ev[2], summand1, summand2, norm;
1330 int i;
1331 // find eigenvalues
1332 a11 = gsl_matrix_get(A,0,0);
1333 a12 = gsl_matrix_get(A,0,1);
1334 a21 = gsl_matrix_get(A,1,0);
1335 a22 = gsl_matrix_get(A,1,1);
1336 summand1 = (a11+a22)/2.;
1337 summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22);
1338 ev[0] = summand1 + summand2;
1339 ev[1] = summand1 - summand2;
1340 gsl_vector_set(eval, 0, ev[0]);
1341 gsl_vector_set(eval, 1, ev[1]);
1342 //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
1343
1344 // find eigenvectors
1345 for(i=0;i<2;i++) {
1346 if (fabs(ev[i]) < MYEPSILON) {
1347 gsl_matrix_set(evec, 0,i, 0.);
1348 gsl_matrix_set(evec, 1,i, 0.);
1349 } else if (fabs(a22-ev[i]) > MYEPSILON) {
1350 norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i])));
1351 gsl_matrix_set(evec, 0,i, 1./norm);
1352 gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
1353 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1354 } else if (fabs(a12) > MYEPSILON) {
1355 norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12));
1356 gsl_matrix_set(evec, 0,i, 1./norm);
1357 gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
1358 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1359 } else {
1360 if (fabs(a11-ev[i]) > MYEPSILON) {
1361 norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i])));
1362 gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
1363 gsl_matrix_set(evec, 1,i, 1./norm);
1364 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1365 } else if (fabs(a21) > MYEPSILON) {
1366 norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21));
1367 gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
1368 gsl_matrix_set(evec, 1,i, 1./norm);
1369 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1370 } else {
1371 //gsl_matrix_set(evec, 0,i, 0.);
1372 //gsl_matrix_set(evec, 1,i, 1.);
1373 fprintf (stderr,"\t evec %i undetermined\n", i);
1374 }
1375 //gsl_matrix_set(evec, 0,0, 1.);
1376 //gsl_matrix_set(evec, 1,0, 0.);
1377 //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
1378 }
1379 }
1380}
1381
1382/** Calculates sine and cosine values for multiple matrix diagonalization.
1383 * \param *evec eigenvectors in columns
1384 * \param *eval corresponding eigenvalues
1385 * \param *c cosine to be returned
1386 * \param *s sine to be returned
1387 */
1388#ifdef HAVE_INLINE
1389inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1390#else
1391void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1392#endif
1393{
1394 int index;
1395 double x,y,r;
1396
1397 index = gsl_vector_max_index (eval); // get biggest eigenvalue
1398 //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
1399 x = gsl_matrix_get(evec, 0, index);
1400 y = gsl_matrix_get(evec, 1, index);
1401 if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4
1402 y = -y;
1403 x = -x;
1404 }
1405 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
1406 //fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
1407
1408 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
1409 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
1410 r = sqrt(x*x + y*y); // + z*z);
1411 if (fabs(r) > MYEPSILON) {
1412 *c = sqrt((x + r) / (2.*r));
1413 *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
1414 } else {
1415 *c = 1.;
1416 *s = 0.;
1417 }
1418}
1419
1420/*
1421 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1422 * \param *U transformation matrix set to unity matrix
1423 * \param NumMatrices number of matrices to be diagonalized simultaneously
1424 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1425 * \param AllocNum number of rows/columns in matrices
1426 * \param Num number of wave functions
1427 * \param ProcRank index in group for this cpu
1428 * \param ProcNum number of cpus in group
1429 * \param *top array with top row indices (merry-go-round)
1430 * \param *bot array with bottom row indices (merry-go-round)
1431 */
1432
1433/** Simultaneous diagonalization of matrices with multiple cpus.
1434 * \param *P Problem at hand
1435 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1436 * \note this is slower given one cpu only than SerialDiagonalization()
1437 */
1438void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1439{
1440 struct RunStruct *R =&P->R;
1441 int tagR0, tagR1, tagS0, tagS1;
1442 int iloc, jloc;
1443 double *s_all, *c_all;
1444 int round, max_rounds;
1445 int start;
1446 int *rcounts, *rdispls;
1447 double *c, *s;
1448 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
1449 int left, right; // left or right neighbour for process
1450 double spread = 0., old_spread=0., Spread=0.;
1451 int i,j,k,l,m,u;
1452 int set;
1453 int it_steps; // iteration step counter
1454 double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k]
1455 double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k]
1456 double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k]
1457 double a_i, a_j;
1458 gsl_matrix *G;
1459 gsl_vector *h;
1460 gsl_vector *eval;
1461 gsl_matrix *evec;
1462 //gsl_eigen_symmv_workspace *w;
1463
1464 max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set
1465 if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
1466
1467 // allocate column vectors for interchange of columns
1468 debug(P,"allocate column vectors for interchange of columns");
1469 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
1470 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
1471 c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all");
1472 s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all");
1473 rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts");
1474 rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls");
1475
1476 // allocate eigenvector stuff
1477 debug(P,"allocate eigenvector stuff");
1478 G = gsl_matrix_calloc (2,2);
1479 h = gsl_vector_alloc (2);
1480 eval = gsl_vector_alloc (2);
1481 evec = gsl_matrix_alloc (2,2);
1482 //w = gsl_eigen_symmv_alloc(2);
1483
1484 // establish communication partners
1485 debug(P,"establish communication partners");
1486 if (DiagData->ProcRank == 0) {
1487 tagS0 = WannierALTag; // left p0 always remains left p0
1488 } else {
1489 tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last
1490 }
1491 tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
1492 tagR0 = WannierALTag; //
1493 tagR1 = WannierARTag; // first process
1494 if (DiagData->ProcRank == 0) {
1495 left = DiagData->ProcNum-1;
1496 right = 1;
1497 Lsend = 0;
1498 Rsend = 1;
1499 Lrecv = 0;
1500 Rrecv = 1;
1501 } else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
1502 left = DiagData->ProcRank - 1;
1503 right = 0;
1504 Lsend = DiagData->ProcRank;
1505 Rsend = DiagData->ProcRank - 1;
1506 Lrecv = DiagData->ProcRank - 1;
1507 Rrecv = DiagData->ProcRank;
1508 } else {
1509 left = DiagData->ProcRank - 1;
1510 right = DiagData->ProcRank + 1;
1511 Lsend = DiagData->ProcRank+1;
1512 Rsend = DiagData->ProcRank - 1;
1513 Lrecv = DiagData->ProcRank - 1;
1514 Rrecv = DiagData->ProcRank+1;
1515 }
1516 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
1517
1518 // initialise A_loc
1519 debug(P,"initialise A_loc");
1520 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1521 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
1522 Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
1523 Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
1524 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
1525 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
1526
1527 for (round=0;round<max_rounds;round++) {
1528 Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)];
1529 Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)];
1530 for (l=0;l<DiagData->AllocNum;l++) {
1531 Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round));
1532 Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1);
1533 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
1534 }
1535 }
1536 }
1537 // initialise U_loc
1538 debug(P,"initialise U_loc");
1539 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
1540 Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround");
1541 Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
1542 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
1543 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
1544 for (round=0;round<max_rounds;round++) {
1545 Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)];
1546 Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)];
1547 for (l=0;l<DiagData->AllocNum;l++) {
1548 Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round));
1549 Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1);
1550 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
1551 }
1552 }
1553
1554 // now comes the iteration loop
1555 debug(P,"now comes the iteration loop");
1556 it_steps = 0;
1557 do {
1558 it_steps++;
1559 //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
1560 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
1561 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1562 for (round = 0; round < max_rounds;round++) {
1563 start = DiagData->ProcRank * max_rounds + round;
1564 // get indices
1565 i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
1566 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1567 j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i
1568 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1569 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
1570
1571 // calculate rotation angle, i.e. c and s
1572 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
1573 gsl_matrix_set_zero(G);
1574 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1575 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1576 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1577 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1578 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1579 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1580
1581 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1582 for (l=0;l<2;l++)
1583 for (m=0;m<2;m++)
1584 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1585 }
1586 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1587 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1588 EigensolverFor22Matrix(P,G,eval,evec);
1589 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1590
1591 CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
1592 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
1593
1594 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1595 // V_loc = V_loc * V_small
1596 //debug(P,"apply rotation to local U");
1597 for (l=0;l<DiagData->AllocNum;l++) {
1598 a_i = Uloc[2*round+iloc][l];
1599 a_j = Uloc[2*round+jloc][l];
1600 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j;
1601 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
1602 }
1603 } // end of round
1604 // circulate the rotation angles
1605 //debug(P,"circulate the rotation angles");
1606 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation
1607 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
1608 //m = start;
1609 for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
1610 // we have V_small from process k
1611 //debug(P,"Apply V_small from other process");
1612 i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
1613 j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i
1614 iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1;
1615 jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
1616 for (m=0;m<max_rounds;m++) {
1617 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
1618 // apply row rotation to each A[k]
1619 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1620 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1621 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1622 a_i = Aloc[k][2*m+iloc][i];
1623 a_j = Aloc[k][2*m+iloc][j];
1624 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1625 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1626 a_i = Aloc[k][2*m+jloc][i];
1627 a_j = Aloc[k][2*m+jloc][j];
1628 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1629 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1630 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1631 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1632 }
1633 }
1634 }
1635 // apply rotation to local operator matrices
1636 // A_loc = A_loc * V_small
1637 //debug(P,"apply rotation to local operator matrices A[k]");
1638 for (m=0;m<max_rounds;m++) {
1639 start = DiagData->ProcRank * max_rounds + m;
1640 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1641 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1642 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
1643 for (l=0;l<DiagData->AllocNum;l++) {
1644 // Columns, i and j belong to this process only!
1645 a_i = Aloc[k][2*m+iloc][l];
1646 a_j = Aloc[k][2*m+jloc][l];
1647 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j;
1648 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
1649 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
1650 }
1651 }
1652 }
1653 // Shuffling of these round's columns to prepare next rotation set
1654 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1655 // extract columns from A
1656 //debug(P,"extract columns from A");
1657 MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
1658
1659 }
1660 // and also for V ...
1661 //debug(P,"extract columns from U");
1662 MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
1663
1664
1665 // and merry-go-round for the indices too
1666 //debug(P,"and merry-go-round for the indices too");
1667 MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1668 }
1669
1670 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1671 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1672 old_spread = Spread;
1673 spread = 0.;
1674 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1675 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
1676 spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds];
1677 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
1678 }
1679 }
1680 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
1681 //Spread = spread;
1682 if (P->Par.me == 0) {
1683 //if(P->Call.out[ReadOut])
1684 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
1685 //else
1686 //fprintf(stderr,"%2.9e\n",Spread);
1687 }
1688 // STEP 5: check change of variance
1689 } while (fabs(old_spread-Spread) >= R->EpsWannier);
1690 // end of iterative diagonalization loop: We have found our final orthogonal U!
1691
1692 // gather local parts of U into complete matrix
1693 for (l=0;l<DiagData->ProcNum;l++)
1694 rcounts[l] = DiagData->AllocNum;
1695 debug(P,"allgather U");
1696 for (round=0;round<2*max_rounds;round++) {
1697 for (l=0;l<DiagData->ProcNum;l++)
1698 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1699 MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1700 }
1701 for (k=0;k<DiagData->AllocNum;k++) {
1702 for(l=0;l<DiagData->AllocNum;l++) {
1703 gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
1704 }
1705 }
1706
1707 // after one set, gather A[k] from all and calculate spread
1708 for (l=0;l<DiagData->ProcNum;l++)
1709 rcounts[l] = DiagData->AllocNum;
1710 debug(P,"gather A[k] for spread");
1711 for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
1712 debug(P,"A[k] all gather");
1713 for (round=0;round<2*max_rounds;round++) {
1714 for (l=0;l<DiagData->ProcNum;l++)
1715 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1716 MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1717 }
1718 for (k=0;k<DiagData->AllocNum;k++) {
1719 for(l=0;l<DiagData->AllocNum;l++) {
1720 gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
1721 }
1722 }
1723 }
1724
1725 // free eigenvector stuff
1726 gsl_vector_free(h);
1727 gsl_matrix_free(G);
1728 //gsl_eigen_symmv_free(w);
1729 gsl_vector_free(eval);
1730 gsl_matrix_free(evec);
1731
1732 // Free column vectors
1733 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1734 Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
1735 Free(Around[k], "ParallelDiagonalization: Around[k]");
1736 }
1737 Free(Uround, "ParallelDiagonalization: Uround");
1738 Free(Utotal, "ParallelDiagonalization: Utotal");
1739 Free(c_all, "ParallelDiagonalization: c_all");
1740 Free(s_all, "ParallelDiagonalization: s_all");
1741 Free(c, "ParallelDiagonalization: c");
1742 Free(s, "ParallelDiagonalization: s");
1743 Free(rcounts, "ParallelDiagonalization: rcounts");
1744 Free(rdispls, "ParallelDiagonalization: rdispls");
1745}
1746/*
1747 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1748 * \param *U transformation matrix set to unity matrix
1749 * \param NumMatrices number of matrices to be diagonalized
1750 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1751 * \param AllocNum number of rows/columns in matrices
1752 * \param Num number of wave functions
1753 * \param *top array with top row indices (merry-go-round)
1754 * \param *bot array with bottom row indices (merry-go-round)
1755 */
1756
1757/** Simultaneous Diagonalization of variances matrices with one cpu.
1758 * \param *P Problem at hand
1759 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1760 * \note this is faster given one cpu only than ParallelDiagonalization()
1761 */
1762void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1763{
1764 struct RunStruct *R = &P->R;
1765 gsl_matrix *G;
1766 gsl_vector *h;
1767 gsl_vector *eval;
1768 gsl_matrix *evec;
1769 //gsl_eigen_symmv_workspace *w;
1770 double *c,*s,a_i,a_j;
1771 int it_steps, set, ProcRank;
1772 int i,j,k,l,m;
1773 double spread = 0., old_spread = 0.;\
1774
1775 // allocate eigenvector stuff
1776 debug(P,"allocate eigenvector stuff");
1777 G = gsl_matrix_calloc (2,2);
1778 h = gsl_vector_alloc (2);
1779 eval = gsl_vector_alloc (2);
1780 evec = gsl_matrix_alloc (2,2);
1781 //w = gsl_eigen_symmv_alloc(2);
1782
1783 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
1784 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
1785 debug(P,"now comes the iteration loop");
1786 it_steps=0;
1787 do {
1788 it_steps++;
1789 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
1790 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
1791 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
1792 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1793 // STEP 3: for all index pairs 0<= i<j <AllocNum
1794 for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
1795 // get indices
1796 i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
1797 j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i
1798 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
1799 // STEP 3a: Calculate G
1800 gsl_matrix_set_zero(G);
1801
1802 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1803 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1804 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1805 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1806 gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1807 gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1808 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
1809
1810 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1811 for (l=0;l<2;l++)
1812 for (m=0;m<2;m++)
1813 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1814
1815 //PrintGSLMatrix(P,G,2, "G");
1816 }
1817
1818 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1819 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1820 EigensolverFor22Matrix(P,G,eval,evec);
1821 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1822
1823 //PrintGSLMatrix(P, evec, 2, "Eigenvectors");
1824 CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
1825 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
1826
1827 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
1828 // STEP 3d: apply rotation R to rows and columns of A^{(k)}
1829 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1830 //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
1831 for (l=0;l<DiagData->AllocNum;l++) {
1832 // Rows
1833 a_i = gsl_matrix_get(DiagData->A[k],i,l);
1834 a_j = gsl_matrix_get(DiagData->A[k],j,l);
1835 gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j);
1836 gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
1837 }
1838 for (l=0;l<DiagData->AllocNum;l++) {
1839 // Columns
1840 a_i = gsl_matrix_get(DiagData->A[k],l,i);
1841 a_j = gsl_matrix_get(DiagData->A[k],l,j);
1842 gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j);
1843 gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
1844 }
1845 //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
1846 }
1847 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1848 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
1849 // STEP 3e: apply U = R*U
1850 for (l=0;l<DiagData->AllocNum;l++) {
1851 a_i = gsl_matrix_get(DiagData->U,i,l);
1852 a_j = gsl_matrix_get(DiagData->U,j,l);
1853 gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j);
1854 gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
1855 }
1856 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
1857 }
1858 // and merry-go-round for the indices too
1859 //debug(P,"and merry-go-round for the indices too");
1860 if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1861 }
1862
1863 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1864 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1865 old_spread = spread;
1866 spread = 0.;
1867 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1868 for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions
1869 spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
1870 }
1871 }
1872 if(P->Par.me == 0) {
1873 //if(P->Call.out[ReadOut])
1874 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
1875 //else
1876 //fprintf(stderr,"%2.9e\n",spread);
1877 }
1878 // STEP 5: check change of variance
1879 } while (fabs(old_spread-spread) >= R->EpsWannier);
1880 // end of iterative diagonalization loop: We have found our final orthogonal U!
1881 Free(c, "SerialDiagonalization: c");
1882 Free(s, "SerialDiagonalization: s");
1883 // free eigenvector stuff
1884 gsl_vector_free(h);
1885 gsl_matrix_free(G);
1886 //gsl_eigen_symmv_free(w);
1887 gsl_vector_free(eval);
1888 gsl_matrix_free(evec);
1889}
1890
1891/** Writes the wannier centres and spread to file.
1892 * Also puts centres from array into OnePsiElementAddData structure.
1893 * \param *P Problem at hand
1894 * \param old_spread first term of wannier spread
1895 * \param spread second term of wannier spread
1896 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
1897 * \param *WannierSpread array with wannier spread per wave function
1898 */
1899void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread)
1900{
1901 struct FileData *F = &P->Files;
1902 struct RunStruct *R = &P->R;
1903 struct Lattice *Lat = &P->Lat;
1904 struct Psis *Psi = &Lat->Psi;
1905 struct OnePsiElement *OnePsiA;
1906 char spin[12], suffix[18];
1907 int i,j,l;
1908
1909 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
1910
1911 switch (Lat->Psi.PsiST) {
1912 case SpinDouble:
1913 strcpy(suffix,".spread.csv");
1914 strcpy(spin,"SpinDouble");
1915 break;
1916 case SpinUp:
1917 strcpy(suffix,".spread_up.csv");
1918 strcpy(spin,"SpinUp");
1919 break;
1920 case SpinDown:
1921 strcpy(suffix,".spread_down.csv");
1922 strcpy(spin,"SpinDown");
1923 break;
1924 }
1925 if (P->Par.me_comm_ST == 0) {
1926 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
1927 OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
1928 else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
1929 OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
1930 if (F->SpreadFile == NULL) {
1931 Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
1932 } else {
1933 fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
1934 fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
1935 }
1936 }
1937
1938 // put (new) WannierCentres into local ones and into file
1939 i=-1;
1940 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1941 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1942 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1943 i++; // increase l if it is occupied wave function
1944 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
1945 for (j=0;j<NDIM;j++)
1946 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1947 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierSpread[i];
1948 }
1949 if (P->Par.me_comm_ST == 0)
1950 fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
1951 }
1952 }
1953 if (P->Par.me_comm_ST == 0) {
1954 fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
1955 fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
1956 fflush(F->SpreadFile);
1957 }
1958}
1959
1960
1961/** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
1962 * \param *P Problem at hand
1963 * \return 1 - success, 0 - failure
1964 */
1965int ParseWannierFile(struct Problem *P)
1966{
1967 struct Lattice *Lat = &P->Lat;
1968 struct RunStruct *R = &P->R;
1969 struct Psis *Psi = &Lat->Psi;
1970 struct OnePsiElement *OnePsiA;
1971 int i,l,j, msglen;
1972 FILE *SpreadFile;
1973 char *tagname = NULL;
1974 char suffix[18];
1975 double WannierCentre[NDIM+1]; // combined centre and spread
1976 MPI_Status status;
1977 int signal = 0; // 1 - ok, 0 - error
1978
1979 switch (Lat->Psi.PsiST) {
1980 case SpinDouble:
1981 strcpy(suffix,".spread.csv");
1982 break;
1983 case SpinUp:
1984 strcpy(suffix,".spread_up.csv");
1985 break;
1986 case SpinDown:
1987 strcpy(suffix,".spread_down.csv");
1988 break;
1989 }
1990 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
1991
1992 if (P->Par.me_comm_ST == 0) {
1993 tagname = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "ParseWannierFile: *tagname");
1994 if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
1995 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1996 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1997 return 0;
1998 //Error(SomeError,"ParseWannierFile: Opening failed\n");
1999 }
2000 signal = 1;
2001 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2002 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2003 } else {
2004 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2005 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2006 if (signal == 0)
2007 return 0;
2008 }
2009 i=-1;
2010 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
2011 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
2012 if (OnePsiA->PsiType == type) { // drop all but occupied ones
2013 i++; // increase l if it is occupied wave function
2014 if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
2015 sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
2016 signal = 0;
2017 if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
2018 //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
2019 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2020 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2021 return 0;
2022 }
2023 if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
2024 //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
2025 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2026 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2027 return 0;
2028 }
2029 signal = 1;
2030 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2031 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2032 } else {
2033 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2034 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2035 if (signal == 0)
2036 return 0;
2037 }
2038 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
2039 if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
2040 if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
2041 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
2042 //return 0;
2043 MPI_Get_count(&status, MPI_DOUBLE, &msglen);
2044 if (msglen != NDIM+1)
2045 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
2046 //return 0;
2047 }
2048 if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
2049 Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
2050 //return 0;
2051 // and store 'em (for all who have this Psi local)
2052 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
2053 for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
2054 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
2055 //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
2056 } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
2057 if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
2058 Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
2059 //return 0;
2060 }
2061 }
2062 }
2063 if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) {
2064 fclose(SpreadFile);
2065 Free(tagname, "ParseWannierFile: *tagname");
2066 }
2067 //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
2068 return 1;
2069}
2070
2071/** Calculates the spread of orbital \a i.
2072 * Stored in OnePsiElementAddData#WannierSpread.
2073 * \param *P Problem at hand
2074 * \param i i-th wave function (note "extra" ones are not counted!)
2075 * \return spread \f$\sigma^2_{A^{(k)}}\f$
2076 */
2077double CalculateSpread(struct Problem *P, int i) {
2078 struct Lattice *Lat = &P->Lat;
2079 struct RunStruct *R = &P->R;
2080 struct Psis *Psi = &Lat->Psi;
2081 struct LatticeLevel *Lev0 = R->Lev0;
2082 struct LatticeLevel *LevS = R->LevS;
2083 struct Density *Dens0 = Lev0->Dens;
2084 struct fft_plan_3d *plan = Lat->plan;
2085 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
2086 fftw_real *PsiCR = (fftw_real *)PsiC;
2087 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
2088 fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
2089 fftw_complex **HGcRC = (fftw_complex**)HGcR;
2090 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
2091 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
2092 MPI_Status status;
2093 struct OnePsiElement *OnePsiA, *LOnePsiA;
2094 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
2095 fftw_complex *LPsiDatA=NULL;
2096 int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
2097 N0 = LevS->Plan0.plan->local_nx;
2098 N = LevS->Plan0.plan->N;
2099 const int NUpx = LevS->NUp[0];
2100 const int NUpy = LevS->NUp[1];
2101 const int NUpz = LevS->NUp[2];
2102 double a_ij, b_ij, A_ij, B_ij;
2103 double tmp, tmp2, spread = 0;
2104 double **cos_lookup, **sin_lookup;
2105
2106 b_ij = 0;
2107
2108 CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
2109
2110 // fill matrices
2111 OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
2112 if (OnePsiA->PsiType != Extra) { // drop if extra one
2113 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
2114 LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
2115 else
2116 LOnePsiA = NULL;
2117 if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
2118 RecvSource = OnePsiA->my_color_comm_ST_Psi;
2119 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
2120 LPsiDatA=LevS->LPsi->TempPsi;
2121 } else { // .. otherwise send it to all other processes (Max_me... - 1)
2122 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
2123 if (p != OnePsiA->my_color_comm_ST_Psi)
2124 MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
2125 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
2126 } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
2127
2128 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
2129 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
2130 for (n0=0;n0<N0;n0++)
2131 for (n[1]=0;n[1]<N[1];n[1]++)
2132 for (n[2]=0;n[2]<N[2];n[2]++) {
2133 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
2134 iS = n[2]+N[2]*(n[1]+N[1]*n0);
2135 n[0] = n0 + LevS->Plan0.plan->start_nx;
2136 for (k=0;k<max_operators;k+=2) {
2137 tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
2138 tmp2 = PsiCR[i0] /LevS->MaxN;
2139 // check lookup
2140 if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
2141 fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
2142 Error(SomeError, "Lookup table does not match real value!");
2143 }
2144// HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2145// HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
2146// HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2147// HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
2148 HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
2149 HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
2150 HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
2151 HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
2152 }
2153 }
2154 for (k=0;k<max_operators;k++) {
2155 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
2156 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
2157 }
2158
2159
2160 for (k=0;k<max_operators;k++) {
2161 a_ij = 0;
2162 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
2163 // sum directly in a_ij and b_ij the two desired terms
2164 g=0;
2165 if (LevS->GArray[0].GSq == 0.0) {
2166 Index = LevS->GArray[g].Index;
2167 a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
2168 b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
2169 g++;
2170 }
2171 for (; g < LevS->MaxG; g++) {
2172 Index = LevS->GArray[g].Index;
2173 a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
2174 b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
2175 } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
2176 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2177 spread += pow(A_ij,2);
2178 }
2179 }
2180 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2181
2182 // store spread in OnePsiElementAdd
2183 Psi->AddData[i].WannierSpread = B_ij - spread;
2184
2185 FreeSinCosLookupTable(cos_lookup,sin_lookup);
2186
2187 return (B_ij - spread);
2188}
Note: See TracBrowser for help on using the repository browser.