source: pcp/src/myfft.c@ f70c2a

Last change on this file since f70c2a was e08f45, checked in by Frederik Heber <heber@…>, 17 years ago

Merge branch 'ConcaveHull' of ../espack2 into ConcaveHull

Conflicts:

molecuilder/src/boundary.cpp
molecuilder/src/boundary.hpp
molecuilder/src/builder.cpp
molecuilder/src/linkedcell.cpp
molecuilder/src/linkedcell.hpp
molecuilder/src/vector.cpp
molecuilder/src/vector.hpp
util/src/NanoCreator.c

Basically, this resulted from a lot of conversions two from spaces to one tab, which is my standard indentation. The mess was caused by eclipse auto-indenting. And in espack2:ConcaveHull was the new stuff, so all from ConcaveHull was replaced in case of doubt.
Additionally, vector had ofstream << operator instead ostream << ...

  • Property mode set to 100755
File size: 33.4 KB
Line 
1/** \file myfft.c
2 * FastFourierTransformations for parallel computing.
3 * Implements FFT on multiple machines using fftw package. The most important
4 * routines are the hither and back transformations: fft_3d_real_to_complex() and
5 * fft_3d_complex_to_real(). For fftw plans are needed, which are created by
6 * fft_3d_create_plan() and destroyed by fft_3d_destroy_plan(). Plan weights
7 * can be copied CopyElementOnepw() and there are sort critera GetKeyOnepw(),
8 * GetKeyOnepw2PZI().\n
9 * Wave functions can - for the purpose of testing - be filled with random values
10 * by InitDataTestR().
11 *
12 Project: ParallelCarParrinello
13 \author Jan Hamaekers
14 \date 2000
15
16 File: myfft.c
17 $Id: myfft.c,v 1.23 2007/02/05 15:28:39 foo Exp $
18*/
19
20#ifdef HAVE_CONFIG_H
21#include <config.h>
22#endif
23
24#include<stdlib.h>
25#include<stdio.h>
26#include<unistd.h>
27#include<math.h>
28#include<string.h>
29
30// use double precision fft when we have it
31#ifdef HAVE_DFFTW_H
32#include "dfftw.h"
33#else
34#include "fftw.h"
35#endif
36
37#ifdef HAVE_DRFFTW_H
38#include "drfftw.h"
39#else
40#include "rfftw.h"
41#endif
42
43#include"data.h"
44#include"helpers.h"
45#include"output.h"
46#include"errors.h"
47#include"mergesort2.h"
48#include"mymath.h"
49#include"myfft.h"
50
51
52/** Returns negative value of the weight of a plan_weight as sort critera.
53 * \param *a plan_weight array
54 * \param i index of element
55 * \param *Args unused (only for contingency)
56 */
57static double GetKeyOnepw(void *a, int i, void *Args) {
58 return(-((struct plan_weight *)a)[i].weight);
59}
60
61/** Returns "index + twice the process number + \a Args[2]" of a plan_weight as sort critera.
62 * \a Args[2] is only added if: Index % \a Args[1] != Args[1] - 1
63 * \param *a plan_weight array
64 * \param i index of element
65 * \param *Args
66 */
67static double GetKeyOnepw2PZI(void *a, int i, void *Args) {
68 return(((struct plan_weight *)a)[i].Index+((int *)Args)[2]*( ( ((struct plan_weight *)a)[i].Index%((int *)Args)[1]!=((int *)Args)[1]-1)+2*(((struct plan_weight *)a)[i].PE)));
69}
70
71/** Copies one element from a plan weight array \a *b to another \a *a.
72 * Copies from \a i-th element of \a *a index, weight and PE to \a *b
73 * \param *a source plan weight array
74 * \param i index of source element
75 * \param *b destination plan weight array
76 * \param j index of destination element
77 */
78static void CopyElementOnepw(void *a, int i, void *b, int j) {
79 ((struct plan_weight *)a)[i].Index = ((struct plan_weight *)b)[j].Index;
80 ((struct plan_weight *)a)[i].weight = ((struct plan_weight *)b)[j].weight;
81 ((struct plan_weight *)a)[i].PE = ((struct plan_weight *)b)[j].PE;
82}
83
84/** Creates a 3d plan for the FFT to use in transforms.
85 * \sa fft_plan_3d
86 * \param *P Problem at hand
87 * \param comm MPI Communicator
88 * \param E_cut Energy cutoff
89 * \param MaxLevel number of lattice levels
90 * \param *LevelSizes size increas factor from level to level for MaxLevel's
91 * \param GBasisSQ[] scalar product of reciprocal basis vectors
92 * \param N[] mesh points per dimension NDIM
93 * \param *MaxNoOfnFields maximum number of NFields per level
94 * \param **NFields NField per level per field
95 */
96struct fft_plan_3d *fft_3d_create_plan(struct Problem *P, MPI_Comm comm, const double E_cut, const int MaxLevel, const int *LevelSizes, const double GBasisSQ[NDIM], const int N[NDIM], const int *MaxNoOfnFields, int **NFields) {
97 struct fft_plan_3d *plan;
98 int *MaxNFields;
99 int i;
100 int j;
101 int ny;
102 int Index;
103 int LevelSize=1; // factorial level increase from 0th to maxlevel
104 int n[NDIM];
105 int Args[3];
106 double weight;
107 // Initialization of plan
108 plan = (struct fft_plan_3d *)
109 Malloc(sizeof(struct fft_plan_3d),"create_plan");
110 plan->P = P;
111 plan->local_data = NULL;
112 plan->work = NULL;
113 plan->MaxLevel = MaxLevel;
114 plan->LevelSizes = (int *)
115 Malloc(sizeof(int)*MaxLevel,"create_plan: MaxLevels");
116 for (i=0; i < MaxLevel; i++) {
117 LevelSize *= LevelSizes[i];
118 plan->LevelSizes[i] = LevelSizes[i];
119 }
120 plan->E_cut = E_cut;
121 plan->LevelSize = LevelSize;
122 if (N[0] % LevelSize != 0 || N[1] % LevelSize != 0 || N[2] % LevelSize != 0)
123 Error(SomeError,"create_plan: N[0] % LevelSize != 0 || N[1] % LevelSize != 0 || N[2] % LevelSize != 0");
124 // MPI part of plan
125 MPI_Comm_dup(comm, &plan->comm); // Duplicates an existing communicator with all its cached information
126 MPI_Comm_size(plan->comm, &plan->MaxPE); // Determines the size of the group associated with a communictor
127 MPI_Comm_rank(plan->comm, &plan->myPE); // Determines the rank of the calling process in the communicator
128 plan->PENo = (int *)
129 Malloc(2*sizeof(int)*plan->MaxPE,"create_plan: PENo"); // Malloc 2 ints per process in communicator group
130 for (i=0; i < 2*plan->MaxPE; i++) // all set to zero
131 plan->PENo[i] = 0;
132
133 for (i=0; i < NDIM; i++) {
134 plan->N[i] = N[i];
135 plan->GBasisSQ[i] = GBasisSQ[i];
136 plan->NLSR[i] = N[i] / LevelSize;
137 }
138 plan->Maxpw = (N[1] / LevelSize) * ((N[2] / LevelSize)/2+1);
139 if (P->Call.out[LeaderOut]) fprintf(stderr,"Create_Plan: E_cut %g, N[] %i %i %i, LS %i, Maxpw %i\n",E_cut, N[0],N[1],N[2], LevelSize, plan->Maxpw);
140 if (plan->NLSR[0] % plan->MaxPE != 0 || plan->Maxpw % plan->MaxPE != 0) // check if dividable among processes
141 Error(SomeError,"create_plan: plan->NLSR[0] % plan->MaxPE != 0 || plan->Maxpw % plan->MaxPE != 0");
142 plan->pw = (struct plan_weight *)
143 Malloc(plan->Maxpw*sizeof(struct plan_weight),"create_plan_weight"); // Allocating plan weight
144 plan->pwFS = (struct plan_weight *)
145 Malloc((plan->Maxpw+1)*sizeof(struct plan_weight),"create_plan_weight_FS");
146 for (n[1]=0; n[1] < plan->NLSR[1]; n[1]++) {
147 for (n[2]=0; n[2] < (plan->NLSR[2]/2+1); n[2]++) {
148 ny = (n[1] >= (plan->NLSR[1]/2+1) ? n[1]-plan->NLSR[1] : n[1]);
149 Index = CalcRowMajor2D(n[1],n[2],plan->NLSR[1],plan->NLSR[2]/2+1);
150 plan->pw[Index].Index = Index;
151 //fprintf(stderr,"(%i) plan->pw[%i] = %i\n",P->Par.me,Index, plan->pw[Index].Index);
152 if (GBasisSQ[0] < MYEPSILON) fprintf(stderr,"fft_3d_create_plan: GBasisSQ[0] = %lg\n",GBasisSQ[0]);
153 weight = (2.*E_cut/(LevelSize*LevelSize)-ny*ny*GBasisSQ[1]-n[2]*n[2]*GBasisSQ[2])/GBasisSQ[0];
154 if (weight > 0.0)
155 plan->pw[Index].weight = sqrt(weight);
156 else
157 plan->pw[Index].weight = 0.0;
158 if (n[2] == plan->NLSR[2]/2) plan->pw[Index].weight += 4.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
159 if (n[2] == 0) plan->pw[Index].weight += 2.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
160 }
161 }
162 naturalmergesort(plan->pw,plan->pwFS,0,plan->Maxpw-1,&GetKeyOnepw,NULL,&CopyElementOnepw);
163 if (plan->Maxpw < plan->MaxPE) Error(SomeError,"create_plan: plan->Maxpw < plan->MaxPE");
164 if (plan->Maxpw / plan->MaxPE < plan->NLSR[1] + plan->NLSR[1]/plan->MaxPE)
165 Error(SomeError,"create_plan: plan->Maxpw / plan->MaxPE < plan->NLSR[1] + plan->NLSR[1]/plan->MaxPE");
166 for (i=0; i < plan->Maxpw; i++) {
167 if (i < plan->NLSR[1]) {
168 plan->pw[i].PE = i % plan->MaxPE;
169 } else {
170 if (i-plan->NLSR[1] < plan->NLSR[1]) {
171 plan->pw[i].PE = 0;
172 } else {
173 if (i-2*plan->NLSR[1] < (plan->MaxPE-1)*plan->NLSR[1]) {
174 plan->pw[i].PE = ((i%(plan->MaxPE-1))+1);
175 } else {
176 plan->pw[i].PE = i%plan->MaxPE;
177 }
178 }
179 }
180 plan->PENo[2*(i % plan->MaxPE)]++;
181 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) == plan->NLSR[2]/2)
182 plan->pw[i].weight -= 4.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
183 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) == 0) {
184 plan->pw[i].weight -= 2.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
185 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) >= plan->NLSR[2]/2+1) {
186 plan->pw[i].weight = 0.0;
187 } else {
188 if (plan->pw[i].Index % (plan->NLSR[2]/2+1) == 0) {
189 plan->PENo[2*(plan->pw[i].PE)+1] += floor(plan->pw[i].weight)+1;
190 } else {
191 plan->PENo[2*(plan->pw[i].PE)+1] += 2*floor(plan->pw[i].weight)+1;
192 }
193 }
194 } else {
195 plan->PENo[2*(plan->pw[i].PE)+1] += 2*floor(plan->pw[i].weight)+1;
196 }
197 }
198 plan->LocalMaxpw = plan->PENo[2*plan->myPE];
199 Args[0]=plan->MaxPE; Args[1]=plan->NLSR[2]/2+1; Args[2]=plan->Maxpw;
200 naturalmergesort(plan->pw,plan->pwFS,0,plan->Maxpw-1,&GetKeyOnepw2PZI,Args,&CopyElementOnepw);
201 for (j=0; j < plan->MaxPE; j++) {
202 for (i= plan->NLSR[1]/plan->MaxPE-1; i >= 0; i--) {
203 /*if (i != 0 && i+plan->LocalMaxpw*j >= i*(plan->NLSR[1]/2+1)+plan->LocalMaxpw*j)
204 Error(SomeError,"Createplan: i != 0 && i+plan->LocalMaxpw*j ? i*(plan->NLSR[2]/2+1)+plan->LocalMaxpw*j");*/
205 CopyElementOnepw(plan->pwFS,plan->LocalMaxpw,plan->pw,i+plan->LocalMaxpw*j);
206 CopyElementOnepw(plan->pw,i+plan->LocalMaxpw*j,plan->pw,(i+1)*(plan->NLSR[2]/2+1)-1+plan->LocalMaxpw*j);
207 CopyElementOnepw(plan->pw,(i+1)*(plan->NLSR[2]/2+1)-1+plan->LocalMaxpw*j,plan->pwFS,plan->LocalMaxpw);
208 }
209 }
210 plan->Localpw = &plan->pw[plan->myPE*plan->LocalMaxpw];
211 if (P->Call.out[PsiOut]) {
212 fprintf(stderr,"pw\n");
213 for (i=plan->myPE*plan->LocalMaxpw; i < (plan->myPE+1)*plan->LocalMaxpw; i++) // print my local weights
214 fprintf(stderr,"(%i)W(%g)I(%i)P(%i) ",i,plan->pw[i].weight,plan->pw[i].Index,plan->pw[i].PE);
215 fprintf(stderr,"\n");
216 }
217 if (P->Call.out[LeaderOut]) {
218 fprintf(stderr,"pwPENo\n");
219 for (i=0; i < plan->MaxPE; i++)
220 fprintf(stderr,"(%i)W(%i)X(%i) ",i,plan->PENo[2*i+1],plan->PENo[2*i]);
221 fprintf(stderr,"\n");
222 }
223 Free(plan->pwFS, "fft_3d_create_plan: plan->pwFS");
224 plan->pwFS = NULL;
225
226 plan->Lev_CR_RC = (int *)
227 Malloc(2*sizeof(int)*MaxLevel,"create_plan: Lev_CR_RC");
228 for (i=0; i < 2*MaxLevel; i++) plan->Lev_CR_RC[i] = 1;
229 plan->Levplan = (struct LevelPlan *)
230 Malloc(sizeof(struct LevelPlan)*MaxLevel,"create_plan:Levplan");
231 plan->MaxNoOfnFields = (int *)
232 Malloc(sizeof(int)*MaxLevel,"create_plan:MaxNoOfnFields");
233 for (i=0; i<MaxLevel;i++)
234 plan->MaxNoOfnFields[i] = MaxNoOfnFields[i];
235 plan->NFields = (int **)
236 Malloc(sizeof(int *)*MaxLevel,"create_plan:NFields");
237 MaxNFields = (int *)
238 Malloc(sizeof(int)*MaxLevel,"create_plan:MaxNFields");
239 for (i=0; i<MaxLevel;i++) {
240 plan->NFields[i] = (int *)
241 Malloc(sizeof(int )*MaxNoOfnFields[i],"create_plan:NFields");
242 MaxNFields[i] = 0;
243 for (j=0; j < plan->MaxNoOfnFields[i]; j++) {
244 plan->NFields[i][j] = NFields[i][j];
245 if (NFields[i][j] < 1) Error(SomeError,"Create_plan: NFields[j] < 1");
246 if (NFields[i][j] > MaxNFields[i]) MaxNFields[i] = NFields[i][j];
247 }
248 }
249 plan->LevelBlockSize = plan->LevelSize*plan->LevelSize*plan->LevelSize;
250 for (i=MaxLevel-1; i >= 0; i--) {
251 if (i == MaxLevel-1)
252 plan->Levplan[i].LevelFactor = LevelSizes[MaxLevel-1];
253 else
254 plan->Levplan[i].LevelFactor = plan->Levplan[i+1].LevelFactor*LevelSizes[i];
255 for (j=0; j < NDIM; j++)
256 plan->Levplan[i].N[j] = plan->NLSR[j]*plan->Levplan[i].LevelFactor;
257 plan->Levplan[i].LevelNo = i;
258 plan->LocalDataSize = (plan->Levplan[i].N[0]*plan->Levplan[i].N[1]*(plan->Levplan[i].N[2]/2+1)*MaxNFields[i])/plan->MaxPE;
259 plan->LocalWorkSize = plan->LocalDataSize;
260 plan->local_data = (fftw_complex *)
261 Realloc(plan->local_data, sizeof(fftw_complex)*plan->LocalDataSize,"create_plan: localdata");
262 plan->work = (fftw_complex *)
263 Realloc(plan->work, sizeof(fftw_complex)*plan->LocalWorkSize,"create_plan: work");
264 plan->Levplan[i].ny = plan->Levplan[i].N[1];
265 plan->Levplan[i].local_ny = plan->Levplan[i].N[1]/plan->MaxPE;
266 plan->Levplan[i].nz = plan->Levplan[i].N[2]/2+1;
267 plan->Levplan[i].local_nx = plan->Levplan[i].N[0]/plan->MaxPE;
268 plan->Levplan[i].nx = plan->Levplan[i].N[0];
269 plan->Levplan[i].start_nx = plan->Levplan[i].local_nx * plan->myPE;
270 plan->Levplan[i].local_nyz = plan->Levplan[i].local_ny*plan->Levplan[i].nz;
271 plan->Levplan[i].nyz = plan->Levplan[i].ny*plan->Levplan[i].nz;
272 if (P->Call.out[LeaderOut]) fprintf(stderr,"(%i) nlocal (x%i, y%i, yz%i), n (%i, %i, %i), N (%i, %i, %i)\n", P->Par.me, plan->Levplan[i].local_nx, plan->Levplan[i].local_ny, plan->Levplan[i].local_nyz, plan->Levplan[i].nx, plan->Levplan[i].ny, plan->Levplan[i].nz, plan->Levplan[i].N[0], plan->Levplan[i].N[1], plan->Levplan[i].N[2]);
273 plan->Levplan[i].LocalSizeC = plan->Levplan[i].nx*plan->Levplan[i].local_nyz;
274 plan->Levplan[i].LocalSizeR = plan->Levplan[i].local_nx*plan->Levplan[i].ny*plan->Levplan[i].N[2];
275 plan->Levplan[i].LevelBlockSize = plan->Levplan[i].LevelFactor* plan->Levplan[i].LevelFactor*plan->Levplan[i].LevelFactor;
276 plan->Levplan[i].SendRecvBlockSize = plan->Levplan[i].local_nyz*plan->Levplan[i].local_nx;
277 plan->Levplan[i].PermBlockSize = plan->Levplan[i].LevelFactor;
278 if (P->Call.out[LeaderOut])
279 fprintf(stderr,"Create_Plan: Level(%i) LevelFactor(%i) LevelBlockSize(%i)\n",i,plan->Levplan[i].LevelFactor, plan->Levplan[i].LevelBlockSize);
280 if (plan->Lev_CR_RC[2*i]) {
281 plan->Levplan[i].xplanCR = (fftw_plan *)
282 Malloc(sizeof(fftw_plan)*plan->MaxNoOfnFields[i],"Create_Plan: xplanCR");
283 plan->Levplan[i].yzplanCR = (rfftwnd_plan *)
284 Malloc(sizeof(rfftwnd_plan)*plan->MaxNoOfnFields[i],"Create_Plan: yzplanCR");
285 for (j=0; j<plan->MaxNoOfnFields[i]; j++) {
286 plan->Levplan[i].xplanCR[j] = fftw_create_plan_specific(plan->Levplan[i].N[0], FFTW_BACKWARD, fftw_flag | FFTW_OUT_OF_PLACE,plan->local_data,plan->NFields[i][j],plan->work,plan->NFields[i][j]*plan->Levplan[i].local_nyz);
287 plan->Levplan[i].yzplanCR[j] = rfftw2d_create_plan_specific(plan->Levplan[i].N[1], plan->Levplan[i].N[2], FFTW_COMPLEX_TO_REAL, fftw_flag | FFTW_OUT_OF_PLACE,(fftw_real *)plan->local_data,plan->NFields[i][j],(fftw_real *)plan->work,plan->NFields[i][j]);
288 }
289 } else {
290 plan->Levplan[i].xplanCR = NULL;
291 plan->Levplan[i].yzplanCR = NULL;
292 }
293 if (plan->Lev_CR_RC[2*i+1]) {
294 plan->Levplan[i].xplanRC = (fftw_plan *)
295 Malloc(sizeof(fftw_plan)*plan->MaxNoOfnFields[i],"Create_Plan: xplanRC");
296 plan->Levplan[i].yzplanRC = (rfftwnd_plan *)
297 Malloc(sizeof(rfftwnd_plan)*plan->MaxNoOfnFields[i],"Create_Plan: yzplanRC");
298 for (j=0; j<plan->MaxNoOfnFields[i]; j++) {
299 plan->Levplan[i].xplanRC[j] = fftw_create_plan_specific(plan->Levplan[i].N[0], FFTW_FORWARD, fftw_flag | FFTW_OUT_OF_PLACE,plan->local_data,plan->NFields[i][j]*plan->Levplan[i].local_nyz,plan->work,plan->NFields[i][j]);
300 plan->Levplan[i].yzplanRC[j] = rfftw2d_create_plan_specific(plan->Levplan[i].N[1], plan->Levplan[i].N[2], FFTW_REAL_TO_COMPLEX, fftw_flag | FFTW_OUT_OF_PLACE,(fftw_real *)plan->local_data,plan->NFields[i][j],(fftw_real *)plan->work,plan->NFields[i][j]);
301 }
302 } else {
303 plan->Levplan[i].xplanRC = NULL;
304 plan->Levplan[i].yzplanRC = NULL;
305 }
306 }
307 Free(plan->local_data, "fft_3d_create_plan: plan->local_data");
308 Free(plan->work, "fft_3d_create_plan: plan->work");
309 Free(MaxNFields, "fft_3d_create_plan: MaxNFields");
310 return plan;
311}
312
313void fft_3d_destroy_plan(const struct Problem *P, struct fft_plan_3d *plan) {
314 int i,j;
315 //if (P->Call.out[LeaderOut]) fprintf(stderr,"Destroy_Plan\n");
316 for (i=0; i< plan->MaxLevel; i++) {
317 for (j=0; j< plan->MaxNoOfnFields[i]; j++) {
318 fftw_destroy_plan(plan->Levplan[i].xplanCR[j]);
319 fftw_destroy_plan(plan->Levplan[i].xplanRC[j]);
320 rfftwnd_destroy_plan(plan->Levplan[i].yzplanCR[j]);
321 rfftwnd_destroy_plan(plan->Levplan[i].yzplanRC[j]);
322 }
323 Free(plan->Levplan[i].xplanCR, "fft_3d_destroy_plan: plan->Levplan[i].xplanCR");
324 Free(plan->Levplan[i].xplanRC, "fft_3d_destroy_plan: plan->Levplan[i].xplanRC");
325 Free(plan->Levplan[i].yzplanCR, "fft_3d_destroy_plan: plan->Levplan[i].yzplanCR");
326 Free(plan->Levplan[i].yzplanRC, "fft_3d_destroy_plan: plan->Levplan[i].yzplanRC");
327 Free(plan->NFields[i], "fft_3d_destroy_plan: plan->NFields[i]");
328 }
329 Free(plan->MaxNoOfnFields, "fft_3d_destroy_plan: plan->MaxNoOfnFields");
330 Free(plan->NFields, "fft_3d_destroy_plan: plan->NFields");
331 Free(plan->Levplan, "fft_3d_destroy_plan: plan->Levplan");
332 Free(plan->PENo, "fft_3d_destroy_plan: plan->PENo");
333 plan->PENo = NULL;
334 Free(plan->LevelSizes, "fft_3d_destroy_plan: plan->LevelSizes");
335 plan->LevelSizes = NULL;
336 Free(plan->Lev_CR_RC, "fft_3d_destroy_plan: plan->Lev_CR_RC");
337 plan->Lev_CR_RC = NULL;
338 Free(plan->pw, "fft_3d_destroy_plan: plan->pw");
339 plan->pw = NULL;
340 plan->Levplan = NULL;
341 MPI_Comm_free(&plan->comm);
342 Free(plan, "fft_3d_destroy_plan: plan");
343}
344
345static void FirstDimTransCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local_data -> work */
346 int local_nyz = Lplan->local_nyz;
347 int nx = Lplan->N[0];
348 int fft_iter;
349 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
350 fftw_plan fft_x = Lplan->xplanCR[nFieldsNo];
351 if (nFields > 1) {
352 for (fft_iter = 0; fft_iter < local_nyz; ++fft_iter)
353 fftw(fft_x, nFields,
354 local_data + (nx * nFields) * fft_iter, nFields, 1,
355 work+nFields*fft_iter, nFields*local_nyz, 1);
356 } else
357 fftw(fft_x, local_nyz,
358 local_data, 1, nx,
359 work, local_nyz, 1);
360}
361
362static void OtherDimTransCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* work -> local_data */
363 int local_nx = Lplan->local_nx;
364 int nyz = Lplan->nyz;
365 int fft_iter;
366 int rnyz = Lplan->N[1]*Lplan->N[2];
367 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
368 rfftwnd_plan fft = Lplan->yzplanCR[nFieldsNo];
369 if (nFields > 1) {
370 for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
371 rfftwnd_complex_to_real(fft, nFields,
372 ((fftw_complex *) work)+ (nyz * nFields) * fft_iter,
373 nFields, 1,
374 ((fftw_real *)local_data)+ (rnyz * nFields) * fft_iter
375 , nFields, 1);
376 } else {
377 rfftwnd_complex_to_real(fft, local_nx,
378 (fftw_complex *) work,
379 1, nyz,
380 (fftw_real *)local_data
381 , 1, rnyz);
382 }
383}
384
385static void TransposeMPICR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* work -> local_data */
386 MPI_Alltoall(work, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
387 local_data, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
388 plan->comm);
389}
390
391static void LocalPermutationCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* data -> work */
392 int x,PEy,ly,z,Z,Y,srcIndex,destIndex,Index,cpyes,Ly;
393 int lnx=Lplan->local_nx,lny=plan->NLSR[1]/plan->MaxPE,nz=plan->NLSR[2]/2+1,MaxPE=plan->MaxPE,ny=plan->NLSR[1];
394 int es = el_size*Lplan->PermBlockSize;
395 int LevelFactor = Lplan->LevelFactor;
396 int snz = (nz-1)*es+el_size;
397 struct plan_weight *pw = plan->pw;
398 for (x=0; x < lnx; x++)
399 for (PEy=0; PEy < MaxPE; PEy++)
400 for (ly=0; ly < lny; ly++)
401 for (Ly=0; Ly < LevelFactor;Ly++)
402 for (z=0; z < nz; z++) {
403 Z = z*es;
404 srcIndex = Z+snz*(Ly+LevelFactor*(ly+lny*(x+lnx*(PEy))));
405 Index = pw[z+nz*(ly+lny*PEy)].Index;
406 Z = Index%nz;
407 Z = Z*es;
408 Y = ((Index/nz)*LevelFactor+Ly);
409 destIndex = Z+snz*(Y+LevelFactor*ny*(x));
410 cpyes = (z == nz-1 ? el_size : es);
411 memcpy( &work[destIndex],
412 &local_data[srcIndex],
413 cpyes * sizeof(double));
414 }
415}
416
417// does complex to real 3d fftransformation (reciprocal base -> real base)
418/** Inverse Fast Fourier Transformation of plane wave coefficients.
419 * If given state is \f$\langle \chi_{i,G} | \psi_i (r) \rangle \f$, result is \f$| \psi_i (r) \rangle\f$
420 * Calls subsequently (reverse order of calling as in fft_3d_real_to_complex())
421 * -# FirstDimTransCR()\n
422 * -# TransposeMPICR()\n
423 * -# LocalPermutationCR()\n
424 * -# OtherDimTransCR()\n
425 * \param *plan the transformation plan
426 * \param Level current level number
427 * \param nFieldsNo number of parallel ffts (in case of LevelUp)
428 * \param *local_data real base wave function coefficients, \f$\psi_i (G)\f$, on return \f$\psi_i (r)\f$
429 * \param *work temporary array for coefficients
430 * \warning coefficients in \a *local_date and *work are destroyed!
431 * \note Due to the symmetry of the reciprocal coefficients, not all of them are stored in memory. Thus
432 * the whole set must be rebuild before this inverse transformation can be called. From 0 to
433 * LatticeLevel::MaxG copied to negative counterpart and for 0 to LatticeLevel::MaxDoubleG the
434 * complex conjugate must be formed (see \ref density.c).
435 */
436void fft_3d_complex_to_real(const struct fft_plan_3d *plan, const int Level, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local data -> local_data, work destroyed */
437 int el_size = (sizeof(fftw_complex) / sizeof(double));
438 int nFields = plan->NFields[Level][nFieldsNo];
439 struct LevelPlan *Lplan = &plan->Levplan[Level];
440 if (!plan->Lev_CR_RC[2*Level]) Error(SomeError,"fft_3d_complex_to_real: !plan->Lev_CR_RC[2*i]");
441 if (nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]) Error(SomeError,"fft_3d_complex_to_real: nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]");
442 el_size *= nFields;
443 //if (isnan(work[0].re))
444 //fprintf(stderr,"fft_3d_complex_to_real,FirstDimTransCR: before work %lg\n",work[0].re);
445 FirstDimTransCR(plan, Lplan, nFieldsNo, local_data, work);
446 //if (isnan(work[0].re))
447 //fprintf(stderr,"fft_3d_complex_to_real,FirstDimTransCR: after work %lg\n",work[0].re);
448 TransposeMPICR(plan, Lplan, el_size, (double *)local_data, (double *)work);
449 LocalPermutationCR(plan, Lplan, el_size, (double *)local_data, (double *)work);
450 OtherDimTransCR(plan, Lplan, nFieldsNo, local_data, work);
451}
452
453static void FirstDimTransRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* work -> local_data */
454 int local_nyz = Lplan->local_nyz;
455 int nx = Lplan->N[0];
456 int fft_iter;
457 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
458 fftw_plan fft_x = Lplan->xplanRC[nFieldsNo];
459 if (nFields > 1) {
460 for (fft_iter = 0; fft_iter < local_nyz; ++fft_iter)
461 fftw(fft_x, nFields,
462 work+nFields*fft_iter, nFields*local_nyz, 1,
463 local_data + (nx * nFields) * fft_iter, nFields, 1);
464 } else
465 fftw(fft_x, local_nyz,
466 work, local_nyz, 1,
467 local_data, 1, nx);
468}
469
470static void OtherDimTransRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local_data -> work */
471 int local_nx = Lplan->local_nx;
472 int nyz = Lplan->nyz;
473 int rnyz = Lplan->N[1]*Lplan->N[2];
474 int fft_iter;
475 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
476 rfftwnd_plan fft = Lplan->yzplanRC[nFieldsNo];
477 if (nFields > 1) {
478 for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
479 rfftwnd_real_to_complex(fft, nFields,
480 ((fftw_real *) local_data) + (rnyz * nFields) * fft_iter,
481 nFields, 1,
482 ((fftw_complex *)work)+ (nyz * nFields) * fft_iter
483 , nFields, 1);
484 } else {
485 rfftwnd_real_to_complex(fft, local_nx,
486 (fftw_real *) local_data,
487 1, rnyz,
488 (fftw_complex *) work
489 , 1, nyz);
490 }
491}
492
493static void TransposeMPIRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* local_data -> work */
494 MPI_Alltoall(local_data, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
495 work, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
496 plan->comm);
497}
498
499static void LocalPermutationRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* work -> data*/
500 int x,PEy,ly,z,Z,Y,srcIndex,destIndex,Index,cpyes,Ly;
501 int lnx=Lplan->local_nx,lny=plan->NLSR[1]/plan->MaxPE,nz=plan->NLSR[2]/2+1,MaxPE=plan->MaxPE,ny=plan->NLSR[1];
502 int es = el_size*Lplan->PermBlockSize;
503 int LevelFactor = Lplan->LevelFactor;
504 int snz = (nz-1)*es+el_size;
505 struct plan_weight *pw = plan->pw;
506 for (x=0; x < lnx; x++)
507 for (PEy=0; PEy < MaxPE; PEy++)
508 for (ly=0; ly < lny; ly++)
509 for (Ly=0; Ly < LevelFactor;Ly++)
510 for (z=0; z < nz; z++) {
511 Z = z*es;
512 destIndex = Z+snz*(Ly+LevelFactor*(ly+lny*(x+lnx*(PEy))));
513 Index = pw[z+nz*(ly+lny*PEy)].Index;
514 Z = Index%nz;
515 Z = Z*es;
516 Y = ((Index/nz)*LevelFactor+Ly);
517 srcIndex = Z+snz*(Y+LevelFactor*ny*(x));
518 cpyes = (z == nz-1 ? el_size : es);
519 memcpy( &local_data[destIndex],
520 &work[srcIndex],
521 cpyes * sizeof(double));
522 }
523}
524
525/** Normal Fast Fourier Transformation of plane wave coefficients.
526 * If given state is \f$| \psi_i (r) \rangle \f$, result is \f$\langle \chi_{i,G} | \psi_i (r) \rangle\f$
527 * Calls subsequently
528 * -# OtherDimTransRC()\n
529 * -# LocalPermutationRC()\n
530 * -# TransposeMPIRC()\n
531 * -# FirstDimTransRC()\n
532 * \param *plan the transformation plan
533 * \param Level current level number
534 * \param nFieldsNo number of parallel ffts (in case of LevelUp)
535 * \param *local_data real base wave function coefficients, \f$\psi_i (r)\f$, on return \f$\psi_i (G)\f$
536 * \param *work temporary array for coefficients
537 * \warning coefficients in \a *local_date and *work are destroyed!
538 */
539void fft_3d_real_to_complex(const struct fft_plan_3d *plan, const int Level, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local data -> local_data, work destroyed */
540 int el_size = (sizeof(fftw_complex) / sizeof(double));
541 int nFields = plan->NFields[Level][nFieldsNo];
542 struct LevelPlan *Lplan = &plan->Levplan[Level];
543 if (!plan->Lev_CR_RC[2*Level+1]) Error(SomeError,"fft_3d_real_to_complex: !plan->Lev_CR_RC[2*i+1]");
544 if (nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]) Error(SomeError,"fft_3d_real_to_complex: nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]");
545 el_size *= nFields;
546 OtherDimTransRC(plan, Lplan, nFieldsNo, local_data, work);
547 LocalPermutationRC(plan, Lplan, el_size, (double *)local_data, (double *)work);
548 TransposeMPIRC(plan, Lplan, el_size, (double *)local_data, (double *)work);
549 FirstDimTransRC(plan, Lplan, nFieldsNo, local_data, work);
550}
551
552/** Fills \a *local_data with random values.
553 * \param *plan fft plan
554 * \param Level current level number
555 * \param nFields number of nFields (of parallel ffts)
556 * \param *local_data array of coefficients to be set at random values
557 */
558void InitDataTestR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data) {
559 int x,y,z,n,Index,i;
560 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
561 int Ny = plan->Levplan[Level].N[1];
562 int Nz = plan->Levplan[Level].N[2];
563 srand((unsigned int)Level);
564 for (i=0;i<plan->myPE*Nx*Ny*Nz*nFields;i++) rand();
565 for (x=0; x < Nx; x++) {
566 for (y = 0; y < Ny; y++) {
567 for (z = 0; z < Nz; z++) {
568 for (n=0; n < nFields; n++) {
569 Index = n+nFields*(z+Nz*(y+Ny*(x)));
570 local_data[Index]=1.-2.*(rand()/(double)RAND_MAX);
571 }
572 }
573 }
574 }
575}
576
577/*
578static void OutputDataTestWR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data,FILE *target) {
579 int x,y,z,n,Index;
580 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
581 int Ny = plan->Levplan[Level].N[1];
582 int Nz = plan->Levplan[Level].N[2];
583 fprintf(target,"Level=%i\n",Level);
584 for (x=0; x < Nx; x++) {
585 fprintf(target,"x=%i\n",x+Nx*plan->myPE);
586 for (y = 0; y < Ny; y++) {
587 fprintf(target,"y=%5i ",y);
588 for (z = 0; z < Nz; z++) {
589 for (n=0; n < nFields; n++) {
590 Index = n+nFields*(z+2*(Nz/2+1)*(y+Ny*(x)));
591 fprintf(target,"%10.5f ",local_data[Index]);
592 }
593 }
594 fprintf(target,"\n");
595 }
596 }
597}
598
599static void OutputDataTestR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data, double factor,FILE *target) {
600 int x,y,z,n,Index;
601 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
602 int Ny = plan->Levplan[Level].N[1];
603 int Nz = plan->Levplan[Level].N[2];
604 fprintf(target,"Level=%i\n",Level);
605 for (x=0; x < Nx; x++) {
606 fprintf(target,"x=%i\n",x+Nx*plan->myPE);
607 for (y = 0; y < Ny; y++) {
608 fprintf(target,"y=%5i ",y);
609 for (z = 0; z < Nz; z++) {
610 for (n=0; n < nFields; n++) {
611 Index = n+nFields*(z+Nz*(y+Ny*(x)));
612 fprintf(target,"%10.5f ",local_data[Index]*factor);
613 }
614 }
615 fprintf(target,"\n");
616 }
617 }
618}
619
620static void InitDataTestWR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data) {
621 int i,x,y,z,n,Index,Value=0;
622 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
623 int Ny = plan->Levplan[Level].N[1];
624 int Nz = plan->Levplan[Level].N[2];
625 srand(Level);
626 for (i=0;i<plan->myPE*Nx*Ny*Nz*nFields;i++) rand();
627 for (x=0; x < Nx; x++) {
628 for (y = 0; y < Ny; y++) {
629 for (z = 0; z < Nz; z++) {
630 for (n=0; n < nFields; n++) {
631 Index = n+nFields*(z+2*(Nz/2+1)*(y+Ny*(x)));
632 local_data[Index] = 1.-2.*(rand()/(double)RAND_MAX);
633 Value++;
634 }
635 }
636 }
637 }
638}
639
640static void OutputDataTestWCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
641 int x,y,z,yz,n,Index,Y,Z,YZ;
642 int Nx = plan->Levplan[Level].N[0];
643 int Ny = plan->Levplan[Level].N[1]/plan->MaxPE;
644 int Nz = plan->Levplan[Level].N[2]/2+1;
645 int NZ = plan->NLSR[2]/2+1;
646 fprintf(target,"Level=%i\n",Level);
647 for (y = 0; y < Ny; y++) {
648 for (z = 0; z < Nz; z++) {
649 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
650 Z = z/plan->Levplan[Level].LevelFactor;
651 YZ = Z+NZ*Y;
652 yz = z+Nz*y;
653 fprintf(target,"yz=%i(%i)\n",yz+plan->myPE*Ny*Nz,YZ);
654 for (x=0; x < Nx; x++) {
655 for (n=0; n < nFields; n++) {
656 Index = n+nFields*(z+Nz*(x+Nx*(y)));
657 fprintf(target,"%10.5f %10.5f ",local_data[Index].re*factor,local_data[Index].im*factor);
658 }
659 }
660 fprintf(target,"\n");
661 }
662
663 }
664}
665
666static void OutputDataTestCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
667 int x,yz,n,Index,Y,Z,YZ,y,z;
668 int Nx = plan->Levplan[Level].nx;
669 int Ny = plan->Levplan[Level].local_ny ;
670 int Nz = plan->Levplan[Level].nz;
671 int Nyz = Ny*Nz;
672 int NZ = plan->NLSR[2]/2+1;
673 fprintf(target,"Level=%i\n",Level);
674 for (y = 0; y < Ny; y++) {
675 for (z = 0; z < Nz; z++) {
676 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
677 Z = z/plan->Levplan[Level].LevelFactor;
678 YZ = Z+NZ*Y;
679 YZ = plan->pw[YZ].Index;
680 yz = z+Nz*y;
681 fprintf(target,"yz=%i(%i)\n",yz+plan->myPE*Nyz,YZ);
682 for (x=0; x < Nx; x++) {
683 for (n=0; n < nFields; n++) {
684 Index = n+nFields*(x+Nx*(yz));
685 fprintf(target,"%10.5f %10.5f ",((double *)local_data)[2*Index]*factor,((double *)local_data)[2*Index+1]*factor);
686 }
687 }
688 fprintf(target,"\n");
689 }
690 }
691}
692
693static void InitDataTestCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data) {
694 int x,yz,n,Index,Y,Z,YZ,y,z,Value=0;
695 int Nx = plan->Levplan[Level].nx;
696 int Ny = plan->Levplan[Level].local_ny ;
697 int Nz = plan->Levplan[Level].nz;
698 int Nyz = plan->Levplan[Level].ny*Nz;
699 int NZ = plan->NLSR[2]/2+1;
700 int YY;
701 for (y = 0; y < Ny; y++) {
702 for (z = 0; z < Nz; z++) {
703 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
704 YY = (y+plan->myPE*Ny)%plan->Levplan[Level].LevelFactor;
705 Z = z/plan->Levplan[Level].Level;
706 YZ = Z+NZ*Y;
707 YZ = plan->pw[YZ].Index;
708 Z = (YZ%NZ)*plan->Levplan[Level].LevelFactor+(z)%plan->Levplan[Level].LevelFactor;
709 yz = Z+Nz*(((YZ/NZ)*plan->Levplan[Level].LevelFactor+YY));
710 for (x=0; x < Nx; x++) {
711 for (n=0; n < nFields; n++) {
712 Index = n+nFields*(x+Nx*(z+Nz*y));
713 Value = n+nFields*(yz+Nyz*x);
714 local_data[Index].re = Value*2;
715 local_data[Index].im = Value*2+1;
716 }
717 }
718 }
719 }
720}
721
722static void OutputDataTestCB(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
723 int x,yz,n,Index,Y,Z,YZ,y,z;
724 int Nx = plan->Levplan[Level].local_nx;
725 int Ny = plan->Levplan[Level].ny ;
726 int Nz = plan->Levplan[Level].nz;
727 int NZ = plan->NLSR[2]/2+1;
728 fprintf(target,"Level=%i\n",Level);
729 for (x=0; x < Nx; x++) {
730 fprintf(target,"x=%i\n",x+plan->myPE*Nx);
731 for (y = 0; y < Ny; y++) {
732 for (z = 0; z < Nz; z++) {
733 Y = (y)/plan->Levplan[Level].LevelFactor;
734 Z = z/plan->Levplan[Level].LevelFactor;
735 YZ = Z+NZ*Y;
736 YZ = plan->pw[YZ].Index;
737 yz = z+Nz*y;
738 for (n=0; n < nFields; n++) {
739 Index = n+nFields*(yz+Ny*Nz*(x));
740 fprintf(target,"%10.5f %10.5f ",local_data[Index].re*factor,local_data[Index].im*factor);
741 }
742 }
743 fprintf(target,"\n");
744 }
745 }
746}
747
748void fft_3d_c2r_r2c_Test(struct Problem *P, const struct fft_plan_3d *plan, const int nFields) {
749 int i,j,Max=10;
750 double time1, time2, timeCR, timeRC, time1CR, time1RC, factor =1.0;
751 int local_nx, local_x_start, local_ny_after_transpose,local_y_start_after_transpose,total_local_size;
752 FILE *data0,*data1,*data0C,*data1C;
753 OpenFileNo2(P,&data0,".data0",plan->myPE,"w",P->Call.out[ReadOut]);
754 OpenFileNo2(P,&data1,".data1",plan->myPE,"w",P->Call.out[ReadOut]);
755 OpenFileNo2(P,&data0C,".data0C",plan->myPE,"w",P->Call.out[ReadOut]);
756 OpenFileNo2(P,&data1C,".data1C",plan->myPE,"w",P->Call.out[ReadOut]);
757 fprintf(stderr, "(%i)C2RTest: \n", P->Par.me);
758 for (i=plan->MaxLevel-1; i >= 0; i--) {
759 timeCR = 0;
760 timeRC = 0;
761 time1RC = 0;
762 time1CR = 0;
763 fprintf(stderr,"PE(%i) L(%i) Nx(%i) Nyz(%i) nF(%i)\n",plan->myPE,i,plan->Levplan[i].N[0],plan->Levplan[i].local_nyz,nFields);
764 factor = 1./(plan->Levplan[i].N[0]*plan->Levplan[i].N[1]*plan->Levplan[i].N[2]);
765 for (j=0; j < Max; j++) {
766 InitDataTestR(plan,i,nFields,(fftw_real *)plan->local_data);
767 time1 = GetTime();
768 fft_3d_real_to_complex(plan,i,nFields,plan->local_data, plan->work);
769 time2 = GetTime();
770 timeRC += (time2 - time1);
771 time1 = GetTime();
772 fft_3d_complex_to_real(plan,i,nFields,plan->local_data, plan->work);
773 time2 = GetTime();
774 timeCR += (time2 - time1);
775
776 }
777 fprintf(stderr,"PE(%i): L(%i) CR:sec(%g) RC:sec(%g)\n",plan->myPE, i, timeCR, timeRC);
778 }
779 fclose(data0);
780 fclose(data1);
781 fclose(data0C);
782 fclose(data1C);
783}
784*/
Note: See TracBrowser for help on using the repository browser.