1 | //
|
---|
2 | // hsosv2lb.cc
|
---|
3 | //
|
---|
4 | // Copyright (C) 1996 Limit Point Systems, Inc.
|
---|
5 | //
|
---|
6 | // Author: Ida Nielsen <ida@kemi.aau.dk>
|
---|
7 | // Maintainer: LPS
|
---|
8 | //
|
---|
9 | // This file is part of the SC Toolkit.
|
---|
10 | //
|
---|
11 | // The SC Toolkit is free software; you can redistribute it and/or modify
|
---|
12 | // it under the terms of the GNU Library General Public License as published by
|
---|
13 | // the Free Software Foundation; either version 2, or (at your option)
|
---|
14 | // any later version.
|
---|
15 | //
|
---|
16 | // The SC Toolkit is distributed in the hope that it will be useful,
|
---|
17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | // GNU Library General Public License for more details.
|
---|
20 | //
|
---|
21 | // You should have received a copy of the GNU Library General Public License
|
---|
22 | // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
|
---|
23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | //
|
---|
25 | // The U.S. Government is granted a limited license as per AL 91-7.
|
---|
26 | //
|
---|
27 |
|
---|
28 | #include <math.h>
|
---|
29 |
|
---|
30 | #include <util/misc/timer.h>
|
---|
31 | #include <util/misc/formio.h>
|
---|
32 | #include <chemistry/molecule/molecule.h>
|
---|
33 | #include <chemistry/qc/mbpt/mbpt.h>
|
---|
34 | #include <chemistry/qc/mbpt/bzerofast.h>
|
---|
35 |
|
---|
36 | using namespace std;
|
---|
37 | using namespace sc;
|
---|
38 |
|
---|
39 | static void iqs(int *item,int *index,int left,int right);
|
---|
40 | static void iquicksort(int *item,int *index,int n);
|
---|
41 | static void findprocminmax(int *nbf, int nproc,
|
---|
42 | int *procmin, int *procmax, int *minbf, int *maxbf);
|
---|
43 | static void findshellmax(int *myshellsizes, int nRshell, int *shellmax,
|
---|
44 | int *shellmaxindex);
|
---|
45 | static void expandintarray(int *&a, int dim);
|
---|
46 |
|
---|
47 | void
|
---|
48 | MBPT2::compute_hsos_v2_lb()
|
---|
49 | {
|
---|
50 | int i, j, k, l;
|
---|
51 | int s1, s2;
|
---|
52 | int a, b;
|
---|
53 | int isocc, asocc; // indices running over singly occupied orbitals
|
---|
54 | int nfuncmax = basis()->max_nfunction_in_shell();
|
---|
55 | int nvir;
|
---|
56 | int nshell;
|
---|
57 | int shellmax;
|
---|
58 | int shellmaxindex;
|
---|
59 | int nocc=0,ndocc=0,nsocc=0;
|
---|
60 | int i_offset;
|
---|
61 | int npass, pass;
|
---|
62 | int ni;
|
---|
63 | int np, nq, nr, ns;
|
---|
64 | int P, Q, R, S;
|
---|
65 | int p, q, r, s;
|
---|
66 | int bf1, bf2, bf3, bf4;
|
---|
67 | int bf3_offset;
|
---|
68 | int nbfmoved;
|
---|
69 | int nbfav; // average number of r basis functions per node
|
---|
70 | int minbf, maxbf; // max/min number of (r) basis functions on a node
|
---|
71 | int index;
|
---|
72 | int compute_index;
|
---|
73 | int col_index;
|
---|
74 | int tmp_index;
|
---|
75 | int dim_ij;
|
---|
76 | int docc_index, socc_index, vir_index;
|
---|
77 | int me;
|
---|
78 | int nproc;
|
---|
79 | int procmin, procmax; // processor with most/fewest basis functions
|
---|
80 | int rest;
|
---|
81 | int r_offset;
|
---|
82 | int min;
|
---|
83 | int iproc;
|
---|
84 | int nRshell;
|
---|
85 | int imyshell;
|
---|
86 | int *myshells; // the R indices processed by node me
|
---|
87 | int *myshellsizes; // sizes of the shells (after split) on node me
|
---|
88 | int *split_info; // on each node: offset for each shell; -1 if shell not split
|
---|
89 | int *shellsize; // size of each shell
|
---|
90 | int *sorted_shells; // sorted shell indices: large shells->small shells
|
---|
91 | int *nbf; // number of basis functions processed by each node
|
---|
92 | int *proc; // element k: processor which will process shell k
|
---|
93 | int aoint_computed = 0;
|
---|
94 | double A, B, C, ni_top, max, ni_double; // variables used to compute ni
|
---|
95 | double *evals_open; // reordered scf eigenvalues
|
---|
96 | const double *intbuf; // 2-electron AO integral buffer
|
---|
97 | double *trans_int1; // partially transformed integrals
|
---|
98 | double *trans_int2; // partially transformed integrals
|
---|
99 | double *trans_int3; // partially transformed integrals
|
---|
100 | double *trans_int4; // fully transformed integrals
|
---|
101 | double *trans_int4_tmp; // scratch array
|
---|
102 | double *mo_int_do_so_vir=0;//mo integral (is|sa); i:d.o.,s:s.o.,a:vir
|
---|
103 | double *mo_int_tmp=0; // scratch array used in global summations
|
---|
104 | double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s
|
---|
105 | double *socc_sum_tmp=0;// scratch array
|
---|
106 | double *iqrs, *iprs;
|
---|
107 | double *iars_ptr;
|
---|
108 | double iars;
|
---|
109 | double iajr;
|
---|
110 | double *iajr_ptr;
|
---|
111 | double *iajb;
|
---|
112 | double pqrs;
|
---|
113 | double *c_qa;
|
---|
114 | double *c_rb, *c_pi, *c_qi, *c_sj;
|
---|
115 | double delta_ijab;
|
---|
116 | double delta;
|
---|
117 | double contrib1, contrib2;
|
---|
118 | double ecorr_opt2=0,ecorr_opt1=0;
|
---|
119 | double ecorr_zapt2;
|
---|
120 | double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
|
---|
121 | double escf;
|
---|
122 | double eopt2,eopt1,ezapt2;
|
---|
123 | double tol; // log2 of the erep tolerance (erep < 2^tol => discard)
|
---|
124 |
|
---|
125 | me = msg_->me();
|
---|
126 |
|
---|
127 | ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl;
|
---|
128 |
|
---|
129 | tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
|
---|
130 |
|
---|
131 | nproc = msg_->n();
|
---|
132 | ExEnv::out0() << indent << "nproc = " << nproc << endl;
|
---|
133 |
|
---|
134 | ndocc = nsocc = 0;
|
---|
135 | const double epsilon = 1.0e-4;
|
---|
136 | for (i=0; i<oso_dimension()->n(); i++) {
|
---|
137 | if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
|
---|
138 | else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
|
---|
139 | }
|
---|
140 |
|
---|
141 | // Do a few preliminary tests to make sure the desired calculation
|
---|
142 | // can be done (and appears to be meaningful!)
|
---|
143 |
|
---|
144 | if (ndocc == 0 && nsocc == 0) {
|
---|
145 | ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
|
---|
146 | abort();
|
---|
147 | }
|
---|
148 |
|
---|
149 | if (nfzc > ndocc) {
|
---|
150 | ExEnv::err0()
|
---|
151 | << "The number of frozen core orbitals exceeds the number" << endl
|
---|
152 | << "of doubly occupied orbitals; program exiting" << endl;
|
---|
153 | abort();
|
---|
154 | }
|
---|
155 |
|
---|
156 | if (nfzv > noso - ndocc - nsocc) {
|
---|
157 | ExEnv::err0()
|
---|
158 | << "The number of frozen virtual orbitals exceeds the number" << endl
|
---|
159 | << "of unoccupied orbitals; program exiting" << endl;
|
---|
160 | abort();
|
---|
161 | }
|
---|
162 |
|
---|
163 | ndocc = ndocc - nfzc;
|
---|
164 | // nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb.
|
---|
165 | nvir = noso - ndocc - nfzc - nfzv;
|
---|
166 | // nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb.
|
---|
167 | nocc = ndocc + nsocc;
|
---|
168 | nshell = basis()->nshell();
|
---|
169 |
|
---|
170 | // Allocate storage for some arrays used for keeping track of which R
|
---|
171 | // indices are processed by each node
|
---|
172 | shellsize = (int*) malloc(nshell*sizeof(int));
|
---|
173 | sorted_shells = (int*) malloc(nshell*sizeof(int));
|
---|
174 | nbf = (int*) malloc(nproc*sizeof(int));
|
---|
175 | proc = (int*) malloc(nshell*sizeof(int));
|
---|
176 |
|
---|
177 |
|
---|
178 | ///////////////////////////////////////////////////////
|
---|
179 | // Begin distributing R shells between nodes so all
|
---|
180 | // nodes get ca. the same number of r basis functions
|
---|
181 | ///////////////////////////////////////////////////////
|
---|
182 |
|
---|
183 | // Compute the size of each shell
|
---|
184 | for (i=0; i<nshell; i++) {
|
---|
185 | shellsize[i] = basis()->shell(i).nfunction();
|
---|
186 | }
|
---|
187 |
|
---|
188 | // Do an index sort (large -> small) of shellsize to form sorted_shells
|
---|
189 | iquicksort(shellsize,sorted_shells,nshell);
|
---|
190 |
|
---|
191 | // Initialize nbf
|
---|
192 | for (i=0; i<nproc; i++) nbf[i] = 0;
|
---|
193 |
|
---|
194 | for (i=0; i<nshell; i++) {
|
---|
195 | min = nbf[0];
|
---|
196 | iproc = 0;
|
---|
197 | for (j=1; j<nproc; j++) {
|
---|
198 | if (nbf[j] < min) {
|
---|
199 | iproc = j;
|
---|
200 | min = nbf[j];
|
---|
201 | }
|
---|
202 | }
|
---|
203 | proc[sorted_shells[i]] = iproc;
|
---|
204 | nbf[iproc] += shellsize[sorted_shells[i]];
|
---|
205 | }
|
---|
206 | if (me == 0) {
|
---|
207 | ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
|
---|
208 | for (i=0; i<nproc; i++) {
|
---|
209 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
210 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
211 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
212 | }
|
---|
213 | ExEnv::out0() << endl;
|
---|
214 | }
|
---|
215 |
|
---|
216 | // Determine which shells are to be processed by node me
|
---|
217 | nRshell = 0;
|
---|
218 | for (i=0; i<nshell; i++) {
|
---|
219 | if (proc[i] == me) nRshell++;
|
---|
220 | }
|
---|
221 | myshells = (int*) malloc(nRshell*sizeof(int));
|
---|
222 | imyshell = 0;
|
---|
223 | for (i=0; i<nshell; i++) {
|
---|
224 | if (proc[i] == me) {
|
---|
225 | myshells[imyshell] = i;
|
---|
226 | imyshell++;
|
---|
227 | }
|
---|
228 | }
|
---|
229 |
|
---|
230 | /////////////////////////////////////////////////////////////
|
---|
231 | // End of preliminary distribution of R shells between nodes
|
---|
232 | /////////////////////////////////////////////////////////////
|
---|
233 |
|
---|
234 | // Compute the average number of basis functions per node
|
---|
235 | nbfav = nbasis/nproc;
|
---|
236 | if (nbasis%nproc) nbfav++;
|
---|
237 |
|
---|
238 | myshellsizes = (int*) malloc(nRshell*sizeof(int));
|
---|
239 | split_info = (int*) malloc(nRshell*sizeof(int));
|
---|
240 | for (j=0; j<nRshell; j++) {
|
---|
241 | myshellsizes[j] = basis()->shell(myshells[j]).nfunction();
|
---|
242 | split_info[j] = -1;
|
---|
243 | }
|
---|
244 |
|
---|
245 | // Find the processor with the most/fewest basis functions
|
---|
246 | findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
|
---|
247 | if (maxbf > nbfav) {
|
---|
248 | ExEnv::out0() << indent << "Redistributing basis functions" << endl;
|
---|
249 | }
|
---|
250 |
|
---|
251 | while (maxbf > nbfav) {
|
---|
252 | msg_->sync();
|
---|
253 | if (me == procmax) {
|
---|
254 |
|
---|
255 | findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex);
|
---|
256 | nbfmoved = 0;
|
---|
257 | while (maxbf>nbfav && minbf<nbfav && shellmax>1) {
|
---|
258 | shellmax--;
|
---|
259 | nbfmoved++;
|
---|
260 | maxbf--;
|
---|
261 | minbf++;
|
---|
262 | }
|
---|
263 | myshellsizes[shellmaxindex] = shellmax;
|
---|
264 | if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0;
|
---|
265 | shellmax += nbfmoved;
|
---|
266 |
|
---|
267 | // Send nbfmoved from procmax to all other nodes
|
---|
268 | msg_->bcast(nbfmoved,procmax);
|
---|
269 |
|
---|
270 | // Send variables to node procmin
|
---|
271 | msg_->send(procmin,&myshells[shellmaxindex],1);
|
---|
272 | msg_->send(procmin,&shellmax,1);
|
---|
273 |
|
---|
274 | }
|
---|
275 | else {
|
---|
276 | // Receive nbfmoved from procmax
|
---|
277 | msg_->bcast(nbfmoved,procmax);
|
---|
278 | }
|
---|
279 |
|
---|
280 | nbf[procmax] -= nbfmoved;
|
---|
281 |
|
---|
282 | if (me == procmin) {
|
---|
283 | expandintarray(myshellsizes,nRshell);
|
---|
284 | expandintarray(myshells,nRshell);
|
---|
285 | expandintarray(split_info,nRshell);
|
---|
286 | nRshell++;
|
---|
287 | myshellsizes[nRshell-1] = nbfmoved;
|
---|
288 | msg_->recv(procmax,&myshells[nRshell-1],1);
|
---|
289 | msg_->recv(procmax,&split_info[nRshell-1],1);
|
---|
290 | split_info[nRshell-1] -= myshellsizes[nRshell-1];
|
---|
291 | }
|
---|
292 |
|
---|
293 | nbf[procmin] += nbfmoved;
|
---|
294 | msg_->sync();
|
---|
295 | findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
|
---|
296 |
|
---|
297 | }
|
---|
298 |
|
---|
299 | if (me == 0) {
|
---|
300 | ExEnv::out0() << indent
|
---|
301 | << "New distribution of basis functions between nodes:" << endl;
|
---|
302 | for (i=0; i<nproc; i++) {
|
---|
303 | if (i%12 == 0) ExEnv::out0() << indent;
|
---|
304 | ExEnv::out0() << scprintf(" %4i",nbf[i]);
|
---|
305 | if ((i+1)%12 == 0) ExEnv::out0() << endl;
|
---|
306 | }
|
---|
307 | ExEnv::out0() << endl;
|
---|
308 | }
|
---|
309 |
|
---|
310 |
|
---|
311 | //////////////////////////////////////////////////////////
|
---|
312 | // End of distribution of R shells and r basis functions
|
---|
313 | //////////////////////////////////////////////////////////
|
---|
314 |
|
---|
315 | // Compute batch size ni for opt2 loops;
|
---|
316 | // need to store the following arrays of type double : trans_int1-4,
|
---|
317 | // trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp,
|
---|
318 | // mo_int_do_so_vir, mo_int_tmp,
|
---|
319 | // and the following arrays of type int: myshells, shellsize,
|
---|
320 | // sorted_shells, nbf, and proc
|
---|
321 | A = -0.5*sizeof(double)*nbf[me]*nvir;
|
---|
322 | B = sizeof(double)*(nfuncmax*nfuncmax*nbasis + nvir + nocc*nbf[me]*nvir
|
---|
323 | + nbf[me]*nvir*0.5);
|
---|
324 | C = sizeof(double)*(2*nvir*nvir + (nbasis+1)*(nvir+nocc) + 2*nsocc
|
---|
325 | + 2*ndocc*nsocc*(nvir-nsocc))
|
---|
326 | + sizeof(int)*(3*nshell + nproc + nRshell);
|
---|
327 | ni_top = -B/(2*A);
|
---|
328 | max = A*ni_top*ni_top + B*ni_top +C;
|
---|
329 | if (max <= mem_alloc) {
|
---|
330 | ni = nocc;
|
---|
331 | }
|
---|
332 | else {
|
---|
333 | ni_double = (-B + sqrt((double)(B*B - 4*A*(C-mem_alloc))))/(2*A);
|
---|
334 | ni = (int) ni_double;
|
---|
335 | if (ni > nocc) ni = nocc;
|
---|
336 | max = mem_alloc;
|
---|
337 | }
|
---|
338 |
|
---|
339 | size_t mem_remaining = mem_alloc - (size_t)max;
|
---|
340 |
|
---|
341 | // Set ni equal to the smallest batch size for any node
|
---|
342 | msg_->min(ni);
|
---|
343 | msg_->bcast(ni);
|
---|
344 |
|
---|
345 | if (ni < nsocc) {
|
---|
346 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
347 | abort();
|
---|
348 | }
|
---|
349 |
|
---|
350 | if (ni < 1) { // this applies only to a closed shell case
|
---|
351 | ExEnv::err0() << "Not enough memory allocated" << endl;
|
---|
352 | abort();
|
---|
353 | }
|
---|
354 |
|
---|
355 | ExEnv::out0() << indent << "Computed batchsize: " << ni << endl;
|
---|
356 |
|
---|
357 | if (nocc == ni) {
|
---|
358 | npass = 1;
|
---|
359 | rest = 0;
|
---|
360 | }
|
---|
361 | else {
|
---|
362 | rest = nocc%ni;
|
---|
363 | npass = (nocc - rest)/ni + 1;
|
---|
364 | if (rest == 0) npass--;
|
---|
365 | }
|
---|
366 |
|
---|
367 | if (me == 0) {
|
---|
368 | ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
|
---|
369 | " ndocc nsocc nvir nfzc nfzv" << endl;
|
---|
370 | ExEnv::out0() << indent
|
---|
371 | << scprintf(" %-4i %-3i %-5i %-4i %-3i"
|
---|
372 | " %-3i %-3i %-3i %-3i %-3i\n",
|
---|
373 | npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
|
---|
374 | ExEnv::out0() << indent
|
---|
375 | << scprintf("Using %i bytes of memory",mem_alloc) << endl;
|
---|
376 | }
|
---|
377 |
|
---|
378 | //////////////////////
|
---|
379 | // Test that ni is OK
|
---|
380 | //////////////////////
|
---|
381 | if (me == 0) {
|
---|
382 | ExEnv::out0() << indent
|
---|
383 | << scprintf("Memory allocated: %i", mem_alloc) << endl;
|
---|
384 | ExEnv::out0() << indent
|
---|
385 | << scprintf("Memory used : %lf", A*ni*ni+B*ni+C) << endl;
|
---|
386 | if (A*ni*ni + B*ni +C > mem_alloc) {
|
---|
387 | ExEnv::err0() << "Problems with memory allocation: "
|
---|
388 | << "Using more memory than allocated" << endl;
|
---|
389 | abort();
|
---|
390 | }
|
---|
391 | }
|
---|
392 |
|
---|
393 | //////////////////////////////////////////////////////////////////
|
---|
394 | // The scf vector might be distributed between the nodes,
|
---|
395 | // but for OPT2 each node needs its own copy of the vector;
|
---|
396 | // therefore, put a copy of the scf vector on each node;
|
---|
397 | // while doing this, duplicate columns corresponding to singly
|
---|
398 | // occupied orbitals and order columns as [socc docc socc unocc]
|
---|
399 | // Also rearrange scf eigenvalues as [socc docc socc unocc]
|
---|
400 | // want socc first to get the socc's in the first batch
|
---|
401 | // (need socc's to compute energy denominators - see
|
---|
402 | // socc_sum comment below)
|
---|
403 | /////////////////////////////////////////////////////////
|
---|
404 | evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
|
---|
405 |
|
---|
406 | RefDiagSCMatrix occ;
|
---|
407 | RefDiagSCMatrix evals;
|
---|
408 | RefSCMatrix Scf_Vec;
|
---|
409 | eigen(evals, Scf_Vec, occ);
|
---|
410 |
|
---|
411 | if (debug_) {
|
---|
412 | evals.print("eigenvalues");
|
---|
413 | Scf_Vec.print("eigenvectors");
|
---|
414 | }
|
---|
415 |
|
---|
416 | double *scf_vectort_dat = new double[nbasis*noso];
|
---|
417 | Scf_Vec->convert(scf_vectort_dat);
|
---|
418 |
|
---|
419 | double** scf_vectort = new double*[nocc + nvir];
|
---|
420 |
|
---|
421 | int idoc = 0, ivir = 0, isoc = 0;
|
---|
422 | for (i=nfzc; i<noso-nfzv; i++) {
|
---|
423 | if (occ(i) >= 2.0 - epsilon) {
|
---|
424 | evals_open[idoc+nsocc] = evals(i);
|
---|
425 | scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
426 | idoc++;
|
---|
427 | }
|
---|
428 | else if (occ(i) >= 1.0 - epsilon) {
|
---|
429 | evals_open[isoc] = evals(i);
|
---|
430 | scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
|
---|
431 | evals_open[isoc+nocc] = evals(i);
|
---|
432 | scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
|
---|
433 | isoc++;
|
---|
434 | }
|
---|
435 | else {
|
---|
436 | if (ivir < nvir) {
|
---|
437 | evals_open[ivir+nocc+nsocc] = evals(i);
|
---|
438 | scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
|
---|
439 | }
|
---|
440 | ivir++;
|
---|
441 | }
|
---|
442 | }
|
---|
443 | // need the transpose of the vector
|
---|
444 | double **scf_vector = new double*[nbasis];
|
---|
445 | double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
|
---|
446 | for (i=0; i<nbasis; i++) {
|
---|
447 | scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
|
---|
448 | for (j=0; j<nocc+nvir; j++) {
|
---|
449 | scf_vector[i][j] = scf_vectort[j][i];
|
---|
450 | }
|
---|
451 | }
|
---|
452 | delete[] scf_vectort;
|
---|
453 | delete[] scf_vectort_dat;
|
---|
454 |
|
---|
455 | ////////////////////////////////////////
|
---|
456 | // Allocate storage for various arrays
|
---|
457 | ////////////////////////////////////////
|
---|
458 |
|
---|
459 | dim_ij = nocc*ni - ni*(ni - 1)/2;
|
---|
460 |
|
---|
461 | trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
|
---|
462 | trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
|
---|
463 | trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
|
---|
464 | trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
465 | trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
|
---|
466 | if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
|
---|
467 | if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
|
---|
468 | if (nsocc) mo_int_do_so_vir =
|
---|
469 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
470 | if (nsocc) mo_int_tmp =
|
---|
471 | (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
|
---|
472 |
|
---|
473 | if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
|
---|
474 |
|
---|
475 | // create the integrals object
|
---|
476 | integral()->set_storage(mem_remaining);
|
---|
477 | tbint_ = integral()->electron_repulsion();
|
---|
478 | intbuf = tbint_->buffer();
|
---|
479 |
|
---|
480 | /////////////////////////////////////
|
---|
481 | // Begin opt2 loops
|
---|
482 | /////////////////////////////////////
|
---|
483 |
|
---|
484 |
|
---|
485 | for (pass=0; pass<npass; pass++) {
|
---|
486 | i_offset = pass*ni;
|
---|
487 | if ((pass == npass - 1) && (rest != 0)) ni = rest;
|
---|
488 |
|
---|
489 | r_offset = 0;
|
---|
490 | bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
|
---|
491 |
|
---|
492 | tim_enter("RS loop");
|
---|
493 |
|
---|
494 | for (imyshell=0; imyshell<nRshell; imyshell++) {
|
---|
495 |
|
---|
496 | R = myshells[imyshell];
|
---|
497 | nr = myshellsizes[imyshell];
|
---|
498 |
|
---|
499 | for (S = 0; S < nshell; S++) {
|
---|
500 | ns = basis()->shell(S).nfunction();
|
---|
501 | tim_enter("bzerofast trans_int1");
|
---|
502 | bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
|
---|
503 | tim_exit("bzerofast trans_int1");
|
---|
504 |
|
---|
505 | tim_enter("PQ loop");
|
---|
506 |
|
---|
507 | for (P = 0; P < nshell; P++) {
|
---|
508 | np = basis()->shell(P).nfunction();
|
---|
509 |
|
---|
510 | for (Q = 0; Q <= P; Q++) {
|
---|
511 | if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
|
---|
512 | continue; // skip ereps less than tol
|
---|
513 | }
|
---|
514 |
|
---|
515 | aoint_computed++;
|
---|
516 |
|
---|
517 | nq = basis()->shell(Q).nfunction();
|
---|
518 |
|
---|
519 | tim_enter("erep");
|
---|
520 | tbint_->compute_shell(P,Q,R,S);
|
---|
521 | tim_exit("erep");
|
---|
522 |
|
---|
523 | tim_enter("1. quart. tr.");
|
---|
524 |
|
---|
525 | for (bf1 = 0; bf1 < np; bf1++) {
|
---|
526 | p = basis()->shell_to_function(P) + bf1;
|
---|
527 |
|
---|
528 | for (bf2 = 0; bf2 < nq; bf2++) {
|
---|
529 | q = basis()->shell_to_function(Q) + bf2;
|
---|
530 | if (q > p) {
|
---|
531 | // if q > p: want to skip the loops over bf3-4
|
---|
532 | // and larger bf2 values, so increment bf1 by 1
|
---|
533 | // ("break")
|
---|
534 | break;
|
---|
535 | }
|
---|
536 |
|
---|
537 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
538 | bf3_offset = 0;
|
---|
539 | if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];
|
---|
540 |
|
---|
541 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
542 |
|
---|
543 | index = bf4 + ns*(bf3+bf3_offset +
|
---|
544 | basis()->shell(R).nfunction()*(bf2 + nq*bf1));
|
---|
545 |
|
---|
546 | if (fabs(intbuf[index]) > 1.0e-15) {
|
---|
547 | pqrs = intbuf[index];
|
---|
548 |
|
---|
549 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
550 | iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
|
---|
551 |
|
---|
552 | if (p == q) pqrs *= 0.5;
|
---|
553 | col_index = i_offset;
|
---|
554 | c_pi = &scf_vector[p][col_index];
|
---|
555 | c_qi = &scf_vector[q][col_index];
|
---|
556 |
|
---|
557 | for (i=ni; i; i--) {
|
---|
558 | *iqrs++ += pqrs * *c_pi++;
|
---|
559 | *iprs++ += pqrs * *c_qi++;
|
---|
560 | }
|
---|
561 | }
|
---|
562 | } // exit bf4 loop
|
---|
563 | } // exit bf3 loop
|
---|
564 | } // exit bf2 loop
|
---|
565 | } // exit bf1 loop
|
---|
566 | tim_exit("1. quart. tr.");
|
---|
567 | } // exit Q loop
|
---|
568 | } // exit P loop
|
---|
569 | tim_exit("PQ loop");
|
---|
570 |
|
---|
571 | // Begin second and third quarter transformations
|
---|
572 |
|
---|
573 | for (bf3 = 0; bf3 < nr; bf3++) {
|
---|
574 | r = r_offset + bf3;
|
---|
575 |
|
---|
576 | for (bf4 = 0; bf4 < ns; bf4++) {
|
---|
577 | s = basis()->shell_to_function(S) + bf4;
|
---|
578 |
|
---|
579 | tim_enter("bzerofast trans_int2");
|
---|
580 | bzerofast(trans_int2,nvir*ni);
|
---|
581 | tim_exit("bzerofast trans_int2");
|
---|
582 |
|
---|
583 | tim_enter("2. quart. tr.");
|
---|
584 |
|
---|
585 | for (q = 0; q < nbasis; q++) {
|
---|
586 | iars_ptr = trans_int2;
|
---|
587 | iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
|
---|
588 | c_qa = &scf_vector[q][nocc];
|
---|
589 |
|
---|
590 | for (a = 0; a < nvir; a++) {
|
---|
591 |
|
---|
592 | for (i=ni; i; i--) {
|
---|
593 | *iars_ptr++ += *c_qa * *iqrs++;
|
---|
594 | }
|
---|
595 |
|
---|
596 | iqrs -= ni;
|
---|
597 | c_qa++;
|
---|
598 | }
|
---|
599 | } // exit q loop
|
---|
600 | tim_exit("2. quart. tr.");
|
---|
601 |
|
---|
602 | // Begin third quarter transformation
|
---|
603 |
|
---|
604 | tim_enter("3. quart. tr.");
|
---|
605 |
|
---|
606 | for (i=0; i<ni; i++) {
|
---|
607 | tmp_index = i*(i+1)/2 + i*i_offset;
|
---|
608 |
|
---|
609 | for (a=0; a<nvir; a++) {
|
---|
610 | iars = trans_int2[a*ni + i];
|
---|
611 | c_sj = scf_vector[s];
|
---|
612 | iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
|
---|
613 |
|
---|
614 | for (j=0; j<=i+i_offset; j++) {
|
---|
615 | *iajr_ptr++ += *c_sj++ * iars;
|
---|
616 | }
|
---|
617 | }
|
---|
618 | } // exit i loop
|
---|
619 | tim_exit("3. quart. tr.");
|
---|
620 |
|
---|
621 | } // exit bf4 loop
|
---|
622 | } // exit bf3 loop
|
---|
623 |
|
---|
624 | } // exit S loop
|
---|
625 | r_offset += nr;
|
---|
626 | } // exit R loop
|
---|
627 | tim_exit("RS loop");
|
---|
628 |
|
---|
629 | // Begin fourth quarter transformation;
|
---|
630 | // first tansform integrals with only s.o. indices;
|
---|
631 | // these integrals are needed to compute the denominators
|
---|
632 | // in the various terms contributing to the correlation energy
|
---|
633 | // and must all be computed in the first pass;
|
---|
634 | // the integrals are summed into the array socc_sum:
|
---|
635 | // socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc)
|
---|
636 | // (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)
|
---|
637 | // the individual integrals are not saved here, only the sums are kept
|
---|
638 |
|
---|
639 | if (pass == 0) {
|
---|
640 | tim_enter("4. quart. tr.");
|
---|
641 | if (nsocc) bzerofast(socc_sum,nsocc);
|
---|
642 | for (isocc=0; isocc<nsocc; isocc++) {
|
---|
643 |
|
---|
644 | index = 0;
|
---|
645 | for (i=0; i<nRshell; i++) {
|
---|
646 | for (j=0; j<myshellsizes[i]; j++) {
|
---|
647 | r = basis()->shell_to_function(myshells[i]) + j;
|
---|
648 | if (split_info[i] != -1) r += split_info[i];
|
---|
649 |
|
---|
650 | for (asocc=0; asocc<nsocc; asocc++) {
|
---|
651 | socc_sum[isocc] += scf_vector[r][nocc+asocc]*
|
---|
652 | trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
|
---|
653 | + isocc + dim_ij*(asocc + nvir*index)];
|
---|
654 | }
|
---|
655 | index++;
|
---|
656 | }
|
---|
657 | }
|
---|
658 | } // exit i loop
|
---|
659 |
|
---|
660 | tim_exit("4. quart. tr.");
|
---|
661 |
|
---|
662 | // Sum socc_sum contributions from each node (only if nsocc > 0
|
---|
663 | // since gop1 will fail if nsocc = 0)
|
---|
664 | if (nsocc > 0) {
|
---|
665 | tim_enter("global sum socc_sum");
|
---|
666 | msg_->sum(socc_sum,nsocc,socc_sum_tmp);
|
---|
667 | tim_exit("global sum socc_sum");
|
---|
668 | }
|
---|
669 |
|
---|
670 | }
|
---|
671 |
|
---|
672 | // Now we have all the sums of integrals involving s.o.'s (socc_sum);
|
---|
673 | // begin fourth quarter transformation for all integrals (including
|
---|
674 | // integrals with only s.o. indices); use restriction j <= (i_offset+i)
|
---|
675 | // to save flops
|
---|
676 |
|
---|
677 | compute_index = 0;
|
---|
678 |
|
---|
679 | for (i=0; i<ni; i++) {
|
---|
680 |
|
---|
681 | for (j=0; j <= (i_offset+i); j++) {
|
---|
682 |
|
---|
683 | tim_enter("4. quart. tr.");
|
---|
684 |
|
---|
685 | bzerofast(trans_int4,nvir*nvir);
|
---|
686 |
|
---|
687 | index = 0;
|
---|
688 | for (k=0; k<nRshell; k++) {
|
---|
689 | for (l=0; l<myshellsizes[k]; l++) {
|
---|
690 | r = basis()->shell_to_function(myshells[k]) + l;
|
---|
691 | if (split_info[k] != -1) r += split_info[k];
|
---|
692 |
|
---|
693 | for (a=0; a<nvir; a++) {
|
---|
694 | iajb = &trans_int4[a*nvir];
|
---|
695 | iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
|
---|
696 | c_rb = &scf_vector[r][nocc];
|
---|
697 |
|
---|
698 | for (b=0; b<nvir; b++) {
|
---|
699 | *iajb++ += *c_rb++ * iajr;
|
---|
700 | }
|
---|
701 | }
|
---|
702 | index++;
|
---|
703 | }
|
---|
704 | } // end of k loop
|
---|
705 |
|
---|
706 | tim_exit("4. quart. tr.");
|
---|
707 |
|
---|
708 | tim_enter("global sum trans_int4");
|
---|
709 | msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
|
---|
710 | tim_exit("global sum trans_int4");
|
---|
711 |
|
---|
712 | // We now have the fully transformed integrals (ia|jb)
|
---|
713 | // for one i, one j (j <= i_offset+i), and all a and b;
|
---|
714 | // compute contribution to the OPT1 and OPT2 correlation
|
---|
715 | // energies; use restriction b <= a to save flops
|
---|
716 |
|
---|
717 | tim_enter("compute ecorr");
|
---|
718 |
|
---|
719 | for (a=0; a<nvir; a++) {
|
---|
720 | for (b=0; b<=a; b++) {
|
---|
721 | compute_index++;
|
---|
722 | if (compute_index%nproc != me) continue;
|
---|
723 |
|
---|
724 | docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
|
---|
725 | + (j >= nsocc && j < nocc);
|
---|
726 | socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
|
---|
727 | vir_index = (a >= nsocc) + (b >= nsocc);
|
---|
728 |
|
---|
729 | if (socc_index >= 3) continue; // skip to next b value
|
---|
730 |
|
---|
731 | delta_ijab = evals_open[i_offset+i] + evals_open[j]
|
---|
732 | - evals_open[nocc+a] - evals_open[nocc+b];
|
---|
733 |
|
---|
734 | // Determine integral type and compute energy contribution
|
---|
735 | if (docc_index == 2 && vir_index == 2) {
|
---|
736 | if (i_offset+i == j && a == b) {
|
---|
737 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
738 | ecorr_opt2 += contrib1/delta_ijab;
|
---|
739 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
740 | }
|
---|
741 | else if (i_offset+i == j || a == b) {
|
---|
742 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
743 | ecorr_opt2 += 2*contrib1/delta_ijab;
|
---|
744 | ecorr_opt1 += 2*contrib1/delta_ijab;
|
---|
745 | }
|
---|
746 | else {
|
---|
747 | contrib1 = trans_int4[a*nvir + b];
|
---|
748 | contrib2 = trans_int4[b*nvir + a];
|
---|
749 | ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
750 | - contrib1*contrib2)/delta_ijab;
|
---|
751 | ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
|
---|
752 | - contrib1*contrib2)/delta_ijab;
|
---|
753 | }
|
---|
754 | }
|
---|
755 | else if (docc_index == 2 && socc_index == 2) {
|
---|
756 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
757 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
758 | ecorr_opt2 += contrib1/
|
---|
759 | (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
|
---|
760 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
761 | }
|
---|
762 | else if (socc_index == 2 && vir_index == 2) {
|
---|
763 | contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
|
---|
764 | (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
|
---|
765 | ecorr_opt2 += contrib1/
|
---|
766 | (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
|
---|
767 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
768 | }
|
---|
769 | else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
|
---|
770 | if (i_offset+i == j) {
|
---|
771 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
772 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
|
---|
773 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
774 | }
|
---|
775 | else {
|
---|
776 | contrib1 = trans_int4[a*nvir + b];
|
---|
777 | contrib2 = trans_int4[b*nvir + a];
|
---|
778 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
779 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
|
---|
780 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
781 | - contrib1*contrib2)/delta_ijab;
|
---|
782 | }
|
---|
783 | }
|
---|
784 | else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
|
---|
785 | contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
|
---|
786 | if (j == b) {
|
---|
787 | // To compute the energy contribution from an integral of the
|
---|
788 | // type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the
|
---|
789 | // (is|sa) integrals for all s=s.o.; these integrals are
|
---|
790 | // therefore stored here in the array mo_int_do_so_vir, and
|
---|
791 | // the energy contribution is computed after exiting the loop
|
---|
792 | // over i-batches (pass)
|
---|
793 | mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
|
---|
794 | (i_offset+i-nsocc + ndocc*b)] =
|
---|
795 | trans_int4[b*nvir + a];
|
---|
796 | ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
|
---|
797 | ecorr_opt1 += 1.5*contrib1/delta_ijab;
|
---|
798 | ecorr_zapt2_contrib += contrib1/
|
---|
799 | (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
|
---|
800 | + 0.5*contrib1/delta_ijab;
|
---|
801 | }
|
---|
802 | else {
|
---|
803 | ecorr_opt2 += contrib1/
|
---|
804 | (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
|
---|
805 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
806 | }
|
---|
807 | }
|
---|
808 | else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
|
---|
809 | if (a == b) {
|
---|
810 | contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
|
---|
811 | ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
|
---|
812 | ecorr_opt1 += contrib1/delta_ijab;
|
---|
813 | }
|
---|
814 | else {
|
---|
815 | contrib1 = trans_int4[a*nvir + b];
|
---|
816 | contrib2 = trans_int4[b*nvir + a];
|
---|
817 | ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
818 | - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
|
---|
819 | ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
|
---|
820 | - contrib1*contrib2)/delta_ijab;
|
---|
821 | }
|
---|
822 | }
|
---|
823 | } // exit b loop
|
---|
824 | } // exit a loop
|
---|
825 | tim_exit("compute ecorr");
|
---|
826 | } // exit j loop
|
---|
827 | } // exit i loop
|
---|
828 |
|
---|
829 | if (nsocc == 0 && npass > 1 && pass < npass - 1) {
|
---|
830 | double passe = ecorr_opt2;
|
---|
831 | msg_->sum(passe);
|
---|
832 | ExEnv::out0() << indent
|
---|
833 | << "Partial correlation energy for pass " << pass << ":" << endl;
|
---|
834 | ExEnv::out0() << indent
|
---|
835 | << scprintf(" restart_ecorr = %14.10f", passe)
|
---|
836 | << endl;
|
---|
837 | ExEnv::out0() << indent
|
---|
838 | << scprintf(" restart_orbital_v2lb = %d", ((pass+1) * ni))
|
---|
839 | << endl;
|
---|
840 | }
|
---|
841 | } // exit loop over i-batches (pass)
|
---|
842 |
|
---|
843 |
|
---|
844 |
|
---|
845 | // Compute contribution from excitations of the type is1 -> s1a where
|
---|
846 | // i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a,
|
---|
847 | // where i and a have the same spin, contribute to this term;
|
---|
848 | // (Brillouin's theorem not satisfied for ROHF wave functions);
|
---|
849 | // do this only if nsocc > 0 since gop1 will fail otherwise
|
---|
850 |
|
---|
851 | tim_enter("compute ecorr");
|
---|
852 |
|
---|
853 | if (nsocc > 0) {
|
---|
854 | tim_enter("global sum mo_int_do_so_vir");
|
---|
855 | msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
|
---|
856 | tim_exit("global sum mo_int_do_so_vir");
|
---|
857 | }
|
---|
858 |
|
---|
859 | // Add extra contribution for triplet and higher spin multiplicities
|
---|
860 | // contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta
|
---|
861 |
|
---|
862 | if (me == 0 && nsocc) {
|
---|
863 | for (i=0; i<ndocc; i++) {
|
---|
864 |
|
---|
865 | for (a=0; a<nvir-nsocc; a++) {
|
---|
866 | delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
|
---|
867 |
|
---|
868 | for (s1=0; s1<nsocc; s1++) {
|
---|
869 |
|
---|
870 | for (s2=0; s2<s1; s2++) {
|
---|
871 | contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
|
---|
872 | mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
|
---|
873 | ecorr_opt2 += contrib1;
|
---|
874 | ecorr_opt1 += contrib1;
|
---|
875 | }
|
---|
876 | }
|
---|
877 | } // exit a loop
|
---|
878 | } // exit i loop
|
---|
879 | }
|
---|
880 |
|
---|
881 | tim_exit("compute ecorr");
|
---|
882 |
|
---|
883 | ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
|
---|
884 | ecorr_opt2 += ecorr_opt2_contrib;
|
---|
885 | msg_->sum(ecorr_opt1);
|
---|
886 | msg_->sum(ecorr_opt2);
|
---|
887 | msg_->sum(ecorr_zapt2);
|
---|
888 | msg_->sum(aoint_computed);
|
---|
889 |
|
---|
890 | escf = reference_->energy();
|
---|
891 | hf_energy_ = escf;
|
---|
892 |
|
---|
893 | if (me == 0) {
|
---|
894 | eopt2 = escf + ecorr_opt2;
|
---|
895 | eopt1 = escf + ecorr_opt1;
|
---|
896 | ezapt2 = escf + ecorr_zapt2;
|
---|
897 |
|
---|
898 | // Print out various energies etc.
|
---|
899 | ExEnv::out0() << indent
|
---|
900 | << "Number of shell quartets for which AO integrals would" << endl
|
---|
901 | << indent << "have been computed without bounds checking: "
|
---|
902 | << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
|
---|
903 | ExEnv::out0() << indent
|
---|
904 | << "Number of shell quartets for which AO integrals" << endl
|
---|
905 | << indent << "were computed: " << aoint_computed << endl;
|
---|
906 |
|
---|
907 | ExEnv::out0() << indent
|
---|
908 | << scprintf("ROHF energy [au]: %17.12lf\n", escf);
|
---|
909 | ExEnv::out0() << indent
|
---|
910 | << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
|
---|
911 | ExEnv::out0() << indent
|
---|
912 | << scprintf("OPT2 second order correction [au]: %17.12lf\n",
|
---|
913 | ecorr_opt2);
|
---|
914 | ExEnv::out0() << indent
|
---|
915 | << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
|
---|
916 | ExEnv::out0() << indent
|
---|
917 | << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
|
---|
918 | ecorr_zapt2);
|
---|
919 | ExEnv::out0() << indent
|
---|
920 | << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
|
---|
921 | ExEnv::out0().flush();
|
---|
922 | }
|
---|
923 |
|
---|
924 | msg_->bcast(eopt1);
|
---|
925 | msg_->bcast(eopt2);
|
---|
926 | msg_->bcast(ezapt2);
|
---|
927 |
|
---|
928 | if (method_ && !strcmp(method_,"opt1")) {
|
---|
929 | set_energy(eopt1);
|
---|
930 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
931 | *ref_to_mp2_acc);
|
---|
932 | }
|
---|
933 | else if (method_ && !strcmp(method_,"opt2")) {
|
---|
934 | set_energy(eopt2);
|
---|
935 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
936 | *ref_to_mp2_acc);
|
---|
937 | }
|
---|
938 | else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
|
---|
939 | set_energy(ezapt2);
|
---|
940 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
941 | *ref_to_mp2_acc);
|
---|
942 | }
|
---|
943 | else {
|
---|
944 | if (!(!method_ || !strcmp(method_,"zapt"))) {
|
---|
945 | ExEnv::out0() << indent
|
---|
946 | << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
|
---|
947 | }
|
---|
948 | set_energy(ezapt2);
|
---|
949 | set_actual_value_accuracy(reference_->actual_value_accuracy()
|
---|
950 | *ref_to_mp2_acc);
|
---|
951 | }
|
---|
952 |
|
---|
953 | free(trans_int1);
|
---|
954 | free(trans_int2);
|
---|
955 | free(trans_int3);
|
---|
956 | free(trans_int4);
|
---|
957 | free(trans_int4_tmp);
|
---|
958 | if (nsocc) free(socc_sum);
|
---|
959 | if (nsocc) free(socc_sum_tmp);
|
---|
960 | if (nsocc) free(mo_int_do_so_vir);
|
---|
961 | if (nsocc) free(mo_int_tmp);
|
---|
962 | free(evals_open);
|
---|
963 | free(myshells);
|
---|
964 | free(shellsize);
|
---|
965 | free (myshellsizes);
|
---|
966 | free (split_info);
|
---|
967 | free(sorted_shells);
|
---|
968 | free(nbf);
|
---|
969 | free(proc);
|
---|
970 |
|
---|
971 | delete[] scf_vector;
|
---|
972 | delete[] scf_vector_dat;
|
---|
973 |
|
---|
974 | }
|
---|
975 |
|
---|
976 | /////////////////////////////////////////////////////////////////
|
---|
977 | // Function iquicksort performs a quick sort (larger -> smaller)
|
---|
978 | // of the integer data in item by the integer indices in index;
|
---|
979 | // data in item remain unchanged
|
---|
980 | /////////////////////////////////////////////////////////////////
|
---|
981 | static void
|
---|
982 | iquicksort(int *item,int *index,int n)
|
---|
983 | {
|
---|
984 | int i;
|
---|
985 | if (n<=0) return;
|
---|
986 | for (i=0; i<n; i++) {
|
---|
987 | index[i] = i;
|
---|
988 | }
|
---|
989 | iqs(item,index,0,n-1);
|
---|
990 | }
|
---|
991 |
|
---|
992 | static void
|
---|
993 | iqs(int *item,int *index,int left,int right)
|
---|
994 | {
|
---|
995 | register int i,j;
|
---|
996 | int x,y;
|
---|
997 |
|
---|
998 | i=left; j=right;
|
---|
999 | x=item[index[(left+right)/2]];
|
---|
1000 |
|
---|
1001 | do {
|
---|
1002 | while(item[index[i]]>x && i<right) i++;
|
---|
1003 | while(x>item[index[j]] && j>left) j--;
|
---|
1004 |
|
---|
1005 | if (i<=j) {
|
---|
1006 | if (item[index[i]] != item[index[j]]) {
|
---|
1007 | y=index[i];
|
---|
1008 | index[i]=index[j];
|
---|
1009 | index[j]=y;
|
---|
1010 | }
|
---|
1011 | i++; j--;
|
---|
1012 | }
|
---|
1013 | } while(i<=j);
|
---|
1014 |
|
---|
1015 | if (left<j) iqs(item,index,left,j);
|
---|
1016 | if (i<right) iqs(item,index,i,right);
|
---|
1017 | }
|
---|
1018 |
|
---|
1019 | ////////////////////////////////////////////////////////////////////
|
---|
1020 | // Function findprocminmax finds the processor with the most/fewest
|
---|
1021 | // basis functions and the corresponding number of basis functions
|
---|
1022 | ////////////////////////////////////////////////////////////////////
|
---|
1023 | static void
|
---|
1024 | findprocminmax(int *nbf, int nproc,
|
---|
1025 | int *procmin, int *procmax, int *minbf, int *maxbf)
|
---|
1026 | {
|
---|
1027 | int i;
|
---|
1028 |
|
---|
1029 | *procmax = *procmin = 0;
|
---|
1030 | *maxbf = nbf[0];
|
---|
1031 | *minbf = nbf[0];
|
---|
1032 |
|
---|
1033 | for (i=1; i<nproc; i++) {
|
---|
1034 | if (nbf[i] > *maxbf) {
|
---|
1035 | *maxbf = nbf[i];
|
---|
1036 | *procmax = i;
|
---|
1037 | }
|
---|
1038 | if (nbf[i] < *minbf) {
|
---|
1039 | *minbf = nbf[i];
|
---|
1040 | *procmin = i;
|
---|
1041 | }
|
---|
1042 | }
|
---|
1043 | }
|
---|
1044 |
|
---|
1045 | /////////////////////////////////////////////////////////////////
|
---|
1046 | // Function findshellmax finds the largest shell on a processor
|
---|
1047 | /////////////////////////////////////////////////////////////////
|
---|
1048 | static void
|
---|
1049 | findshellmax(int *myshellsizes, int nRshell, int *shellmax, int *shellmaxindex)
|
---|
1050 | {
|
---|
1051 | int i;
|
---|
1052 |
|
---|
1053 | *shellmax = myshellsizes[0];
|
---|
1054 | *shellmaxindex = 0;
|
---|
1055 |
|
---|
1056 | for (i=1; i<nRshell; i++) {
|
---|
1057 | if (myshellsizes[i] > *shellmax) {
|
---|
1058 | *shellmax = myshellsizes[i];
|
---|
1059 | *shellmaxindex = i;
|
---|
1060 | }
|
---|
1061 | }
|
---|
1062 | }
|
---|
1063 |
|
---|
1064 | //////////////////////////////////////////////////////////////
|
---|
1065 | // Function expand_array expands the dimension of an array of
|
---|
1066 | // doubles by 1;
|
---|
1067 | // NB: THE ARRAY MUST HAVE BEEN ALLOCATED WITH MALLOC
|
---|
1068 | //////////////////////////////////////////////////////////////
|
---|
1069 | static void
|
---|
1070 | expandintarray(int *&a, int olddim)
|
---|
1071 | {
|
---|
1072 | int i;
|
---|
1073 | int *tmp;
|
---|
1074 |
|
---|
1075 | tmp = (int*) malloc((olddim+1)*sizeof(int));
|
---|
1076 |
|
---|
1077 | for (i=0; i<olddim; i++) {
|
---|
1078 | tmp[i] = a[i];
|
---|
1079 | }
|
---|
1080 | tmp[olddim] = 0;
|
---|
1081 |
|
---|
1082 | free(a);
|
---|
1083 |
|
---|
1084 | a = tmp;
|
---|
1085 | }
|
---|
1086 |
|
---|
1087 | ////////////////////////////////////////////////////////////////////////////
|
---|
1088 |
|
---|
1089 | // Local Variables:
|
---|
1090 | // mode: c++
|
---|
1091 | // c-file-style: "CLJ-CONDENSED"
|
---|
1092 | // End:
|
---|