source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2lb.cc@ 72461c

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 72461c was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 35.5 KB
Line 
1//
2// hsosv2lb.cc
3//
4// Copyright (C) 1996 Limit Point Systems, Inc.
5//
6// Author: Ida Nielsen <ida@kemi.aau.dk>
7// Maintainer: LPS
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28#include <math.h>
29
30#include <util/misc/timer.h>
31#include <util/misc/formio.h>
32#include <chemistry/molecule/molecule.h>
33#include <chemistry/qc/mbpt/mbpt.h>
34#include <chemistry/qc/mbpt/bzerofast.h>
35
36using namespace std;
37using namespace sc;
38
39static void iqs(int *item,int *index,int left,int right);
40static void iquicksort(int *item,int *index,int n);
41static void findprocminmax(int *nbf, int nproc,
42 int *procmin, int *procmax, int *minbf, int *maxbf);
43static void findshellmax(int *myshellsizes, int nRshell, int *shellmax,
44 int *shellmaxindex);
45static void expandintarray(int *&a, int dim);
46
47void
48MBPT2::compute_hsos_v2_lb()
49{
50 int i, j, k, l;
51 int s1, s2;
52 int a, b;
53 int isocc, asocc; // indices running over singly occupied orbitals
54 int nfuncmax = basis()->max_nfunction_in_shell();
55 int nvir;
56 int nshell;
57 int shellmax;
58 int shellmaxindex;
59 int nocc=0,ndocc=0,nsocc=0;
60 int i_offset;
61 int npass, pass;
62 int ni;
63 int np, nq, nr, ns;
64 int P, Q, R, S;
65 int p, q, r, s;
66 int bf1, bf2, bf3, bf4;
67 int bf3_offset;
68 int nbfmoved;
69 int nbfav; // average number of r basis functions per node
70 int minbf, maxbf; // max/min number of (r) basis functions on a node
71 int index;
72 int compute_index;
73 int col_index;
74 int tmp_index;
75 int dim_ij;
76 int docc_index, socc_index, vir_index;
77 int me;
78 int nproc;
79 int procmin, procmax; // processor with most/fewest basis functions
80 int rest;
81 int r_offset;
82 int min;
83 int iproc;
84 int nRshell;
85 int imyshell;
86 int *myshells; // the R indices processed by node me
87 int *myshellsizes; // sizes of the shells (after split) on node me
88 int *split_info; // on each node: offset for each shell; -1 if shell not split
89 int *shellsize; // size of each shell
90 int *sorted_shells; // sorted shell indices: large shells->small shells
91 int *nbf; // number of basis functions processed by each node
92 int *proc; // element k: processor which will process shell k
93 int aoint_computed = 0;
94 double A, B, C, ni_top, max, ni_double; // variables used to compute ni
95 double *evals_open; // reordered scf eigenvalues
96 const double *intbuf; // 2-electron AO integral buffer
97 double *trans_int1; // partially transformed integrals
98 double *trans_int2; // partially transformed integrals
99 double *trans_int3; // partially transformed integrals
100 double *trans_int4; // fully transformed integrals
101 double *trans_int4_tmp; // scratch array
102 double *mo_int_do_so_vir=0;//mo integral (is|sa); i:d.o.,s:s.o.,a:vir
103 double *mo_int_tmp=0; // scratch array used in global summations
104 double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s
105 double *socc_sum_tmp=0;// scratch array
106 double *iqrs, *iprs;
107 double *iars_ptr;
108 double iars;
109 double iajr;
110 double *iajr_ptr;
111 double *iajb;
112 double pqrs;
113 double *c_qa;
114 double *c_rb, *c_pi, *c_qi, *c_sj;
115 double delta_ijab;
116 double delta;
117 double contrib1, contrib2;
118 double ecorr_opt2=0,ecorr_opt1=0;
119 double ecorr_zapt2;
120 double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
121 double escf;
122 double eopt2,eopt1,ezapt2;
123 double tol; // log2 of the erep tolerance (erep < 2^tol => discard)
124
125 me = msg_->me();
126
127 ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl;
128
129 tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
130
131 nproc = msg_->n();
132 ExEnv::out0() << indent << "nproc = " << nproc << endl;
133
134 ndocc = nsocc = 0;
135 const double epsilon = 1.0e-4;
136 for (i=0; i<oso_dimension()->n(); i++) {
137 if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
138 else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
139 }
140
141 // Do a few preliminary tests to make sure the desired calculation
142 // can be done (and appears to be meaningful!)
143
144 if (ndocc == 0 && nsocc == 0) {
145 ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
146 abort();
147 }
148
149 if (nfzc > ndocc) {
150 ExEnv::err0()
151 << "The number of frozen core orbitals exceeds the number" << endl
152 << "of doubly occupied orbitals; program exiting" << endl;
153 abort();
154 }
155
156 if (nfzv > noso - ndocc - nsocc) {
157 ExEnv::err0()
158 << "The number of frozen virtual orbitals exceeds the number" << endl
159 << "of unoccupied orbitals; program exiting" << endl;
160 abort();
161 }
162
163 ndocc = ndocc - nfzc;
164 // nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb.
165 nvir = noso - ndocc - nfzc - nfzv;
166 // nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb.
167 nocc = ndocc + nsocc;
168 nshell = basis()->nshell();
169
170 // Allocate storage for some arrays used for keeping track of which R
171 // indices are processed by each node
172 shellsize = (int*) malloc(nshell*sizeof(int));
173 sorted_shells = (int*) malloc(nshell*sizeof(int));
174 nbf = (int*) malloc(nproc*sizeof(int));
175 proc = (int*) malloc(nshell*sizeof(int));
176
177
178 ///////////////////////////////////////////////////////
179 // Begin distributing R shells between nodes so all
180 // nodes get ca. the same number of r basis functions
181 ///////////////////////////////////////////////////////
182
183 // Compute the size of each shell
184 for (i=0; i<nshell; i++) {
185 shellsize[i] = basis()->shell(i).nfunction();
186 }
187
188 // Do an index sort (large -> small) of shellsize to form sorted_shells
189 iquicksort(shellsize,sorted_shells,nshell);
190
191 // Initialize nbf
192 for (i=0; i<nproc; i++) nbf[i] = 0;
193
194 for (i=0; i<nshell; i++) {
195 min = nbf[0];
196 iproc = 0;
197 for (j=1; j<nproc; j++) {
198 if (nbf[j] < min) {
199 iproc = j;
200 min = nbf[j];
201 }
202 }
203 proc[sorted_shells[i]] = iproc;
204 nbf[iproc] += shellsize[sorted_shells[i]];
205 }
206 if (me == 0) {
207 ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
208 for (i=0; i<nproc; i++) {
209 if (i%12 == 0) ExEnv::out0() << indent;
210 ExEnv::out0() << scprintf(" %4i",nbf[i]);
211 if ((i+1)%12 == 0) ExEnv::out0() << endl;
212 }
213 ExEnv::out0() << endl;
214 }
215
216 // Determine which shells are to be processed by node me
217 nRshell = 0;
218 for (i=0; i<nshell; i++) {
219 if (proc[i] == me) nRshell++;
220 }
221 myshells = (int*) malloc(nRshell*sizeof(int));
222 imyshell = 0;
223 for (i=0; i<nshell; i++) {
224 if (proc[i] == me) {
225 myshells[imyshell] = i;
226 imyshell++;
227 }
228 }
229
230 /////////////////////////////////////////////////////////////
231 // End of preliminary distribution of R shells between nodes
232 /////////////////////////////////////////////////////////////
233
234 // Compute the average number of basis functions per node
235 nbfav = nbasis/nproc;
236 if (nbasis%nproc) nbfav++;
237
238 myshellsizes = (int*) malloc(nRshell*sizeof(int));
239 split_info = (int*) malloc(nRshell*sizeof(int));
240 for (j=0; j<nRshell; j++) {
241 myshellsizes[j] = basis()->shell(myshells[j]).nfunction();
242 split_info[j] = -1;
243 }
244
245 // Find the processor with the most/fewest basis functions
246 findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
247 if (maxbf > nbfav) {
248 ExEnv::out0() << indent << "Redistributing basis functions" << endl;
249 }
250
251 while (maxbf > nbfav) {
252 msg_->sync();
253 if (me == procmax) {
254
255 findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex);
256 nbfmoved = 0;
257 while (maxbf>nbfav && minbf<nbfav && shellmax>1) {
258 shellmax--;
259 nbfmoved++;
260 maxbf--;
261 minbf++;
262 }
263 myshellsizes[shellmaxindex] = shellmax;
264 if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0;
265 shellmax += nbfmoved;
266
267 // Send nbfmoved from procmax to all other nodes
268 msg_->bcast(nbfmoved,procmax);
269
270 // Send variables to node procmin
271 msg_->send(procmin,&myshells[shellmaxindex],1);
272 msg_->send(procmin,&shellmax,1);
273
274 }
275 else {
276 // Receive nbfmoved from procmax
277 msg_->bcast(nbfmoved,procmax);
278 }
279
280 nbf[procmax] -= nbfmoved;
281
282 if (me == procmin) {
283 expandintarray(myshellsizes,nRshell);
284 expandintarray(myshells,nRshell);
285 expandintarray(split_info,nRshell);
286 nRshell++;
287 myshellsizes[nRshell-1] = nbfmoved;
288 msg_->recv(procmax,&myshells[nRshell-1],1);
289 msg_->recv(procmax,&split_info[nRshell-1],1);
290 split_info[nRshell-1] -= myshellsizes[nRshell-1];
291 }
292
293 nbf[procmin] += nbfmoved;
294 msg_->sync();
295 findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
296
297 }
298
299 if (me == 0) {
300 ExEnv::out0() << indent
301 << "New distribution of basis functions between nodes:" << endl;
302 for (i=0; i<nproc; i++) {
303 if (i%12 == 0) ExEnv::out0() << indent;
304 ExEnv::out0() << scprintf(" %4i",nbf[i]);
305 if ((i+1)%12 == 0) ExEnv::out0() << endl;
306 }
307 ExEnv::out0() << endl;
308 }
309
310
311 //////////////////////////////////////////////////////////
312 // End of distribution of R shells and r basis functions
313 //////////////////////////////////////////////////////////
314
315 // Compute batch size ni for opt2 loops;
316 // need to store the following arrays of type double : trans_int1-4,
317 // trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp,
318 // mo_int_do_so_vir, mo_int_tmp,
319 // and the following arrays of type int: myshells, shellsize,
320 // sorted_shells, nbf, and proc
321 A = -0.5*sizeof(double)*nbf[me]*nvir;
322 B = sizeof(double)*(nfuncmax*nfuncmax*nbasis + nvir + nocc*nbf[me]*nvir
323 + nbf[me]*nvir*0.5);
324 C = sizeof(double)*(2*nvir*nvir + (nbasis+1)*(nvir+nocc) + 2*nsocc
325 + 2*ndocc*nsocc*(nvir-nsocc))
326 + sizeof(int)*(3*nshell + nproc + nRshell);
327 ni_top = -B/(2*A);
328 max = A*ni_top*ni_top + B*ni_top +C;
329 if (max <= mem_alloc) {
330 ni = nocc;
331 }
332 else {
333 ni_double = (-B + sqrt((double)(B*B - 4*A*(C-mem_alloc))))/(2*A);
334 ni = (int) ni_double;
335 if (ni > nocc) ni = nocc;
336 max = mem_alloc;
337 }
338
339 size_t mem_remaining = mem_alloc - (size_t)max;
340
341 // Set ni equal to the smallest batch size for any node
342 msg_->min(ni);
343 msg_->bcast(ni);
344
345 if (ni < nsocc) {
346 ExEnv::err0() << "Not enough memory allocated" << endl;
347 abort();
348 }
349
350 if (ni < 1) { // this applies only to a closed shell case
351 ExEnv::err0() << "Not enough memory allocated" << endl;
352 abort();
353 }
354
355 ExEnv::out0() << indent << "Computed batchsize: " << ni << endl;
356
357 if (nocc == ni) {
358 npass = 1;
359 rest = 0;
360 }
361 else {
362 rest = nocc%ni;
363 npass = (nocc - rest)/ni + 1;
364 if (rest == 0) npass--;
365 }
366
367 if (me == 0) {
368 ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
369 " ndocc nsocc nvir nfzc nfzv" << endl;
370 ExEnv::out0() << indent
371 << scprintf(" %-4i %-3i %-5i %-4i %-3i"
372 " %-3i %-3i %-3i %-3i %-3i\n",
373 npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
374 ExEnv::out0() << indent
375 << scprintf("Using %i bytes of memory",mem_alloc) << endl;
376 }
377
378 //////////////////////
379 // Test that ni is OK
380 //////////////////////
381 if (me == 0) {
382 ExEnv::out0() << indent
383 << scprintf("Memory allocated: %i", mem_alloc) << endl;
384 ExEnv::out0() << indent
385 << scprintf("Memory used : %lf", A*ni*ni+B*ni+C) << endl;
386 if (A*ni*ni + B*ni +C > mem_alloc) {
387 ExEnv::err0() << "Problems with memory allocation: "
388 << "Using more memory than allocated" << endl;
389 abort();
390 }
391 }
392
393 //////////////////////////////////////////////////////////////////
394 // The scf vector might be distributed between the nodes,
395 // but for OPT2 each node needs its own copy of the vector;
396 // therefore, put a copy of the scf vector on each node;
397 // while doing this, duplicate columns corresponding to singly
398 // occupied orbitals and order columns as [socc docc socc unocc]
399 // Also rearrange scf eigenvalues as [socc docc socc unocc]
400 // want socc first to get the socc's in the first batch
401 // (need socc's to compute energy denominators - see
402 // socc_sum comment below)
403 /////////////////////////////////////////////////////////
404 evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
405
406 RefDiagSCMatrix occ;
407 RefDiagSCMatrix evals;
408 RefSCMatrix Scf_Vec;
409 eigen(evals, Scf_Vec, occ);
410
411 if (debug_) {
412 evals.print("eigenvalues");
413 Scf_Vec.print("eigenvectors");
414 }
415
416 double *scf_vectort_dat = new double[nbasis*noso];
417 Scf_Vec->convert(scf_vectort_dat);
418
419 double** scf_vectort = new double*[nocc + nvir];
420
421 int idoc = 0, ivir = 0, isoc = 0;
422 for (i=nfzc; i<noso-nfzv; i++) {
423 if (occ(i) >= 2.0 - epsilon) {
424 evals_open[idoc+nsocc] = evals(i);
425 scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
426 idoc++;
427 }
428 else if (occ(i) >= 1.0 - epsilon) {
429 evals_open[isoc] = evals(i);
430 scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
431 evals_open[isoc+nocc] = evals(i);
432 scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
433 isoc++;
434 }
435 else {
436 if (ivir < nvir) {
437 evals_open[ivir+nocc+nsocc] = evals(i);
438 scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
439 }
440 ivir++;
441 }
442 }
443 // need the transpose of the vector
444 double **scf_vector = new double*[nbasis];
445 double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
446 for (i=0; i<nbasis; i++) {
447 scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
448 for (j=0; j<nocc+nvir; j++) {
449 scf_vector[i][j] = scf_vectort[j][i];
450 }
451 }
452 delete[] scf_vectort;
453 delete[] scf_vectort_dat;
454
455 ////////////////////////////////////////
456 // Allocate storage for various arrays
457 ////////////////////////////////////////
458
459 dim_ij = nocc*ni - ni*(ni - 1)/2;
460
461 trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
462 trans_int2 = (double*) malloc(nvir*ni*sizeof(double));
463 trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));
464 trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
465 trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));
466 if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
467 if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));
468 if (nsocc) mo_int_do_so_vir =
469 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
470 if (nsocc) mo_int_tmp =
471 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
472
473 if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
474
475 // create the integrals object
476 integral()->set_storage(mem_remaining);
477 tbint_ = integral()->electron_repulsion();
478 intbuf = tbint_->buffer();
479
480 /////////////////////////////////////
481 // Begin opt2 loops
482 /////////////////////////////////////
483
484
485 for (pass=0; pass<npass; pass++) {
486 i_offset = pass*ni;
487 if ((pass == npass - 1) && (rest != 0)) ni = rest;
488
489 r_offset = 0;
490 bzerofast(trans_int3,nbf[me]*nvir*dim_ij);
491
492 tim_enter("RS loop");
493
494 for (imyshell=0; imyshell<nRshell; imyshell++) {
495
496 R = myshells[imyshell];
497 nr = myshellsizes[imyshell];
498
499 for (S = 0; S < nshell; S++) {
500 ns = basis()->shell(S).nfunction();
501 tim_enter("bzerofast trans_int1");
502 bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
503 tim_exit("bzerofast trans_int1");
504
505 tim_enter("PQ loop");
506
507 for (P = 0; P < nshell; P++) {
508 np = basis()->shell(P).nfunction();
509
510 for (Q = 0; Q <= P; Q++) {
511 if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
512 continue; // skip ereps less than tol
513 }
514
515 aoint_computed++;
516
517 nq = basis()->shell(Q).nfunction();
518
519 tim_enter("erep");
520 tbint_->compute_shell(P,Q,R,S);
521 tim_exit("erep");
522
523 tim_enter("1. quart. tr.");
524
525 for (bf1 = 0; bf1 < np; bf1++) {
526 p = basis()->shell_to_function(P) + bf1;
527
528 for (bf2 = 0; bf2 < nq; bf2++) {
529 q = basis()->shell_to_function(Q) + bf2;
530 if (q > p) {
531 // if q > p: want to skip the loops over bf3-4
532 // and larger bf2 values, so increment bf1 by 1
533 // ("break")
534 break;
535 }
536
537 for (bf3 = 0; bf3 < nr; bf3++) {
538 bf3_offset = 0;
539 if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];
540
541 for (bf4 = 0; bf4 < ns; bf4++) {
542
543 index = bf4 + ns*(bf3+bf3_offset +
544 basis()->shell(R).nfunction()*(bf2 + nq*bf1));
545
546 if (fabs(intbuf[index]) > 1.0e-15) {
547 pqrs = intbuf[index];
548
549 iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
550 iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];
551
552 if (p == q) pqrs *= 0.5;
553 col_index = i_offset;
554 c_pi = &scf_vector[p][col_index];
555 c_qi = &scf_vector[q][col_index];
556
557 for (i=ni; i; i--) {
558 *iqrs++ += pqrs * *c_pi++;
559 *iprs++ += pqrs * *c_qi++;
560 }
561 }
562 } // exit bf4 loop
563 } // exit bf3 loop
564 } // exit bf2 loop
565 } // exit bf1 loop
566 tim_exit("1. quart. tr.");
567 } // exit Q loop
568 } // exit P loop
569 tim_exit("PQ loop");
570
571 // Begin second and third quarter transformations
572
573 for (bf3 = 0; bf3 < nr; bf3++) {
574 r = r_offset + bf3;
575
576 for (bf4 = 0; bf4 < ns; bf4++) {
577 s = basis()->shell_to_function(S) + bf4;
578
579 tim_enter("bzerofast trans_int2");
580 bzerofast(trans_int2,nvir*ni);
581 tim_exit("bzerofast trans_int2");
582
583 tim_enter("2. quart. tr.");
584
585 for (q = 0; q < nbasis; q++) {
586 iars_ptr = trans_int2;
587 iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
588 c_qa = &scf_vector[q][nocc];
589
590 for (a = 0; a < nvir; a++) {
591
592 for (i=ni; i; i--) {
593 *iars_ptr++ += *c_qa * *iqrs++;
594 }
595
596 iqrs -= ni;
597 c_qa++;
598 }
599 } // exit q loop
600 tim_exit("2. quart. tr.");
601
602 // Begin third quarter transformation
603
604 tim_enter("3. quart. tr.");
605
606 for (i=0; i<ni; i++) {
607 tmp_index = i*(i+1)/2 + i*i_offset;
608
609 for (a=0; a<nvir; a++) {
610 iars = trans_int2[a*ni + i];
611 c_sj = scf_vector[s];
612 iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];
613
614 for (j=0; j<=i+i_offset; j++) {
615 *iajr_ptr++ += *c_sj++ * iars;
616 }
617 }
618 } // exit i loop
619 tim_exit("3. quart. tr.");
620
621 } // exit bf4 loop
622 } // exit bf3 loop
623
624 } // exit S loop
625 r_offset += nr;
626 } // exit R loop
627 tim_exit("RS loop");
628
629 // Begin fourth quarter transformation;
630 // first tansform integrals with only s.o. indices;
631 // these integrals are needed to compute the denominators
632 // in the various terms contributing to the correlation energy
633 // and must all be computed in the first pass;
634 // the integrals are summed into the array socc_sum:
635 // socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc)
636 // (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)
637 // the individual integrals are not saved here, only the sums are kept
638
639 if (pass == 0) {
640 tim_enter("4. quart. tr.");
641 if (nsocc) bzerofast(socc_sum,nsocc);
642 for (isocc=0; isocc<nsocc; isocc++) {
643
644 index = 0;
645 for (i=0; i<nRshell; i++) {
646 for (j=0; j<myshellsizes[i]; j++) {
647 r = basis()->shell_to_function(myshells[i]) + j;
648 if (split_info[i] != -1) r += split_info[i];
649
650 for (asocc=0; asocc<nsocc; asocc++) {
651 socc_sum[isocc] += scf_vector[r][nocc+asocc]*
652 trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
653 + isocc + dim_ij*(asocc + nvir*index)];
654 }
655 index++;
656 }
657 }
658 } // exit i loop
659
660 tim_exit("4. quart. tr.");
661
662 // Sum socc_sum contributions from each node (only if nsocc > 0
663 // since gop1 will fail if nsocc = 0)
664 if (nsocc > 0) {
665 tim_enter("global sum socc_sum");
666 msg_->sum(socc_sum,nsocc,socc_sum_tmp);
667 tim_exit("global sum socc_sum");
668 }
669
670 }
671
672 // Now we have all the sums of integrals involving s.o.'s (socc_sum);
673 // begin fourth quarter transformation for all integrals (including
674 // integrals with only s.o. indices); use restriction j <= (i_offset+i)
675 // to save flops
676
677 compute_index = 0;
678
679 for (i=0; i<ni; i++) {
680
681 for (j=0; j <= (i_offset+i); j++) {
682
683 tim_enter("4. quart. tr.");
684
685 bzerofast(trans_int4,nvir*nvir);
686
687 index = 0;
688 for (k=0; k<nRshell; k++) {
689 for (l=0; l<myshellsizes[k]; l++) {
690 r = basis()->shell_to_function(myshells[k]) + l;
691 if (split_info[k] != -1) r += split_info[k];
692
693 for (a=0; a<nvir; a++) {
694 iajb = &trans_int4[a*nvir];
695 iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];
696 c_rb = &scf_vector[r][nocc];
697
698 for (b=0; b<nvir; b++) {
699 *iajb++ += *c_rb++ * iajr;
700 }
701 }
702 index++;
703 }
704 } // end of k loop
705
706 tim_exit("4. quart. tr.");
707
708 tim_enter("global sum trans_int4");
709 msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
710 tim_exit("global sum trans_int4");
711
712 // We now have the fully transformed integrals (ia|jb)
713 // for one i, one j (j <= i_offset+i), and all a and b;
714 // compute contribution to the OPT1 and OPT2 correlation
715 // energies; use restriction b <= a to save flops
716
717 tim_enter("compute ecorr");
718
719 for (a=0; a<nvir; a++) {
720 for (b=0; b<=a; b++) {
721 compute_index++;
722 if (compute_index%nproc != me) continue;
723
724 docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
725 + (j >= nsocc && j < nocc);
726 socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
727 vir_index = (a >= nsocc) + (b >= nsocc);
728
729 if (socc_index >= 3) continue; // skip to next b value
730
731 delta_ijab = evals_open[i_offset+i] + evals_open[j]
732 - evals_open[nocc+a] - evals_open[nocc+b];
733
734 // Determine integral type and compute energy contribution
735 if (docc_index == 2 && vir_index == 2) {
736 if (i_offset+i == j && a == b) {
737 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
738 ecorr_opt2 += contrib1/delta_ijab;
739 ecorr_opt1 += contrib1/delta_ijab;
740 }
741 else if (i_offset+i == j || a == b) {
742 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
743 ecorr_opt2 += 2*contrib1/delta_ijab;
744 ecorr_opt1 += 2*contrib1/delta_ijab;
745 }
746 else {
747 contrib1 = trans_int4[a*nvir + b];
748 contrib2 = trans_int4[b*nvir + a];
749 ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
750 - contrib1*contrib2)/delta_ijab;
751 ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
752 - contrib1*contrib2)/delta_ijab;
753 }
754 }
755 else if (docc_index == 2 && socc_index == 2) {
756 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
757 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
758 ecorr_opt2 += contrib1/
759 (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
760 ecorr_opt1 += contrib1/delta_ijab;
761 }
762 else if (socc_index == 2 && vir_index == 2) {
763 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
764 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
765 ecorr_opt2 += contrib1/
766 (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
767 ecorr_opt1 += contrib1/delta_ijab;
768 }
769 else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
770 if (i_offset+i == j) {
771 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
772 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
773 ecorr_opt1 += contrib1/delta_ijab;
774 }
775 else {
776 contrib1 = trans_int4[a*nvir + b];
777 contrib2 = trans_int4[b*nvir + a];
778 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
779 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
780 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
781 - contrib1*contrib2)/delta_ijab;
782 }
783 }
784 else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
785 contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
786 if (j == b) {
787 // To compute the energy contribution from an integral of the
788 // type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need the
789 // (is|sa) integrals for all s=s.o.; these integrals are
790 // therefore stored here in the array mo_int_do_so_vir, and
791 // the energy contribution is computed after exiting the loop
792 // over i-batches (pass)
793 mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
794 (i_offset+i-nsocc + ndocc*b)] =
795 trans_int4[b*nvir + a];
796 ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
797 ecorr_opt1 += 1.5*contrib1/delta_ijab;
798 ecorr_zapt2_contrib += contrib1/
799 (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
800 + 0.5*contrib1/delta_ijab;
801 }
802 else {
803 ecorr_opt2 += contrib1/
804 (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
805 ecorr_opt1 += contrib1/delta_ijab;
806 }
807 }
808 else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
809 if (a == b) {
810 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
811 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
812 ecorr_opt1 += contrib1/delta_ijab;
813 }
814 else {
815 contrib1 = trans_int4[a*nvir + b];
816 contrib2 = trans_int4[b*nvir + a];
817 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
818 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
819 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
820 - contrib1*contrib2)/delta_ijab;
821 }
822 }
823 } // exit b loop
824 } // exit a loop
825 tim_exit("compute ecorr");
826 } // exit j loop
827 } // exit i loop
828
829 if (nsocc == 0 && npass > 1 && pass < npass - 1) {
830 double passe = ecorr_opt2;
831 msg_->sum(passe);
832 ExEnv::out0() << indent
833 << "Partial correlation energy for pass " << pass << ":" << endl;
834 ExEnv::out0() << indent
835 << scprintf(" restart_ecorr = %14.10f", passe)
836 << endl;
837 ExEnv::out0() << indent
838 << scprintf(" restart_orbital_v2lb = %d", ((pass+1) * ni))
839 << endl;
840 }
841 } // exit loop over i-batches (pass)
842
843
844
845 // Compute contribution from excitations of the type is1 -> s1a where
846 // i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a,
847 // where i and a have the same spin, contribute to this term;
848 // (Brillouin's theorem not satisfied for ROHF wave functions);
849 // do this only if nsocc > 0 since gop1 will fail otherwise
850
851 tim_enter("compute ecorr");
852
853 if (nsocc > 0) {
854 tim_enter("global sum mo_int_do_so_vir");
855 msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
856 tim_exit("global sum mo_int_do_so_vir");
857 }
858
859 // Add extra contribution for triplet and higher spin multiplicities
860 // contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta
861
862 if (me == 0 && nsocc) {
863 for (i=0; i<ndocc; i++) {
864
865 for (a=0; a<nvir-nsocc; a++) {
866 delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
867
868 for (s1=0; s1<nsocc; s1++) {
869
870 for (s2=0; s2<s1; s2++) {
871 contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
872 mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
873 ecorr_opt2 += contrib1;
874 ecorr_opt1 += contrib1;
875 }
876 }
877 } // exit a loop
878 } // exit i loop
879 }
880
881 tim_exit("compute ecorr");
882
883 ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
884 ecorr_opt2 += ecorr_opt2_contrib;
885 msg_->sum(ecorr_opt1);
886 msg_->sum(ecorr_opt2);
887 msg_->sum(ecorr_zapt2);
888 msg_->sum(aoint_computed);
889
890 escf = reference_->energy();
891 hf_energy_ = escf;
892
893 if (me == 0) {
894 eopt2 = escf + ecorr_opt2;
895 eopt1 = escf + ecorr_opt1;
896 ezapt2 = escf + ecorr_zapt2;
897
898 // Print out various energies etc.
899 ExEnv::out0() << indent
900 << "Number of shell quartets for which AO integrals would" << endl
901 << indent << "have been computed without bounds checking: "
902 << npass*nshell*nshell*(nshell+1)*(nshell+1)/2 << endl;
903 ExEnv::out0() << indent
904 << "Number of shell quartets for which AO integrals" << endl
905 << indent << "were computed: " << aoint_computed << endl;
906
907 ExEnv::out0() << indent
908 << scprintf("ROHF energy [au]: %17.12lf\n", escf);
909 ExEnv::out0() << indent
910 << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
911 ExEnv::out0() << indent
912 << scprintf("OPT2 second order correction [au]: %17.12lf\n",
913 ecorr_opt2);
914 ExEnv::out0() << indent
915 << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
916 ExEnv::out0() << indent
917 << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
918 ecorr_zapt2);
919 ExEnv::out0() << indent
920 << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
921 ExEnv::out0().flush();
922 }
923
924 msg_->bcast(eopt1);
925 msg_->bcast(eopt2);
926 msg_->bcast(ezapt2);
927
928 if (method_ && !strcmp(method_,"opt1")) {
929 set_energy(eopt1);
930 set_actual_value_accuracy(reference_->actual_value_accuracy()
931 *ref_to_mp2_acc);
932 }
933 else if (method_ && !strcmp(method_,"opt2")) {
934 set_energy(eopt2);
935 set_actual_value_accuracy(reference_->actual_value_accuracy()
936 *ref_to_mp2_acc);
937 }
938 else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
939 set_energy(ezapt2);
940 set_actual_value_accuracy(reference_->actual_value_accuracy()
941 *ref_to_mp2_acc);
942 }
943 else {
944 if (!(!method_ || !strcmp(method_,"zapt"))) {
945 ExEnv::out0() << indent
946 << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
947 }
948 set_energy(ezapt2);
949 set_actual_value_accuracy(reference_->actual_value_accuracy()
950 *ref_to_mp2_acc);
951 }
952
953 free(trans_int1);
954 free(trans_int2);
955 free(trans_int3);
956 free(trans_int4);
957 free(trans_int4_tmp);
958 if (nsocc) free(socc_sum);
959 if (nsocc) free(socc_sum_tmp);
960 if (nsocc) free(mo_int_do_so_vir);
961 if (nsocc) free(mo_int_tmp);
962 free(evals_open);
963 free(myshells);
964 free(shellsize);
965 free (myshellsizes);
966 free (split_info);
967 free(sorted_shells);
968 free(nbf);
969 free(proc);
970
971 delete[] scf_vector;
972 delete[] scf_vector_dat;
973
974 }
975
976/////////////////////////////////////////////////////////////////
977// Function iquicksort performs a quick sort (larger -> smaller)
978// of the integer data in item by the integer indices in index;
979// data in item remain unchanged
980/////////////////////////////////////////////////////////////////
981static void
982iquicksort(int *item,int *index,int n)
983{
984 int i;
985 if (n<=0) return;
986 for (i=0; i<n; i++) {
987 index[i] = i;
988 }
989 iqs(item,index,0,n-1);
990 }
991
992static void
993iqs(int *item,int *index,int left,int right)
994{
995 register int i,j;
996 int x,y;
997
998 i=left; j=right;
999 x=item[index[(left+right)/2]];
1000
1001 do {
1002 while(item[index[i]]>x && i<right) i++;
1003 while(x>item[index[j]] && j>left) j--;
1004
1005 if (i<=j) {
1006 if (item[index[i]] != item[index[j]]) {
1007 y=index[i];
1008 index[i]=index[j];
1009 index[j]=y;
1010 }
1011 i++; j--;
1012 }
1013 } while(i<=j);
1014
1015 if (left<j) iqs(item,index,left,j);
1016 if (i<right) iqs(item,index,i,right);
1017}
1018
1019////////////////////////////////////////////////////////////////////
1020// Function findprocminmax finds the processor with the most/fewest
1021// basis functions and the corresponding number of basis functions
1022////////////////////////////////////////////////////////////////////
1023static void
1024findprocminmax(int *nbf, int nproc,
1025 int *procmin, int *procmax, int *minbf, int *maxbf)
1026{
1027 int i;
1028
1029 *procmax = *procmin = 0;
1030 *maxbf = nbf[0];
1031 *minbf = nbf[0];
1032
1033 for (i=1; i<nproc; i++) {
1034 if (nbf[i] > *maxbf) {
1035 *maxbf = nbf[i];
1036 *procmax = i;
1037 }
1038 if (nbf[i] < *minbf) {
1039 *minbf = nbf[i];
1040 *procmin = i;
1041 }
1042 }
1043}
1044
1045/////////////////////////////////////////////////////////////////
1046// Function findshellmax finds the largest shell on a processor
1047/////////////////////////////////////////////////////////////////
1048static void
1049findshellmax(int *myshellsizes, int nRshell, int *shellmax, int *shellmaxindex)
1050{
1051 int i;
1052
1053 *shellmax = myshellsizes[0];
1054 *shellmaxindex = 0;
1055
1056 for (i=1; i<nRshell; i++) {
1057 if (myshellsizes[i] > *shellmax) {
1058 *shellmax = myshellsizes[i];
1059 *shellmaxindex = i;
1060 }
1061 }
1062}
1063
1064//////////////////////////////////////////////////////////////
1065// Function expand_array expands the dimension of an array of
1066// doubles by 1;
1067// NB: THE ARRAY MUST HAVE BEEN ALLOCATED WITH MALLOC
1068//////////////////////////////////////////////////////////////
1069static void
1070expandintarray(int *&a, int olddim)
1071{
1072 int i;
1073 int *tmp;
1074
1075 tmp = (int*) malloc((olddim+1)*sizeof(int));
1076
1077 for (i=0; i<olddim; i++) {
1078 tmp[i] = a[i];
1079 }
1080 tmp[olddim] = 0;
1081
1082 free(a);
1083
1084 a = tmp;
1085}
1086
1087////////////////////////////////////////////////////////////////////////////
1088
1089// Local Variables:
1090// mode: c++
1091// c-file-style: "CLJ-CONDENSED"
1092// End:
Note: See TracBrowser for help on using the repository browser.