source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv1.cc@ 72461c

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable
Last change on this file since 72461c was 860145, checked in by Frederik Heber <heber@…>, 8 years ago

Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'

  • Property mode set to 100644
File size: 30.9 KB
Line 
1//
2// hsosv1.cc
3//
4// Copyright (C) 1996 Limit Point Systems, Inc.
5//
6// Author: Ida Nielsen <ida@kemi.aau.dk>
7// Maintainer: LPS
8//
9// This file is part of the SC Toolkit.
10//
11// The SC Toolkit is free software; you can redistribute it and/or modify
12// it under the terms of the GNU Library General Public License as published by
13// the Free Software Foundation; either version 2, or (at your option)
14// any later version.
15//
16// The SC Toolkit is distributed in the hope that it will be useful,
17// but WITHOUT ANY WARRANTY; without even the implied warranty of
18// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19// GNU Library General Public License for more details.
20//
21// You should have received a copy of the GNU Library General Public License
22// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24//
25// The U.S. Government is granted a limited license as per AL 91-7.
26//
27
28typedef int dmt_matrix;
29
30#include <stdlib.h>
31#include <math.h>
32
33#include <util/misc/formio.h>
34#include <util/misc/timer.h>
35#include <util/class/class.h>
36#include <util/state/state.h>
37#include <util/group/message.h>
38#include <math/scmat/matrix.h>
39#include <chemistry/molecule/molecule.h>
40#include <chemistry/qc/scf/scf.h>
41#include <chemistry/qc/mbpt/mbpt.h>
42#include <chemistry/qc/mbpt/bzerofast.h>
43#include <chemistry/qc/mbpt/hsosv1e1.h>
44
45using namespace std;
46using namespace sc;
47
48static distsize_t
49compute_v1_memory(int ni,
50 int nfuncmax, int nbasis, int noso,
51 int a_number, int nshell,
52 int ndocc, int nsocc, int nvir,
53 int nfzc, int nfzv,
54 int nproc)
55{
56 distsize_t mem = 0;
57 int nocc = ndocc + nsocc;
58 int dim_ij = nocc*ni - (ni*(ni-1))/2;
59 mem += nproc*sizeof(int);
60 mem += (noso+nsocc-nfzc-nfzv)*sizeof(double);
61 mem += nfuncmax*nfuncmax*nbasis*ni*sizeof(double);
62 mem += nfuncmax*nfuncmax*nbasis*ni*sizeof(double);
63 mem += (distsize_t)nbasis*a_number*dim_ij*sizeof(double);
64 mem += nvir*a_number*sizeof(double);
65 mem += nvir*nvir*sizeof(double);
66 if (nsocc) {
67 mem += nsocc*sizeof(double);
68 mem += ndocc*nsocc*(nvir-nsocc)*sizeof(double);
69 mem += ndocc*nsocc*(nvir-nsocc)*sizeof(double);
70 }
71 mem += sizeof(double*)*(nbasis);
72 mem += sizeof(double)*((nocc+nvir)*nbasis);
73 return mem;
74}
75
76void
77MBPT2::compute_hsos_v1()
78{
79 int i, j;
80 int s1, s2;
81 int a, b;
82 int isocc, asocc; /* indices running over singly occupied orbitals */
83 int nfuncmax = basis()->max_nfunction_in_shell();
84 int nvir;
85 int nocc=0;
86 int ndocc=0,nsocc=0;
87 int i_offset;
88 int npass, pass;
89 int ni; /* batch size */
90 int nr, ns;
91 int R, S;
92 int q, r, s;
93 int bf3,bf4;
94 int docc_index, socc_index, vir_index;
95 int me;
96 int nproc;
97 int rest;
98 int a_rest;
99 int a_number; /* number of a-values processed by each node */
100 int a_offset;
101 int *a_vector; /* each node's # of iajb integrals for one i,j */
102 int compute_index;
103 int tmp_index;
104 int dim_ij;
105 int nshell;
106 double *evals_open; /* reordered scf eigenvalues */
107 double *trans_int1; /* partially transformed integrals */
108 double *trans_int2; /* partially transformed integrals */
109 double *trans_int3; /* partially transformed integrals */
110 double *trans_int4_node;/* each node's subset of fully transf. integrals */
111 double *trans_int4; /* fully transformed integrals */
112 double *mo_int_do_so_vir=0;/*mo integral (is|sa); i:d.o.,s:s.o.,a:vir */
113 double *mo_int_tmp=0; /* scratch array used in global summations */
114 double *socc_sum=0; /* sum of 2-el integrals involving only s.o.'s */
115 double *iqrs;
116 double *iars_ptr, *iajs_ptr, *iajr_ptr;
117 double iajr;
118 double iars;
119 double *iajb;
120 double *c_qa;
121 double *c_rb, *c_rj, *c_sj;
122 double delta_ijab;
123 double delta;
124 double contrib1, contrib2;
125 double ecorr_opt2=0,ecorr_opt1=0;
126 double ecorr_zapt2;
127 double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
128 double escf;
129 double eopt2,eopt1,ezapt2;
130 double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
131 int ithread;
132
133 me = msg_->me();
134
135 ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v1)" << endl;
136
137 tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
138
139 nproc = msg_->n();
140 ExEnv::out0() << indent << "nproc = " << nproc << endl;
141
142 ndocc = nsocc = 0;
143 const double epsilon = 1.0e-4;
144 for (i=0; i<oso_dimension()->n(); i++) {
145 if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
146 else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
147 }
148
149 /* do a few preliminary tests to make sure the desired calculation *
150 * can be done (and appears to be meaningful!) */
151
152 if (ndocc == 0 && nsocc == 0) {
153 ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
154 abort();
155 }
156
157 if (nfzc > ndocc) {
158 ExEnv::err0()
159 << "The number of frozen core orbitals exceeds the number" << endl
160 << "of doubly occupied orbitals; program exiting" << endl;
161 abort();
162 }
163
164 if (nfzv > noso - ndocc - nsocc) {
165 ExEnv::err0()
166 << "The number of frozen virtual orbitals exceeds the number" << endl
167 << "of unoccupied orbitals; program exiting" << endl;
168 abort();
169 }
170
171 ndocc = ndocc - nfzc;
172 /* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
173 nvir = noso - ndocc - nfzc - nfzv;
174 /* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
175 nocc = ndocc + nsocc;
176
177
178 /* compute number of a-values (a_number) processed by each node */
179
180 a_number = nvir/nproc;
181 a_rest = nvir%nproc;
182 if (me < a_rest) a_number++;
183
184 if (me == 0 && a_number < nsocc) {
185 ExEnv::err0() << "not enough memory allocated" << endl;
186 /* must have all socc's on node 0 for computation of socc_sum*/
187 abort();
188 }
189
190 if (me < a_rest) a_offset = me*a_number; /* a_offset for each node */
191 else a_offset = a_rest*(a_number + 1) + (me - a_rest)*a_number;
192
193 /* fill in elements of a_vector for gcollect */
194
195 a_vector = (int*) malloc(nproc*sizeof(int));
196 if (!a_vector) {
197 ExEnv::errn() << "could not allocate storage for a_vector" << endl;
198 abort();
199 }
200 for (i=0; i<nproc; i++) {
201 a_vector[i] = nvir*(nvir/nproc)*sizeof(double);
202 }
203 for (i=0; i<a_rest; i++) {
204 a_vector[i] += nvir*sizeof(double); /* first a_rest nodes hold an extra a */
205 }
206
207 // Cannot restart when singly occupied orbitals are present
208 if (nsocc) {
209 restart_orbital_v1_ = 0;
210 }
211 else if (restart_orbital_v1_) {
212 ExEnv::out0() << indent
213 << scprintf("Restarting at orbital %d with partial energy %18.14f",
214 restart_orbital_v1_, restart_ecorr_)
215 << endl;
216 }
217
218 /* compute batch size ni for opt2 loops *
219 * need to store the following arrays: trans_int1-4, trans_int4_node, *
220 * scf_vector, evals_open, socc_sum, mo_int_do_so_vir, mo_int_tmp and *
221 * a_vector; *
222 * since a_number is not the same on all nodes, use node 0's a_number *
223 * (which is >= all other a_numbers) and broadcast ni afterwords */
224
225 nshell = basis()->nshell();
226 size_t memused = 0;
227 ni = 0;
228 for (i=1; i<=nocc-restart_orbital_v1_; i++) {
229 distsize_t tmpmem = compute_v1_memory(i,
230 nfuncmax, nbasis, noso,
231 a_number, nshell,
232 ndocc, nsocc, nvir,
233 nfzc, nfzv, nproc);
234 if (tmpmem > mem_alloc) break;
235 ni = i;
236 memused = distsize_to_size(tmpmem);
237 }
238
239 size_t mem_remaining = mem_alloc - memused;
240
241 /* set ni equal to the smallest batch size for any node */
242 msg_->min(ni);
243 msg_->bcast(ni);
244
245 ExEnv::out0() << indent
246 << "Memory available per node: " << mem_alloc << " Bytes"
247 << endl;
248 ExEnv::out0() << indent
249 << "Total memory used per node: " << memused << " Bytes"
250 << endl;
251 ExEnv::out0() << indent
252 << "Memory required for one pass: "
253 << compute_v1_memory(nocc-restart_orbital_v1_,
254 nfuncmax, nbasis, noso, a_number, nshell,
255 ndocc, nsocc, nvir, nfzc, nfzv, nproc)
256 << " Bytes"
257 << endl;
258 ExEnv::out0() << indent
259 << "Minimum memory required: "
260 << compute_v1_memory(1,
261 nfuncmax, nbasis, noso, a_number, nshell,
262 ndocc, nsocc, nvir, nfzc, nfzv, nproc)
263 << " Bytes"
264 << endl;
265 ExEnv::out0() << indent
266 << "Batch size: " << ni
267 << endl;
268
269 if (ni < nsocc) {
270 ExEnv::out0() << indent << "Not enough memory allocated to handle"
271 << " SOCC orbs in first pass" << endl;
272 abort();
273 }
274
275 if (ni < 1) {
276 ExEnv::out0() << indent << "Not enough memory allocated" << endl;
277 abort();
278 }
279
280 rest = (nocc-restart_orbital_v1_)%ni;
281 npass = (nocc - restart_orbital_v1_ - rest)/ni + 1;
282 if (rest == 0) npass--;
283
284 if (me == 0) {
285 ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
286 << " ndocc nsocc nvir nfzc nfzv" << endl;
287 ExEnv::out0() << indent << scprintf(" %-4i %-3i %-5i %-4i %-3i"
288 " %-3i %-3i %-3i %-3i %-3i",
289 npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv)
290 << endl;
291 }
292
293 /* the scf vector might be distributed between the nodes, but for OPT2 *
294 * each node needs its own copy of the vector; *
295 * therefore, put a copy of the scf vector on each node; *
296 * while doing this, duplicate columns corresponding to singly *
297 * occupied orbitals and order columns as [socc docc socc unocc] */
298 /* also rearrange scf eigenvalues as [socc docc socc unocc] *
299 * want socc first to get the socc's in the first batch *
300 * (need socc's to compute energy denominators - see *
301 * socc_sum comment below) */
302
303 evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));
304 if (!evals_open) {
305 ExEnv::errn() << "could not allocate storage for evals_open" << endl;
306 abort();
307 }
308
309 RefDiagSCMatrix occ;
310 RefDiagSCMatrix evals;
311 RefSCMatrix Scf_Vec;
312 eigen(evals, Scf_Vec, occ);
313
314 if (debug_>0) ExEnv::out0() << indent << "eigvenvectors computed" << endl;
315 if (debug_>1) evals.print("eigenvalues");
316 if (debug_>2) Scf_Vec.print("eigenvectors");
317
318 double *scf_vectort_dat = new double[noso*nbasis];
319 Scf_Vec->convert(scf_vectort_dat);
320
321 double** scf_vectort = new double*[nocc + nvir];
322
323 int idoc = 0, ivir = 0, isoc = 0;
324 for (i=nfzc; i<noso-nfzv; i++) {
325 if (occ(i) >= 2.0 - epsilon) {
326 evals_open[idoc+nsocc] = evals(i);
327 scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
328 idoc++;
329 }
330 else if (occ(i) >= 1.0 - epsilon) {
331 evals_open[isoc] = evals(i);
332 scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
333 evals_open[isoc+nocc] = evals(i);
334 scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
335 isoc++;
336 }
337 else {
338 if (ivir < nvir) {
339 evals_open[ivir+nocc+nsocc] = evals(i);
340 scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
341 }
342 ivir++;
343 }
344 }
345
346 // need the transpose of the vector
347 if (debug_>0) ExEnv::out0() << indent << "allocating scf_vector" << endl;
348 double **scf_vector = new double*[nbasis];
349 double *scf_vector_dat = new double[(nocc+nvir)*nbasis];
350 for (i=0; i<nbasis; i++) {
351 scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
352 for (j=0; j<nocc+nvir; j++) {
353 scf_vector[i][j] = scf_vectort[j][i];
354 }
355 }
356 delete[] scf_vectort;
357 delete[] scf_vectort_dat;
358
359 if (debug_>2) {
360 ExEnv::out0() << indent << "Final eigenvalues and vectors" << endl;
361 for (i=0; i<nocc+nvir; i++) {
362 ExEnv::out0() << indent << evals_open[i];
363 for (j=0; j<nbasis; j++) {
364 ExEnv::out0() << " " << scf_vector[j][i];
365 }
366 ExEnv::out0()<< endl;
367 }
368 ExEnv::out0() << endl;
369 }
370
371 /* allocate storage for integral arrays */
372 if (debug_>0) ExEnv::out0() << indent << "allocating intermediates" << endl;
373 dim_ij = nocc*ni - ni*(ni-1)/2;
374
375 trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
376 trans_int2 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));
377 trans_int3 = (double*) malloc(nbasis*a_number*dim_ij*sizeof(double));
378 trans_int4_node= (double*) malloc(nvir*a_number*sizeof(double));
379 trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));
380 if (!(trans_int1 && trans_int2
381 && (!a_number || trans_int3)
382 && (!a_number || trans_int4_node) && trans_int4)){
383 ExEnv::errn() << "could not allocate storage for integral arrays" << endl;
384 abort();
385 }
386 if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));
387 if (nsocc) mo_int_do_so_vir =
388 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
389 if (nsocc) mo_int_tmp =
390 (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));
391
392 if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));
393
394 // create the integrals object
395 if (debug_>0) ExEnv::out0() << indent << "allocating integrals" << endl;
396 integral()->set_storage(mem_remaining);
397 Ref<TwoBodyInt> *tbint = new Ref<TwoBodyInt>[thr_->nthread()];
398 for (ithread=0; ithread<thr_->nthread(); ithread++) {
399 tbint[ithread] = integral()->electron_repulsion();
400 }
401
402 // set up the thread objects
403 Ref<ThreadLock> lock = thr_->new_lock();
404 HSOSV1Erep1Qtr** e1thread = new HSOSV1Erep1Qtr*[thr_->nthread()];
405 for (ithread=0; ithread<thr_->nthread(); ithread++) {
406 e1thread[ithread] = new HSOSV1Erep1Qtr(ithread, thr_->nthread(), me, nproc,
407 lock, basis(), tbint[ithread], ni,
408 scf_vector, tol, debug_);
409 }
410
411 if (debug_>0) ExEnv::out0() << indent << "beginning passes" << endl;
412
413/**************************************************************************
414* begin opt2 loops *
415***************************************************************************/
416
417 int work = ((nshell*(nshell+1))/2);
418 int print_interval = work/100;
419 if (print_interval == 0) print_interval = 1;
420 if (work == 0) work = 1;
421
422 for (pass=0; pass<npass; pass++) {
423 if (debug_) {
424 ExEnv::out0() << indent << "Beginning pass " << pass << endl;
425 }
426
427 int print_index = 0;
428
429 i_offset= pass*ni + restart_orbital_v1_;
430 if ((pass == npass - 1) && (rest != 0)) ni = rest;
431 bzerofast(trans_int3,nbasis*a_number*dim_ij);
432
433 tim_enter("RS loop");
434 for (R = 0; R < basis()->nshell(); R++) {
435 nr = basis()->shell(R).nfunction();
436
437 for (S = 0; S <= R; S++) {
438 ns = basis()->shell(S).nfunction();
439 tim_enter("bzerofast trans_int1");
440 bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);
441 tim_exit("bzerofast trans_int1");
442
443 if (debug_ && (print_index++)%print_interval == 0) {
444 lock->lock();
445 ExEnv::outn() << scprintf("%d: (PQ|%d %d) %d%%",
446 me,R,S,(100*print_index)/work)
447 << endl;
448 lock->unlock();
449 }
450
451 tim_enter("PQ loop");
452
453 for (ithread=0; ithread<thr_->nthread(); ithread++) {
454 e1thread[ithread]->set_data(R,nr,S,ns,ni,i_offset);
455 thr_->add_thread(ithread,e1thread[ithread]);
456 }
457 thr_->start_threads();
458 thr_->wait_threads();
459 for (ithread=0; ithread<thr_->nthread(); ithread++) {
460 e1thread[ithread]->accum_buffer(trans_int1);
461 }
462
463 tim_exit("PQ loop");
464
465 tim_enter("sum int");
466 msg_->sum(trans_int1,nr*ns*nbasis*ni,trans_int2);
467 tim_exit("sum int");
468
469 /* begin second quarter transformation */
470
471 tim_enter("bzerofast trans_int2");
472 bzerofast(trans_int2,nfuncmax*nfuncmax*nbasis*ni);
473 tim_exit("bzerofast trans_int2");
474
475 tim_enter("2. quart. tr.");
476
477 for (bf3 = 0; bf3 < nr; bf3++) {
478
479 for (bf4 = 0; bf4 < ns; bf4++) {
480 if (R == S && bf4 > bf3) continue;
481
482 for (q = 0; q < nbasis; q++) {
483 c_qa = &scf_vector[q][nocc + a_offset];
484 iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];
485 iars_ptr = &trans_int2[((bf4*nr + bf3)*a_number)*ni];
486
487 for (a = 0; a < a_number; a++) {
488
489 for (i=ni; i; i--) {
490 *iars_ptr++ += *c_qa * *iqrs++;
491 }
492
493 iqrs -= ni;
494 c_qa++;
495 }
496 }
497 }
498 }
499 tim_exit("2. quart. tr.");
500
501 /* begin third quarter transformation */
502 tim_enter("3. quart. tr.");
503
504
505 for (bf3 = 0; bf3<nr; bf3++) {
506 r = basis()->shell_to_function(R) + bf3;
507
508 for (bf4 = 0; bf4 <= (R == S ? bf3:(ns-1)); bf4++) {
509 s = basis()->shell_to_function(S) + bf4;
510
511 for (i=0; i<ni; i++) {
512 tmp_index = i*(i+1)/2 + i*i_offset;
513
514 for (a=0; a<a_number; a++) {
515 iars = trans_int2[((bf4*nr + bf3)*a_number + a)*ni + i];
516 if (r == s) iars *= 0.5;
517 iajs_ptr = &trans_int3[tmp_index + dim_ij*(a + a_number*s)];
518 iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + a_number*r)];
519 c_rj = scf_vector[r];
520 c_sj = scf_vector[s];
521
522 for (j=0; j<=i+i_offset; j++) {
523 *iajs_ptr++ += *c_rj++ * iars;
524 *iajr_ptr++ += *c_sj++ * iars;
525 }
526 }
527 }
528 } /* exit bf4 loop */
529 } /* exit bf3 loop */
530 tim_exit("3. quart. tr.");
531 } /* exit S loop */
532 } /* exit R loop */
533 tim_exit("RS loop");
534
535 /* begin fourth quarter transformation; *
536 * first tansform integrals with only s.o. indices; *
537 * these integrals are needed to compute the denominators *
538 * in the various terms contributing to the correlation energy *
539 * and must all be computed in the first pass; *
540 * the integrals are summed into the array socc_sum: *
541 * socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) *
542 * (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
543 * the individual integrals are not saved here, only the sums are kept */
544
545 if (debug_) {
546 ExEnv::out0() << indent << "Beginning 4. quarter transform" << endl;
547 }
548
549 tim_enter("4. quart. tr.");
550 if (pass == 0 && me == 0) {
551 if (nsocc) bzerofast(socc_sum,nsocc);
552 for (isocc=0; isocc<nsocc; isocc++) {
553
554 for (r=0; r<nbasis; r++) {
555
556 for (asocc=0; asocc<nsocc; asocc++) {
557 socc_sum[isocc] += scf_vector[r][nocc+asocc]*
558 trans_int3[isocc*(isocc+1)/2 + isocc*i_offset
559 + isocc + dim_ij*(asocc + a_number*r)];
560 }
561 }
562 }
563 }
564
565 tim_enter("bcast0 socc_sum");
566 if (nsocc) msg_->bcast(socc_sum,nsocc);
567 tim_exit("bcast0 socc_sum");
568
569 tim_exit("4. quart. tr.");
570
571 /* now we have all the sums of integrals involving s.o.'s (socc_sum); *
572 * begin fourth quarter transformation for all integrals (including *
573 * integrals with only s.o. indices); use restriction j <= (i_offset+i) *
574 * to save flops */
575
576 compute_index = 0;
577
578 for (i=0; i<ni; i++) {
579
580 for (j=0; j <= (i_offset+i); j++) {
581
582 tim_enter("4. quart. tr.");
583
584 bzerofast(trans_int4_node,nvir*a_number);
585
586 for (r=0; r<nbasis; r++) {
587
588 for (a=0; a<a_number; a++) {
589 iajb = &trans_int4_node[a*nvir];
590 c_rb = &scf_vector[r][nocc];
591 iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+a_number*r)];
592
593 for (b=0; b<nvir; b++) {
594 *iajb++ += *c_rb++ * iajr;
595 }
596 }
597 }
598
599 tim_exit("4. quart. tr.");
600
601 /* collect each node's part of fully transf. int. into trans_int4 */
602 tim_enter("collect");
603 msg_->collect(trans_int4_node,a_vector,trans_int4);
604 tim_exit("collect");
605
606
607 /* we now have the fully transformed integrals (ia|jb) *
608 * for one i, one j (j <= i_offset+i), and all a and b; *
609 * compute contribution to the OPT1 and OPT2 correlation *
610 * energies; use restriction b <= a to save flops */
611
612 tim_enter("compute ecorr");
613
614 for (a=0; a<nvir; a++) {
615 for (b=0; b<=a; b++) {
616 compute_index++;
617 if (compute_index%nproc != me) continue;
618
619 docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
620 + (j >= nsocc && j < nocc);
621 socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
622 vir_index = (a >= nsocc) + (b >= nsocc);
623
624 if (socc_index >= 3) continue; /* skip to next b value */
625
626 delta_ijab = evals_open[i_offset+i] + evals_open[j]
627 - evals_open[nocc+a] - evals_open[nocc+b];
628
629 /* determine integral type and compute energy contribution */
630 if (docc_index == 2 && vir_index == 2) {
631 if (i_offset+i == j && a == b) {
632 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
633 ecorr_opt2 += contrib1/delta_ijab;
634 ecorr_opt1 += contrib1/delta_ijab;
635 }
636 else if (i_offset+i == j || a == b) {
637 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
638 ecorr_opt2 += 2*contrib1/delta_ijab;
639 ecorr_opt1 += 2*contrib1/delta_ijab;
640 }
641 else {
642 contrib1 = trans_int4[a*nvir + b];
643 contrib2 = trans_int4[b*nvir + a];
644 ecorr_opt2 += 4*(contrib1*contrib1 + contrib2*contrib2
645 - contrib1*contrib2)/delta_ijab;
646 ecorr_opt1 += 4*(contrib1*contrib1 + contrib2*contrib2
647 - contrib1*contrib2)/delta_ijab;
648 }
649 }
650 else if (docc_index == 2 && socc_index == 2) {
651 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
652 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
653 ecorr_opt2 += contrib1/
654 (delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
655 ecorr_opt1 += contrib1/delta_ijab;
656 }
657 else if (socc_index == 2 && vir_index == 2) {
658 contrib1 = (trans_int4[a*nvir + b] - trans_int4[b*nvir + a])*
659 (trans_int4[a*nvir + b] - trans_int4[b*nvir + a]);
660 ecorr_opt2 += contrib1/
661 (delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
662 ecorr_opt1 += contrib1/delta_ijab;
663 }
664 else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
665 if (i_offset+i == j) {
666 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
667 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
668 ecorr_opt1 += contrib1/delta_ijab;
669 }
670 else {
671 contrib1 = trans_int4[a*nvir + b];
672 contrib2 = trans_int4[b*nvir + a];
673 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
674 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[b]);
675 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
676 - contrib1*contrib2)/delta_ijab;
677 }
678 }
679 else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
680 contrib1 = trans_int4[b*nvir+a]*trans_int4[b*nvir+a];
681 if (j == b) {
682 /* to compute the total energy contribution from an integral *
683 * of the type (is1|s1a) (i=d.o., s1=s.o., a=unocc.), we need *
684 * the (is|sa) integrals for all s=s.o.; these integrals are *
685 * therefore stored here in the array mo_int_do_so_vir, and *
686 * the energy contribution is computed after exiting the loop *
687 * over i-batches (pass) */
688 mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
689 (i_offset+i-nsocc + ndocc*b)] =
690 trans_int4[b*nvir + a];
691 ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
692 ecorr_opt1 += 1.5*contrib1/delta_ijab;
693 ecorr_zapt2_contrib += contrib1/
694 (delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
695 + 0.5*contrib1/delta_ijab;
696 }
697 else {
698 ecorr_opt2 += contrib1/
699 (delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
700 ecorr_opt1 += contrib1/delta_ijab;
701 }
702 }
703 else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
704 if (a == b) {
705 contrib1 = trans_int4[a*nvir + b]*trans_int4[a*nvir + b];
706 ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
707 ecorr_opt1 += contrib1/delta_ijab;
708 }
709 else {
710 contrib1 = trans_int4[a*nvir + b];
711 contrib2 = trans_int4[b*nvir + a];
712 ecorr_opt2 += 2*(contrib1*contrib1 + contrib2*contrib2
713 - contrib1*contrib2)/(delta_ijab - 0.5*socc_sum[j]);
714 ecorr_opt1 += 2*(contrib1*contrib1 + contrib2*contrib2
715 - contrib1*contrib2)/delta_ijab;
716 }
717 }
718 } /* exit b loop */
719 } /* exit a loop */
720 tim_exit("compute ecorr");
721 } /* exit j loop */
722 } /* exit i loop */
723
724 if (nsocc == 0 && npass > 1 && pass < npass - 1) {
725 double passe = ecorr_opt2;
726 msg_->sum(passe);
727 ExEnv::out0() << indent
728 << "Partial correlation energy for pass " << pass << ":" << endl;
729 ExEnv::out0() << indent
730 << scprintf(" restart_ecorr = %18.14f", passe)
731 << endl;
732 ExEnv::out0() << indent
733 << scprintf(" restart_orbital_v1 = %d", ((pass+1) * ni))
734 << endl;
735 }
736 } /* exit loop over i-batches (pass) */
737
738 // don't need the AO integrals and threads anymore
739 double aoint_computed = 0.0;
740 for (i=0; i<thr_->nthread(); i++) {
741 tbint[i] = 0;
742 aoint_computed += e1thread[i]->aoint_computed();
743 delete e1thread[i];
744 }
745 delete[] e1thread;
746 delete[] tbint;
747
748 /* compute contribution from excitations of the type is1 -> s1a where *
749 * i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
750 * where i and a have the same spin, contribute to this term; *
751 * (Brillouin's theorem not satisfied for ROHF wave functions); */
752
753 tim_enter("compute ecorr");
754
755 if (nsocc > 0) {
756 tim_enter("sum mo_int_do_so_vir");
757 msg_->sum(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc),mo_int_tmp);
758 tim_exit("sum mo_int_do_so_vir");
759 }
760
761 /* add extra contribution for triplet and higher spin multiplicities *
762 * contribution = sum over s1 and s2<s1 of (is1|s1a)*(is2|s2a)/delta */
763
764 if (me == 0 && nsocc) {
765 for (i=0; i<ndocc; i++) {
766
767 for (a=0; a<nvir-nsocc; a++) {
768 delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
769
770 for (s1=0; s1<nsocc; s1++) {
771
772 for (s2=0; s2<s1; s2++) {
773 contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s1)]*
774 mo_int_do_so_vir[a + (nvir-nsocc)*(i + ndocc*s2)]/delta;
775 ecorr_opt2 += contrib1;
776 ecorr_opt1 += contrib1;
777 }
778 }
779 } /* exit a loop */
780 } /* exit i loop */
781 }
782
783 tim_exit("compute ecorr");
784
785 ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
786 ecorr_opt2 += ecorr_opt2_contrib;
787 msg_->sum(ecorr_opt1);
788 msg_->sum(ecorr_opt2);
789 msg_->sum(ecorr_zapt2);
790 msg_->sum(aoint_computed);
791
792 if (restart_orbital_v1_) {
793 ecorr_opt1 += restart_ecorr_;
794 ecorr_opt2 += restart_ecorr_;
795 ecorr_zapt2 += restart_ecorr_;
796 }
797
798 escf = reference_->energy();
799 hf_energy_ = escf;
800
801 if (me == 0) {
802 eopt2 = escf + ecorr_opt2;
803 eopt1 = escf + ecorr_opt1;
804 ezapt2 = escf + ecorr_zapt2;
805
806 /* print out various energies etc.*/
807
808 ExEnv::out0() << indent
809 << "Number of shell quartets for which AO integrals would" << endl
810 << indent
811 << "have been computed without bounds checking: "
812 << npass*nshell*nshell*(nshell+1)*(nshell+1)/4 << endl;
813 ExEnv::out0() << indent
814 << "Number of shell quartets for which AO integrals" << endl
815 << indent << "were computed: " << aoint_computed << endl;
816 ExEnv::out0() << indent
817 << scprintf("ROHF energy [au]: %17.12lf\n", escf);
818 ExEnv::out0() << indent
819 << scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
820 ExEnv::out0() << indent
821 << scprintf("OPT2 second order correction [au]: %17.12lf\n", ecorr_opt2);
822 ExEnv::out0() << indent
823 << scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
824 ExEnv::out0() << indent
825 << scprintf("ZAPT2 correlation energy [au]: %17.12lf\n", ecorr_zapt2);
826 ExEnv::out0() << indent
827 << scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
828 }
829 msg_->bcast(eopt1);
830 msg_->bcast(eopt2);
831 msg_->bcast(ezapt2);
832
833 if (method_ && !strcmp(method_,"opt1")) {
834 set_energy(eopt1);
835 set_actual_value_accuracy(reference_->actual_value_accuracy()
836 *ref_to_mp2_acc);
837 }
838 else if (method_ && !strcmp(method_,"opt2")) {
839 set_energy(eopt2);
840 set_actual_value_accuracy(reference_->actual_value_accuracy()
841 *ref_to_mp2_acc);
842 }
843 else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
844 set_energy(ezapt2);
845 set_actual_value_accuracy(reference_->actual_value_accuracy()
846 *ref_to_mp2_acc);
847 }
848 else {
849 if (!(!method_ || !strcmp(method_,"zapt"))) {
850 ExEnv::out0() << indent
851 << "MBPT2: bad method: " << method_ << ", using zapt" << endl;
852 }
853 set_energy(ezapt2);
854 set_actual_value_accuracy(reference_->actual_value_accuracy()
855 *ref_to_mp2_acc);
856 }
857
858 free(trans_int1);
859 free(trans_int2);
860 free(trans_int3);
861 free(trans_int4_node);
862 free(trans_int4);
863 free(a_vector);
864 if (nsocc) free(socc_sum);
865 if (nsocc) free(mo_int_do_so_vir);
866 if (nsocc) free(mo_int_tmp);
867 free(evals_open);
868
869 delete[] scf_vector;
870 delete[] scf_vector_dat;
871 }
872
873////////////////////////////////////////////////////////////////////////////
874
875// Local Variables:
876// mode: c++
877// c-file-style: "CLJ-CONDENSED"
878// End:
Note: See TracBrowser for help on using the repository browser.