Context Navigation

hsosv1.cc@ 72461c

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since 72461c was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 30.9 KB

Line
1	//
2	// hsosv1.cc
3	//
4	// Copyright (C) 1996 Limit Point Systems, Inc.
5	//
6	// Author: Ida Nielsen <ida@kemi.aau.dk>
7	// Maintainer: LPS
8	//
9	// This file is part of the SC Toolkit.
10	//
11	// The SC Toolkit is free software; you can redistribute it and/or modify
12	// it under the terms of the GNU Library General Public License as published by
13	// the Free Software Foundation; either version 2, or (at your option)
14	// any later version.
15	//
16	// The SC Toolkit is distributed in the hope that it will be useful,
17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	// GNU Library General Public License for more details.
20	//
21	// You should have received a copy of the GNU Library General Public License
22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24	//
25	// The U.S. Government is granted a limited license as per AL 91-7.
26	//
27
28	typedef int dmt_matrix;
29
30	#include <stdlib.h>
31	#include <math.h>
32
33	#include <util/misc/formio.h>
34	#include <util/misc/timer.h>
35	#include <util/class/class.h>
36	#include <util/state/state.h>
37	#include <util/group/message.h>
38	#include <math/scmat/matrix.h>
39	#include <chemistry/molecule/molecule.h>
40	#include <chemistry/qc/scf/scf.h>
41	#include <chemistry/qc/mbpt/mbpt.h>
42	#include <chemistry/qc/mbpt/bzerofast.h>
43	#include <chemistry/qc/mbpt/hsosv1e1.h>
44
45	using namespace std;
46	using namespace sc;
47
48	static distsize_t
49	compute_v1_memory(int ni,
50	int nfuncmax, int nbasis, int noso,
51	int a_number, int nshell,
52	int ndocc, int nsocc, int nvir,
53	int nfzc, int nfzv,
54	int nproc)
55	{
56	distsize_t mem = 0;
57	int nocc = ndocc + nsocc;
58	int dim_ij = noccni - (ni(ni-1))/2;
59	mem += nproc*sizeof(int);
60	mem += (noso+nsocc-nfzc-nfzv)*sizeof(double);
61	mem += nfuncmaxnfuncmaxnbasisnisizeof(double);
62	mem += nfuncmaxnfuncmaxnbasisnisizeof(double);
63	mem += (distsize_t)nbasisa_numberdim_ij*sizeof(double);
64	mem += nvira_numbersizeof(double);
65	mem += nvirnvirsizeof(double);
66	if (nsocc) {
67	mem += nsocc*sizeof(double);
68	mem += ndoccnsocc(nvir-nsocc)*sizeof(double);
69	mem += ndoccnsocc(nvir-nsocc)*sizeof(double);
70	}
71	mem += sizeof(double)(nbasis);
72	mem += sizeof(double)((nocc+nvir)nbasis);
73	return mem;
74	}
75
76	void
77	MBPT2::compute_hsos_v1()
78	{
79	int i, j;
80	int s1, s2;
81	int a, b;
82	int isocc, asocc; /* indices running over singly occupied orbitals */
83	int nfuncmax = basis()->max_nfunction_in_shell();
84	int nvir;
85	int nocc=0;
86	int ndocc=0,nsocc=0;
87	int i_offset;
88	int npass, pass;
89	int ni; /* batch size */
90	int nr, ns;
91	int R, S;
92	int q, r, s;
93	int bf3,bf4;
94	int docc_index, socc_index, vir_index;
95	int me;
96	int nproc;
97	int rest;
98	int a_rest;
99	int a_number; /* number of a-values processed by each node */
100	int a_offset;
101	int a_vector; / each node's # of iajb integrals for one i,j */
102	int compute_index;
103	int tmp_index;
104	int dim_ij;
105	int nshell;
106	double evals_open; / reordered scf eigenvalues */
107	double trans_int1; / partially transformed integrals */
108	double trans_int2; / partially transformed integrals */
109	double trans_int3; / partially transformed integrals */
110	double trans_int4_node;/ each node's subset of fully transf. integrals */
111	double trans_int4; / fully transformed integrals */
112	double mo_int_do_so_vir=0;/mo integral (is\|sa); i:d.o.,s:s.o.,a:vir */
113	double mo_int_tmp=0; / scratch array used in global summations */
114	double socc_sum=0; / sum of 2-el integrals involving only s.o.'s */
115	double *iqrs;
116	double iars_ptr, iajs_ptr, *iajr_ptr;
117	double iajr;
118	double iars;
119	double *iajb;
120	double *c_qa;
121	double c_rb, c_rj, *c_sj;
122	double delta_ijab;
123	double delta;
124	double contrib1, contrib2;
125	double ecorr_opt2=0,ecorr_opt1=0;
126	double ecorr_zapt2;
127	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
128	double escf;
129	double eopt2,eopt1,ezapt2;
130	double tol; /* log2 of the erep tolerance (erep < 2^tol => discard) */
131	int ithread;
132
133	me = msg_->me();
134
135	ExEnv::out0() << indent << "Just entered OPT2 program (opt2_v1)" << endl;
136
137	tol = (int) (-10.0/log10(2.0)); /* discard ereps smaller than 10^-10 */
138
139	nproc = msg_->n();
140	ExEnv::out0() << indent << "nproc = " << nproc << endl;
141
142	ndocc = nsocc = 0;
143	const double epsilon = 1.0e-4;
144	for (i=0; i<oso_dimension()->n(); i++) {
145	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
146	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
147	}
148
149	/* do a few preliminary tests to make sure the desired calculation *
150	* can be done (and appears to be meaningful!) */
151
152	if (ndocc == 0 && nsocc == 0) {
153	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
154	abort();
155	}
156
157	if (nfzc > ndocc) {
158	ExEnv::err0()
159	<< "The number of frozen core orbitals exceeds the number" << endl
160	<< "of doubly occupied orbitals; program exiting" << endl;
161	abort();
162	}
163
164	if (nfzv > noso - ndocc - nsocc) {
165	ExEnv::err0()
166	<< "The number of frozen virtual orbitals exceeds the number" << endl
167	<< "of unoccupied orbitals; program exiting" << endl;
168	abort();
169	}
170
171	ndocc = ndocc - nfzc;
172	/* nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. */
173	nvir = noso - ndocc - nfzc - nfzv;
174	/* nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. */
175	nocc = ndocc + nsocc;
176
177
178	/* compute number of a-values (a_number) processed by each node */
179
180	a_number = nvir/nproc;
181	a_rest = nvir%nproc;
182	if (me < a_rest) a_number++;
183
184	if (me == 0 && a_number < nsocc) {
185	ExEnv::err0() << "not enough memory allocated" << endl;
186	/* must have all socc's on node 0 for computation of socc_sum*/
187	abort();
188	}
189
190	if (me < a_rest) a_offset = mea_number; / a_offset for each node */
191	else a_offset = a_rest(a_number + 1) + (me - a_rest)a_number;
192
193	/* fill in elements of a_vector for gcollect */
194
195	a_vector = (int) malloc(nprocsizeof(int));
196	if (!a_vector) {
197	ExEnv::errn() << "could not allocate storage for a_vector" << endl;
198	abort();
199	}
200	for (i=0; i<nproc; i++) {
201	a_vector[i] = nvir(nvir/nproc)sizeof(double);
202	}
203	for (i=0; i<a_rest; i++) {
204	a_vector[i] += nvirsizeof(double); / first a_rest nodes hold an extra a */
205	}
206
207	// Cannot restart when singly occupied orbitals are present
208	if (nsocc) {
209	restart_orbital_v1_ = 0;
210	}
211	else if (restart_orbital_v1_) {
212	ExEnv::out0() << indent
213	<< scprintf("Restarting at orbital %d with partial energy %18.14f",
214	restart_orbital_v1_, restart_ecorr_)
215	<< endl;
216	}
217
218	/* compute batch size ni for opt2 loops *
219	* need to store the following arrays: trans_int1-4, trans_int4_node, *
220	* scf_vector, evals_open, socc_sum, mo_int_do_so_vir, mo_int_tmp and *
221	* a_vector; *
222	* since a_number is not the same on all nodes, use node 0's a_number *
223	* (which is >= all other a_numbers) and broadcast ni afterwords */
224
225	nshell = basis()->nshell();
226	size_t memused = 0;
227	ni = 0;
228	for (i=1; i<=nocc-restart_orbital_v1_; i++) {
229	distsize_t tmpmem = compute_v1_memory(i,
230	nfuncmax, nbasis, noso,
231	a_number, nshell,
232	ndocc, nsocc, nvir,
233	nfzc, nfzv, nproc);
234	if (tmpmem > mem_alloc) break;
235	ni = i;
236	memused = distsize_to_size(tmpmem);
237	}
238
239	size_t mem_remaining = mem_alloc - memused;
240
241	/* set ni equal to the smallest batch size for any node */
242	msg_->min(ni);
243	msg_->bcast(ni);
244
245	ExEnv::out0() << indent
246	<< "Memory available per node: " << mem_alloc << " Bytes"
247	<< endl;
248	ExEnv::out0() << indent
249	<< "Total memory used per node: " << memused << " Bytes"
250	<< endl;
251	ExEnv::out0() << indent
252	<< "Memory required for one pass: "
253	<< compute_v1_memory(nocc-restart_orbital_v1_,
254	nfuncmax, nbasis, noso, a_number, nshell,
255	ndocc, nsocc, nvir, nfzc, nfzv, nproc)
256	<< " Bytes"
257	<< endl;
258	ExEnv::out0() << indent
259	<< "Minimum memory required: "
260	<< compute_v1_memory(1,
261	nfuncmax, nbasis, noso, a_number, nshell,
262	ndocc, nsocc, nvir, nfzc, nfzv, nproc)
263	<< " Bytes"
264	<< endl;
265	ExEnv::out0() << indent
266	<< "Batch size: " << ni
267	<< endl;
268
269	if (ni < nsocc) {
270	ExEnv::out0() << indent << "Not enough memory allocated to handle"
271	<< " SOCC orbs in first pass" << endl;
272	abort();
273	}
274
275	if (ni < 1) {
276	ExEnv::out0() << indent << "Not enough memory allocated" << endl;
277	abort();
278	}
279
280	rest = (nocc-restart_orbital_v1_)%ni;
281	npass = (nocc - restart_orbital_v1_ - rest)/ni + 1;
282	if (rest == 0) npass--;
283
284	if (me == 0) {
285	ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
286	<< " ndocc nsocc nvir nfzc nfzv" << endl;
287	ExEnv::out0() << indent << scprintf(" %-4i %-3i %-5i %-4i %-3i"
288	" %-3i %-3i %-3i %-3i %-3i",
289	npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv)
290	<< endl;
291	}
292
293	/* the scf vector might be distributed between the nodes, but for OPT2 *
294	* each node needs its own copy of the vector; *
295	* therefore, put a copy of the scf vector on each node; *
296	* while doing this, duplicate columns corresponding to singly *
297	* occupied orbitals and order columns as [socc docc socc unocc] */
298	/* also rearrange scf eigenvalues as [socc docc socc unocc] *
299	* want socc first to get the socc's in the first batch *
300	* (need socc's to compute energy denominators - see *
301	* socc_sum comment below) */
302
303	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
304	if (!evals_open) {
305	ExEnv::errn() << "could not allocate storage for evals_open" << endl;
306	abort();
307	}
308
309	RefDiagSCMatrix occ;
310	RefDiagSCMatrix evals;
311	RefSCMatrix Scf_Vec;
312	eigen(evals, Scf_Vec, occ);
313
314	if (debug_>0) ExEnv::out0() << indent << "eigvenvectors computed" << endl;
315	if (debug_>1) evals.print("eigenvalues");
316	if (debug_>2) Scf_Vec.print("eigenvectors");
317
318	double scf_vectort_dat = new double[nosonbasis];
319	Scf_Vec->convert(scf_vectort_dat);
320
321	double** scf_vectort = new double*[nocc + nvir];
322
323	int idoc = 0, ivir = 0, isoc = 0;
324	for (i=nfzc; i<noso-nfzv; i++) {
325	if (occ(i) >= 2.0 - epsilon) {
326	evals_open[idoc+nsocc] = evals(i);
327	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
328	idoc++;
329	}
330	else if (occ(i) >= 1.0 - epsilon) {
331	evals_open[isoc] = evals(i);
332	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
333	evals_open[isoc+nocc] = evals(i);
334	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
335	isoc++;
336	}
337	else {
338	if (ivir < nvir) {
339	evals_open[ivir+nocc+nsocc] = evals(i);
340	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
341	}
342	ivir++;
343	}
344	}
345
346	// need the transpose of the vector
347	if (debug_>0) ExEnv::out0() << indent << "allocating scf_vector" << endl;
348	double *scf_vector = new double[nbasis];
349	double scf_vector_dat = new double[(nocc+nvir)nbasis];
350	for (i=0; i<nbasis; i++) {
351	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
352	for (j=0; j<nocc+nvir; j++) {
353	scf_vector[i][j] = scf_vectort[j][i];
354	}
355	}
356	delete[] scf_vectort;
357	delete[] scf_vectort_dat;
358
359	if (debug_>2) {
360	ExEnv::out0() << indent << "Final eigenvalues and vectors" << endl;
361	for (i=0; i<nocc+nvir; i++) {
362	ExEnv::out0() << indent << evals_open[i];
363	for (j=0; j<nbasis; j++) {
364	ExEnv::out0() << " " << scf_vector[j][i];
365	}
366	ExEnv::out0()<< endl;
367	}
368	ExEnv::out0() << endl;
369	}
370
371	/* allocate storage for integral arrays */
372	if (debug_>0) ExEnv::out0() << indent << "allocating intermediates" << endl;
373	dim_ij = noccni - ni(ni-1)/2;
374
375	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
376	trans_int2 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
377	trans_int3 = (double) malloc(nbasisa_numberdim_ijsizeof(double));
378	trans_int4_node= (double) malloc(nvira_number*sizeof(double));
379	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
380	if (!(trans_int1 && trans_int2
381	&& (!a_number \|\| trans_int3)
382	&& (!a_number \|\| trans_int4_node) && trans_int4)){
383	ExEnv::errn() << "could not allocate storage for integral arrays" << endl;
384	abort();
385	}
386	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
387	if (nsocc) mo_int_do_so_vir =
388	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
389	if (nsocc) mo_int_tmp =
390	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
391
392	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
393
394	// create the integrals object
395	if (debug_>0) ExEnv::out0() << indent << "allocating integrals" << endl;
396	integral()->set_storage(mem_remaining);
397	Ref<TwoBodyInt> *tbint = new Ref<TwoBodyInt>[thr_->nthread()];
398	for (ithread=0; ithread<thr_->nthread(); ithread++) {
399	tbint[ithread] = integral()->electron_repulsion();
400	}
401
402	// set up the thread objects
403	Ref<ThreadLock> lock = thr_->new_lock();
404	HSOSV1Erep1Qtr** e1thread = new HSOSV1Erep1Qtr*[thr_->nthread()];
405	for (ithread=0; ithread<thr_->nthread(); ithread++) {
406	e1thread[ithread] = new HSOSV1Erep1Qtr(ithread, thr_->nthread(), me, nproc,
407	lock, basis(), tbint[ithread], ni,
408	scf_vector, tol, debug_);
409	}
410
411	if (debug_>0) ExEnv::out0() << indent << "beginning passes" << endl;
412
413	/**************************************************************************
414	* begin opt2 loops *
415	***************************************************************************/
416
417	int work = ((nshell*(nshell+1))/2);
418	int print_interval = work/100;
419	if (print_interval == 0) print_interval = 1;
420	if (work == 0) work = 1;
421
422	for (pass=0; pass<npass; pass++) {
423	if (debug_) {
424	ExEnv::out0() << indent << "Beginning pass " << pass << endl;
425	}
426
427	int print_index = 0;
428
429	i_offset= pass*ni + restart_orbital_v1_;
430	if ((pass == npass - 1) && (rest != 0)) ni = rest;
431	bzerofast(trans_int3,nbasisa_numberdim_ij);
432
433	tim_enter("RS loop");
434	for (R = 0; R < basis()->nshell(); R++) {
435	nr = basis()->shell(R).nfunction();
436
437	for (S = 0; S <= R; S++) {
438	ns = basis()->shell(S).nfunction();
439	tim_enter("bzerofast trans_int1");
440	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
441	tim_exit("bzerofast trans_int1");
442
443	if (debug_ && (print_index++)%print_interval == 0) {
444	lock->lock();
445	ExEnv::outn() << scprintf("%d: (PQ\|%d %d) %d%%",
446	me,R,S,(100*print_index)/work)
447	<< endl;
448	lock->unlock();
449	}
450
451	tim_enter("PQ loop");
452
453	for (ithread=0; ithread<thr_->nthread(); ithread++) {
454	e1thread[ithread]->set_data(R,nr,S,ns,ni,i_offset);
455	thr_->add_thread(ithread,e1thread[ithread]);
456	}
457	thr_->start_threads();
458	thr_->wait_threads();
459	for (ithread=0; ithread<thr_->nthread(); ithread++) {
460	e1thread[ithread]->accum_buffer(trans_int1);
461	}
462
463	tim_exit("PQ loop");
464
465	tim_enter("sum int");
466	msg_->sum(trans_int1,nrnsnbasis*ni,trans_int2);
467	tim_exit("sum int");
468
469	/* begin second quarter transformation */
470
471	tim_enter("bzerofast trans_int2");
472	bzerofast(trans_int2,nfuncmaxnfuncmaxnbasis*ni);
473	tim_exit("bzerofast trans_int2");
474
475	tim_enter("2. quart. tr.");
476
477	for (bf3 = 0; bf3 < nr; bf3++) {
478
479	for (bf4 = 0; bf4 < ns; bf4++) {
480	if (R == S && bf4 > bf3) continue;
481
482	for (q = 0; q < nbasis; q++) {
483	c_qa = &scf_vector[q][nocc + a_offset];
484	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
485	iars_ptr = &trans_int2[((bf4nr + bf3)a_number)*ni];
486
487	for (a = 0; a < a_number; a++) {
488
489	for (i=ni; i; i--) {
490	iars_ptr++ += c_qa * *iqrs++;
491	}
492
493	iqrs -= ni;
494	c_qa++;
495	}
496	}
497	}
498	}
499	tim_exit("2. quart. tr.");
500
501	/* begin third quarter transformation */
502	tim_enter("3. quart. tr.");
503
504
505	for (bf3 = 0; bf3<nr; bf3++) {
506	r = basis()->shell_to_function(R) + bf3;
507
508	for (bf4 = 0; bf4 <= (R == S ? bf3:(ns-1)); bf4++) {
509	s = basis()->shell_to_function(S) + bf4;
510
511	for (i=0; i<ni; i++) {
512	tmp_index = i(i+1)/2 + ii_offset;
513
514	for (a=0; a<a_number; a++) {
515	iars = trans_int2[((bf4nr + bf3)a_number + a)*ni + i];
516	if (r == s) iars *= 0.5;
517	iajs_ptr = &trans_int3[tmp_index + dim_ij(a + a_numbers)];
518	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + a_numberr)];
519	c_rj = scf_vector[r];
520	c_sj = scf_vector[s];
521
522	for (j=0; j<=i+i_offset; j++) {
523	iajs_ptr++ += c_rj++ * iars;
524	iajr_ptr++ += c_sj++ * iars;
525	}
526	}
527	}
528	} /* exit bf4 loop */
529	} /* exit bf3 loop */
530	tim_exit("3. quart. tr.");
531	} /* exit S loop */
532	} /* exit R loop */
533	tim_exit("RS loop");
534
535	/* begin fourth quarter transformation; *
536	* first tansform integrals with only s.o. indices; *
537	* these integrals are needed to compute the denominators *
538	* in the various terms contributing to the correlation energy *
539	* and must all be computed in the first pass; *
540	* the integrals are summed into the array socc_sum: *
541	* socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc) *
542	* (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) *
543	* the individual integrals are not saved here, only the sums are kept */
544
545	if (debug_) {
546	ExEnv::out0() << indent << "Beginning 4. quarter transform" << endl;
547	}
548
549	tim_enter("4. quart. tr.");
550	if (pass == 0 && me == 0) {
551	if (nsocc) bzerofast(socc_sum,nsocc);
552	for (isocc=0; isocc<nsocc; isocc++) {
553
554	for (r=0; r<nbasis; r++) {
555
556	for (asocc=0; asocc<nsocc; asocc++) {
557	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
558	trans_int3[isocc(isocc+1)/2 + isocci_offset
559	+ isocc + dim_ij(asocc + a_numberr)];
560	}
561	}
562	}
563	}
564
565	tim_enter("bcast0 socc_sum");
566	if (nsocc) msg_->bcast(socc_sum,nsocc);
567	tim_exit("bcast0 socc_sum");
568
569	tim_exit("4. quart. tr.");
570
571	/* now we have all the sums of integrals involving s.o.'s (socc_sum); *
572	* begin fourth quarter transformation for all integrals (including *
573	* integrals with only s.o. indices); use restriction j <= (i_offset+i) *
574	* to save flops */
575
576	compute_index = 0;
577
578	for (i=0; i<ni; i++) {
579
580	for (j=0; j <= (i_offset+i); j++) {
581
582	tim_enter("4. quart. tr.");
583
584	bzerofast(trans_int4_node,nvir*a_number);
585
586	for (r=0; r<nbasis; r++) {
587
588	for (a=0; a<a_number; a++) {
589	iajb = &trans_int4_node[a*nvir];
590	c_rb = &scf_vector[r][nocc];
591	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+a_numberr)];
592
593	for (b=0; b<nvir; b++) {
594	iajb++ += c_rb++ * iajr;
595	}
596	}
597	}
598
599	tim_exit("4. quart. tr.");
600
601	/* collect each node's part of fully transf. int. into trans_int4 */
602	tim_enter("collect");
603	msg_->collect(trans_int4_node,a_vector,trans_int4);
604	tim_exit("collect");
605
606
607	/* we now have the fully transformed integrals (ia\|jb) *
608	* for one i, one j (j <= i_offset+i), and all a and b; *
609	* compute contribution to the OPT1 and OPT2 correlation *
610	* energies; use restriction b <= a to save flops */
611
612	tim_enter("compute ecorr");
613
614	for (a=0; a<nvir; a++) {
615	for (b=0; b<=a; b++) {
616	compute_index++;
617	if (compute_index%nproc != me) continue;
618
619	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
620	+ (j >= nsocc && j < nocc);
621	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
622	vir_index = (a >= nsocc) + (b >= nsocc);
623
624	if (socc_index >= 3) continue; /* skip to next b value */
625
626	delta_ijab = evals_open[i_offset+i] + evals_open[j]
627	- evals_open[nocc+a] - evals_open[nocc+b];
628
629	/* determine integral type and compute energy contribution */
630	if (docc_index == 2 && vir_index == 2) {
631	if (i_offset+i == j && a == b) {
632	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
633	ecorr_opt2 += contrib1/delta_ijab;
634	ecorr_opt1 += contrib1/delta_ijab;
635	}
636	else if (i_offset+i == j \|\| a == b) {
637	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
638	ecorr_opt2 += 2*contrib1/delta_ijab;
639	ecorr_opt1 += 2*contrib1/delta_ijab;
640	}
641	else {
642	contrib1 = trans_int4[a*nvir + b];
643	contrib2 = trans_int4[b*nvir + a];
644	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
645	- contrib1*contrib2)/delta_ijab;
646	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
647	- contrib1*contrib2)/delta_ijab;
648	}
649	}
650	else if (docc_index == 2 && socc_index == 2) {
651	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
652	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
653	ecorr_opt2 += contrib1/
654	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
655	ecorr_opt1 += contrib1/delta_ijab;
656	}
657	else if (socc_index == 2 && vir_index == 2) {
658	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
659	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
660	ecorr_opt2 += contrib1/
661	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
662	ecorr_opt1 += contrib1/delta_ijab;
663	}
664	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
665	if (i_offset+i == j) {
666	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
667	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
668	ecorr_opt1 += contrib1/delta_ijab;
669	}
670	else {
671	contrib1 = trans_int4[a*nvir + b];
672	contrib2 = trans_int4[b*nvir + a];
673	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
674	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
675	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
676	- contrib1*contrib2)/delta_ijab;
677	}
678	}
679	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
680	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
681	if (j == b) {
682	/* to compute the total energy contribution from an integral *
683	* of the type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need *
684	* the (is\|sa) integrals for all s=s.o.; these integrals are *
685	* therefore stored here in the array mo_int_do_so_vir, and *
686	* the energy contribution is computed after exiting the loop *
687	* over i-batches (pass) */
688	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
689	(i_offset+i-nsocc + ndocc*b)] =
690	trans_int4[b*nvir + a];
691	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
692	ecorr_opt1 += 1.5*contrib1/delta_ijab;
693	ecorr_zapt2_contrib += contrib1/
694	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
695	+ 0.5*contrib1/delta_ijab;
696	}
697	else {
698	ecorr_opt2 += contrib1/
699	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
700	ecorr_opt1 += contrib1/delta_ijab;
701	}
702	}
703	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
704	if (a == b) {
705	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
706	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
707	ecorr_opt1 += contrib1/delta_ijab;
708	}
709	else {
710	contrib1 = trans_int4[a*nvir + b];
711	contrib2 = trans_int4[b*nvir + a];
712	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
713	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
714	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
715	- contrib1*contrib2)/delta_ijab;
716	}
717	}
718	} /* exit b loop */
719	} /* exit a loop */
720	tim_exit("compute ecorr");
721	} /* exit j loop */
722	} /* exit i loop */
723
724	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
725	double passe = ecorr_opt2;
726	msg_->sum(passe);
727	ExEnv::out0() << indent
728	<< "Partial correlation energy for pass " << pass << ":" << endl;
729	ExEnv::out0() << indent
730	<< scprintf(" restart_ecorr = %18.14f", passe)
731	<< endl;
732	ExEnv::out0() << indent
733	<< scprintf(" restart_orbital_v1 = %d", ((pass+1) * ni))
734	<< endl;
735	}
736	} /* exit loop over i-batches (pass) */
737
738	// don't need the AO integrals and threads anymore
739	double aoint_computed = 0.0;
740	for (i=0; i<thr_->nthread(); i++) {
741	tbint[i] = 0;
742	aoint_computed += e1thread[i]->aoint_computed();
743	delete e1thread[i];
744	}
745	delete[] e1thread;
746	delete[] tbint;
747
748	/* compute contribution from excitations of the type is1 -> s1a where *
749	* i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a, *
750	* where i and a have the same spin, contribute to this term; *
751	* (Brillouin's theorem not satisfied for ROHF wave functions); */
752
753	tim_enter("compute ecorr");
754
755	if (nsocc > 0) {
756	tim_enter("sum mo_int_do_so_vir");
757	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
758	tim_exit("sum mo_int_do_so_vir");
759	}
760
761	/* add extra contribution for triplet and higher spin multiplicities *
762	* contribution = sum over s1 and s2<s1 of (is1\|s1a)(is2\|s2a)/delta /
763
764	if (me == 0 && nsocc) {
765	for (i=0; i<ndocc; i++) {
766
767	for (a=0; a<nvir-nsocc; a++) {
768	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
769
770	for (s1=0; s1<nsocc; s1++) {
771
772	for (s2=0; s2<s1; s2++) {
773	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
774	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
775	ecorr_opt2 += contrib1;
776	ecorr_opt1 += contrib1;
777	}
778	}
779	} /* exit a loop */
780	} /* exit i loop */
781	}
782
783	tim_exit("compute ecorr");
784
785	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
786	ecorr_opt2 += ecorr_opt2_contrib;
787	msg_->sum(ecorr_opt1);
788	msg_->sum(ecorr_opt2);
789	msg_->sum(ecorr_zapt2);
790	msg_->sum(aoint_computed);
791
792	if (restart_orbital_v1_) {
793	ecorr_opt1 += restart_ecorr_;
794	ecorr_opt2 += restart_ecorr_;
795	ecorr_zapt2 += restart_ecorr_;
796	}
797
798	escf = reference_->energy();
799	hf_energy_ = escf;
800
801	if (me == 0) {
802	eopt2 = escf + ecorr_opt2;
803	eopt1 = escf + ecorr_opt1;
804	ezapt2 = escf + ecorr_zapt2;
805
806	/* print out various energies etc.*/
807
808	ExEnv::out0() << indent
809	<< "Number of shell quartets for which AO integrals would" << endl
810	<< indent
811	<< "have been computed without bounds checking: "
812	<< npassnshellnshell(nshell+1)(nshell+1)/4 << endl;
813	ExEnv::out0() << indent
814	<< "Number of shell quartets for which AO integrals" << endl
815	<< indent << "were computed: " << aoint_computed << endl;
816	ExEnv::out0() << indent
817	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
818	ExEnv::out0() << indent
819	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
820	ExEnv::out0() << indent
821	<< scprintf("OPT2 second order correction [au]: %17.12lf\n", ecorr_opt2);
822	ExEnv::out0() << indent
823	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
824	ExEnv::out0() << indent
825	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n", ecorr_zapt2);
826	ExEnv::out0() << indent
827	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
828	}
829	msg_->bcast(eopt1);
830	msg_->bcast(eopt2);
831	msg_->bcast(ezapt2);
832
833	if (method_ && !strcmp(method_,"opt1")) {
834	set_energy(eopt1);
835	set_actual_value_accuracy(reference_->actual_value_accuracy()
836	*ref_to_mp2_acc);
837	}
838	else if (method_ && !strcmp(method_,"opt2")) {
839	set_energy(eopt2);
840	set_actual_value_accuracy(reference_->actual_value_accuracy()
841	*ref_to_mp2_acc);
842	}
843	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
844	set_energy(ezapt2);
845	set_actual_value_accuracy(reference_->actual_value_accuracy()
846	*ref_to_mp2_acc);
847	}
848	else {
849	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
850	ExEnv::out0() << indent
851	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
852	}
853	set_energy(ezapt2);
854	set_actual_value_accuracy(reference_->actual_value_accuracy()
855	*ref_to_mp2_acc);
856	}
857
858	free(trans_int1);
859	free(trans_int2);
860	free(trans_int3);
861	free(trans_int4_node);
862	free(trans_int4);
863	free(a_vector);
864	if (nsocc) free(socc_sum);
865	if (nsocc) free(mo_int_do_so_vir);
866	if (nsocc) free(mo_int_tmp);
867	free(evals_open);
868
869	delete[] scf_vector;
870	delete[] scf_vector_dat;
871	}
872
873	////////////////////////////////////////////////////////////////////////////
874
875	// Local Variables:
876	// mode: c++
877	// c-file-style: "CLJ-CONDENSED"
878	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv1.cc@ 72461c

Download in other formats: