Context Navigation

hsosv2lb.cc@ 72461c

Visit:

Action_Thermostats Add_AtomRandomPerturbation Add_RotateAroundBondAction Add_SelectAtomByNameAction Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_StructOpt_integration_tests AutomationFragmentation_failures Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_ChronosMutex Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion GeometryObjects Gui_displays_atomic_force_velocity IndependentFragmentGrids_IntegrationTest JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks RotateToPrincipalAxisSystem_UndoRedo StoppableMakroAction Subpackage_levmar Subpackage_vmg ThirdParty_MPQC_rebuilt_buildsystem TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps Ubuntu_1604_changes stable

Last change on this file since 72461c was 860145, checked in by Frederik Heber <heber@…>, 8 years ago
Merge commit '0b990dfaa8c6007a996d030163a25f7f5fc8a7e7' as 'ThirdParty/mpqc_open'
Property mode set to `100644`
File size: 35.5 KB

Line
1	//
2	// hsosv2lb.cc
3	//
4	// Copyright (C) 1996 Limit Point Systems, Inc.
5	//
6	// Author: Ida Nielsen <ida@kemi.aau.dk>
7	// Maintainer: LPS
8	//
9	// This file is part of the SC Toolkit.
10	//
11	// The SC Toolkit is free software; you can redistribute it and/or modify
12	// it under the terms of the GNU Library General Public License as published by
13	// the Free Software Foundation; either version 2, or (at your option)
14	// any later version.
15	//
16	// The SC Toolkit is distributed in the hope that it will be useful,
17	// but WITHOUT ANY WARRANTY; without even the implied warranty of
18	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	// GNU Library General Public License for more details.
20	//
21	// You should have received a copy of the GNU Library General Public License
22	// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23	// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24	//
25	// The U.S. Government is granted a limited license as per AL 91-7.
26	//
27
28	#include <math.h>
29
30	#include <util/misc/timer.h>
31	#include <util/misc/formio.h>
32	#include <chemistry/molecule/molecule.h>
33	#include <chemistry/qc/mbpt/mbpt.h>
34	#include <chemistry/qc/mbpt/bzerofast.h>
35
36	using namespace std;
37	using namespace sc;
38
39	static void iqs(int item,int index,int left,int right);
40	static void iquicksort(int item,int index,int n);
41	static void findprocminmax(int *nbf, int nproc,
42	int procmin, int procmax, int minbf, int maxbf);
43	static void findshellmax(int myshellsizes, int nRshell, int shellmax,
44	int *shellmaxindex);
45	static void expandintarray(int *&a, int dim);
46
47	void
48	MBPT2::compute_hsos_v2_lb()
49	{
50	int i, j, k, l;
51	int s1, s2;
52	int a, b;
53	int isocc, asocc; // indices running over singly occupied orbitals
54	int nfuncmax = basis()->max_nfunction_in_shell();
55	int nvir;
56	int nshell;
57	int shellmax;
58	int shellmaxindex;
59	int nocc=0,ndocc=0,nsocc=0;
60	int i_offset;
61	int npass, pass;
62	int ni;
63	int np, nq, nr, ns;
64	int P, Q, R, S;
65	int p, q, r, s;
66	int bf1, bf2, bf3, bf4;
67	int bf3_offset;
68	int nbfmoved;
69	int nbfav; // average number of r basis functions per node
70	int minbf, maxbf; // max/min number of (r) basis functions on a node
71	int index;
72	int compute_index;
73	int col_index;
74	int tmp_index;
75	int dim_ij;
76	int docc_index, socc_index, vir_index;
77	int me;
78	int nproc;
79	int procmin, procmax; // processor with most/fewest basis functions
80	int rest;
81	int r_offset;
82	int min;
83	int iproc;
84	int nRshell;
85	int imyshell;
86	int *myshells; // the R indices processed by node me
87	int *myshellsizes; // sizes of the shells (after split) on node me
88	int *split_info; // on each node: offset for each shell; -1 if shell not split
89	int *shellsize; // size of each shell
90	int *sorted_shells; // sorted shell indices: large shells->small shells
91	int *nbf; // number of basis functions processed by each node
92	int *proc; // element k: processor which will process shell k
93	int aoint_computed = 0;
94	double A, B, C, ni_top, max, ni_double; // variables used to compute ni
95	double *evals_open; // reordered scf eigenvalues
96	const double *intbuf; // 2-electron AO integral buffer
97	double *trans_int1; // partially transformed integrals
98	double *trans_int2; // partially transformed integrals
99	double *trans_int3; // partially transformed integrals
100	double *trans_int4; // fully transformed integrals
101	double *trans_int4_tmp; // scratch array
102	double *mo_int_do_so_vir=0;//mo integral (is\|sa); i:d.o.,s:s.o.,a:vir
103	double *mo_int_tmp=0; // scratch array used in global summations
104	double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s
105	double *socc_sum_tmp=0;// scratch array
106	double iqrs, iprs;
107	double *iars_ptr;
108	double iars;
109	double iajr;
110	double *iajr_ptr;
111	double *iajb;
112	double pqrs;
113	double *c_qa;
114	double c_rb, c_pi, c_qi, c_sj;
115	double delta_ijab;
116	double delta;
117	double contrib1, contrib2;
118	double ecorr_opt2=0,ecorr_opt1=0;
119	double ecorr_zapt2;
120	double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0;
121	double escf;
122	double eopt2,eopt1,ezapt2;
123	double tol; // log2 of the erep tolerance (erep < 2^tol => discard)
124
125	me = msg_->me();
126
127	ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl;
128
129	tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10
130
131	nproc = msg_->n();
132	ExEnv::out0() << indent << "nproc = " << nproc << endl;
133
134	ndocc = nsocc = 0;
135	const double epsilon = 1.0e-4;
136	for (i=0; i<oso_dimension()->n(); i++) {
137	if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++;
138	else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++;
139	}
140
141	// Do a few preliminary tests to make sure the desired calculation
142	// can be done (and appears to be meaningful!)
143
144	if (ndocc == 0 && nsocc == 0) {
145	ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl;
146	abort();
147	}
148
149	if (nfzc > ndocc) {
150	ExEnv::err0()
151	<< "The number of frozen core orbitals exceeds the number" << endl
152	<< "of doubly occupied orbitals; program exiting" << endl;
153	abort();
154	}
155
156	if (nfzv > noso - ndocc - nsocc) {
157	ExEnv::err0()
158	<< "The number of frozen virtual orbitals exceeds the number" << endl
159	<< "of unoccupied orbitals; program exiting" << endl;
160	abort();
161	}
162
163	ndocc = ndocc - nfzc;
164	// nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb.
165	nvir = noso - ndocc - nfzc - nfzv;
166	// nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb.
167	nocc = ndocc + nsocc;
168	nshell = basis()->nshell();
169
170	// Allocate storage for some arrays used for keeping track of which R
171	// indices are processed by each node
172	shellsize = (int) malloc(nshellsizeof(int));
173	sorted_shells = (int) malloc(nshellsizeof(int));
174	nbf = (int) malloc(nprocsizeof(int));
175	proc = (int) malloc(nshellsizeof(int));
176
177
178	///////////////////////////////////////////////////////
179	// Begin distributing R shells between nodes so all
180	// nodes get ca. the same number of r basis functions
181	///////////////////////////////////////////////////////
182
183	// Compute the size of each shell
184	for (i=0; i<nshell; i++) {
185	shellsize[i] = basis()->shell(i).nfunction();
186	}
187
188	// Do an index sort (large -> small) of shellsize to form sorted_shells
189	iquicksort(shellsize,sorted_shells,nshell);
190
191	// Initialize nbf
192	for (i=0; i<nproc; i++) nbf[i] = 0;
193
194	for (i=0; i<nshell; i++) {
195	min = nbf[0];
196	iproc = 0;
197	for (j=1; j<nproc; j++) {
198	if (nbf[j] < min) {
199	iproc = j;
200	min = nbf[j];
201	}
202	}
203	proc[sorted_shells[i]] = iproc;
204	nbf[iproc] += shellsize[sorted_shells[i]];
205	}
206	if (me == 0) {
207	ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl;
208	for (i=0; i<nproc; i++) {
209	if (i%12 == 0) ExEnv::out0() << indent;
210	ExEnv::out0() << scprintf(" %4i",nbf[i]);
211	if ((i+1)%12 == 0) ExEnv::out0() << endl;
212	}
213	ExEnv::out0() << endl;
214	}
215
216	// Determine which shells are to be processed by node me
217	nRshell = 0;
218	for (i=0; i<nshell; i++) {
219	if (proc[i] == me) nRshell++;
220	}
221	myshells = (int) malloc(nRshellsizeof(int));
222	imyshell = 0;
223	for (i=0; i<nshell; i++) {
224	if (proc[i] == me) {
225	myshells[imyshell] = i;
226	imyshell++;
227	}
228	}
229
230	/////////////////////////////////////////////////////////////
231	// End of preliminary distribution of R shells between nodes
232	/////////////////////////////////////////////////////////////
233
234	// Compute the average number of basis functions per node
235	nbfav = nbasis/nproc;
236	if (nbasis%nproc) nbfav++;
237
238	myshellsizes = (int) malloc(nRshellsizeof(int));
239	split_info = (int) malloc(nRshellsizeof(int));
240	for (j=0; j<nRshell; j++) {
241	myshellsizes[j] = basis()->shell(myshells[j]).nfunction();
242	split_info[j] = -1;
243	}
244
245	// Find the processor with the most/fewest basis functions
246	findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
247	if (maxbf > nbfav) {
248	ExEnv::out0() << indent << "Redistributing basis functions" << endl;
249	}
250
251	while (maxbf > nbfav) {
252	msg_->sync();
253	if (me == procmax) {
254
255	findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex);
256	nbfmoved = 0;
257	while (maxbf>nbfav && minbf<nbfav && shellmax>1) {
258	shellmax--;
259	nbfmoved++;
260	maxbf--;
261	minbf++;
262	}
263	myshellsizes[shellmaxindex] = shellmax;
264	if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0;
265	shellmax += nbfmoved;
266
267	// Send nbfmoved from procmax to all other nodes
268	msg_->bcast(nbfmoved,procmax);
269
270	// Send variables to node procmin
271	msg_->send(procmin,&myshells[shellmaxindex],1);
272	msg_->send(procmin,&shellmax,1);
273
274	}
275	else {
276	// Receive nbfmoved from procmax
277	msg_->bcast(nbfmoved,procmax);
278	}
279
280	nbf[procmax] -= nbfmoved;
281
282	if (me == procmin) {
283	expandintarray(myshellsizes,nRshell);
284	expandintarray(myshells,nRshell);
285	expandintarray(split_info,nRshell);
286	nRshell++;
287	myshellsizes[nRshell-1] = nbfmoved;
288	msg_->recv(procmax,&myshells[nRshell-1],1);
289	msg_->recv(procmax,&split_info[nRshell-1],1);
290	split_info[nRshell-1] -= myshellsizes[nRshell-1];
291	}
292
293	nbf[procmin] += nbfmoved;
294	msg_->sync();
295	findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf);
296
297	}
298
299	if (me == 0) {
300	ExEnv::out0() << indent
301	<< "New distribution of basis functions between nodes:" << endl;
302	for (i=0; i<nproc; i++) {
303	if (i%12 == 0) ExEnv::out0() << indent;
304	ExEnv::out0() << scprintf(" %4i",nbf[i]);
305	if ((i+1)%12 == 0) ExEnv::out0() << endl;
306	}
307	ExEnv::out0() << endl;
308	}
309
310
311	//////////////////////////////////////////////////////////
312	// End of distribution of R shells and r basis functions
313	//////////////////////////////////////////////////////////
314
315	// Compute batch size ni for opt2 loops;
316	// need to store the following arrays of type double : trans_int1-4,
317	// trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp,
318	// mo_int_do_so_vir, mo_int_tmp,
319	// and the following arrays of type int: myshells, shellsize,
320	// sorted_shells, nbf, and proc
321	A = -0.5sizeof(double)nbf[me]*nvir;
322	B = sizeof(double)(nfuncmaxnfuncmaxnbasis + nvir + noccnbf[me]*nvir
323	+ nbf[me]nvir0.5);
324	C = sizeof(double)(2nvirnvir + (nbasis+1)(nvir+nocc) + 2*nsocc
325	+ 2ndoccnsocc*(nvir-nsocc))
326	+ sizeof(int)(3nshell + nproc + nRshell);
327	ni_top = -B/(2*A);
328	max = Ani_topni_top + B*ni_top +C;
329	if (max <= mem_alloc) {
330	ni = nocc;
331	}
332	else {
333	ni_double = (-B + sqrt((double)(BB - 4A(C-mem_alloc))))/(2A);
334	ni = (int) ni_double;
335	if (ni > nocc) ni = nocc;
336	max = mem_alloc;
337	}
338
339	size_t mem_remaining = mem_alloc - (size_t)max;
340
341	// Set ni equal to the smallest batch size for any node
342	msg_->min(ni);
343	msg_->bcast(ni);
344
345	if (ni < nsocc) {
346	ExEnv::err0() << "Not enough memory allocated" << endl;
347	abort();
348	}
349
350	if (ni < 1) { // this applies only to a closed shell case
351	ExEnv::err0() << "Not enough memory allocated" << endl;
352	abort();
353	}
354
355	ExEnv::out0() << indent << "Computed batchsize: " << ni << endl;
356
357	if (nocc == ni) {
358	npass = 1;
359	rest = 0;
360	}
361	else {
362	rest = nocc%ni;
363	npass = (nocc - rest)/ni + 1;
364	if (rest == 0) npass--;
365	}
366
367	if (me == 0) {
368	ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax"
369	" ndocc nsocc nvir nfzc nfzv" << endl;
370	ExEnv::out0() << indent
371	<< scprintf(" %-4i %-3i %-5i %-4i %-3i"
372	" %-3i %-3i %-3i %-3i %-3i\n",
373	npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);
374	ExEnv::out0() << indent
375	<< scprintf("Using %i bytes of memory",mem_alloc) << endl;
376	}
377
378	//////////////////////
379	// Test that ni is OK
380	//////////////////////
381	if (me == 0) {
382	ExEnv::out0() << indent
383	<< scprintf("Memory allocated: %i", mem_alloc) << endl;
384	ExEnv::out0() << indent
385	<< scprintf("Memory used : %lf", Anini+B*ni+C) << endl;
386	if (Anini + B*ni +C > mem_alloc) {
387	ExEnv::err0() << "Problems with memory allocation: "
388	<< "Using more memory than allocated" << endl;
389	abort();
390	}
391	}
392
393	//////////////////////////////////////////////////////////////////
394	// The scf vector might be distributed between the nodes,
395	// but for OPT2 each node needs its own copy of the vector;
396	// therefore, put a copy of the scf vector on each node;
397	// while doing this, duplicate columns corresponding to singly
398	// occupied orbitals and order columns as [socc docc socc unocc]
399	// Also rearrange scf eigenvalues as [socc docc socc unocc]
400	// want socc first to get the socc's in the first batch
401	// (need socc's to compute energy denominators - see
402	// socc_sum comment below)
403	/////////////////////////////////////////////////////////
404	evals_open = (double) malloc((noso+nsocc-nfzc-nfzv)sizeof(double));
405
406	RefDiagSCMatrix occ;
407	RefDiagSCMatrix evals;
408	RefSCMatrix Scf_Vec;
409	eigen(evals, Scf_Vec, occ);
410
411	if (debug_) {
412	evals.print("eigenvalues");
413	Scf_Vec.print("eigenvectors");
414	}
415
416	double scf_vectort_dat = new double[nbasisnoso];
417	Scf_Vec->convert(scf_vectort_dat);
418
419	double** scf_vectort = new double*[nocc + nvir];
420
421	int idoc = 0, ivir = 0, isoc = 0;
422	for (i=nfzc; i<noso-nfzv; i++) {
423	if (occ(i) >= 2.0 - epsilon) {
424	evals_open[idoc+nsocc] = evals(i);
425	scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];
426	idoc++;
427	}
428	else if (occ(i) >= 1.0 - epsilon) {
429	evals_open[isoc] = evals(i);
430	scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];
431	evals_open[isoc+nocc] = evals(i);
432	scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];
433	isoc++;
434	}
435	else {
436	if (ivir < nvir) {
437	evals_open[ivir+nocc+nsocc] = evals(i);
438	scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];
439	}
440	ivir++;
441	}
442	}
443	// need the transpose of the vector
444	double *scf_vector = new double[nbasis];
445	double scf_vector_dat = new double[(nocc+nvir)nbasis];
446	for (i=0; i<nbasis; i++) {
447	scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];
448	for (j=0; j<nocc+nvir; j++) {
449	scf_vector[i][j] = scf_vectort[j][i];
450	}
451	}
452	delete[] scf_vectort;
453	delete[] scf_vectort_dat;
454
455	////////////////////////////////////////
456	// Allocate storage for various arrays
457	////////////////////////////////////////
458
459	dim_ij = noccni - ni(ni - 1)/2;
460
461	trans_int1 = (double) malloc(nfuncmaxnfuncmaxnbasisni*sizeof(double));
462	trans_int2 = (double) malloc(nvirni*sizeof(double));
463	trans_int3 = (double) malloc(nbf[me]nvirdim_ijsizeof(double));
464	trans_int4 = (double) malloc(nvirnvir*sizeof(double));
465	trans_int4_tmp = (double) malloc(nvirnvir*sizeof(double));
466	if (nsocc) socc_sum = (double) malloc(nsoccsizeof(double));
467	if (nsocc) socc_sum_tmp = (double) malloc(nsoccsizeof(double));
468	if (nsocc) mo_int_do_so_vir =
469	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
470	if (nsocc) mo_int_tmp =
471	(double) malloc(ndoccnsocc(nvir-nsocc)sizeof(double));
472
473	if (nsocc) bzerofast(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc));
474
475	// create the integrals object
476	integral()->set_storage(mem_remaining);
477	tbint_ = integral()->electron_repulsion();
478	intbuf = tbint_->buffer();
479
480	/////////////////////////////////////
481	// Begin opt2 loops
482	/////////////////////////////////////
483
484
485	for (pass=0; pass<npass; pass++) {
486	i_offset = pass*ni;
487	if ((pass == npass - 1) && (rest != 0)) ni = rest;
488
489	r_offset = 0;
490	bzerofast(trans_int3,nbf[me]nvirdim_ij);
491
492	tim_enter("RS loop");
493
494	for (imyshell=0; imyshell<nRshell; imyshell++) {
495
496	R = myshells[imyshell];
497	nr = myshellsizes[imyshell];
498
499	for (S = 0; S < nshell; S++) {
500	ns = basis()->shell(S).nfunction();
501	tim_enter("bzerofast trans_int1");
502	bzerofast(trans_int1,nfuncmaxnfuncmaxnbasis*ni);
503	tim_exit("bzerofast trans_int1");
504
505	tim_enter("PQ loop");
506
507	for (P = 0; P < nshell; P++) {
508	np = basis()->shell(P).nfunction();
509
510	for (Q = 0; Q <= P; Q++) {
511	if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {
512	continue; // skip ereps less than tol
513	}
514
515	aoint_computed++;
516
517	nq = basis()->shell(Q).nfunction();
518
519	tim_enter("erep");
520	tbint_->compute_shell(P,Q,R,S);
521	tim_exit("erep");
522
523	tim_enter("1. quart. tr.");
524
525	for (bf1 = 0; bf1 < np; bf1++) {
526	p = basis()->shell_to_function(P) + bf1;
527
528	for (bf2 = 0; bf2 < nq; bf2++) {
529	q = basis()->shell_to_function(Q) + bf2;
530	if (q > p) {
531	// if q > p: want to skip the loops over bf3-4
532	// and larger bf2 values, so increment bf1 by 1
533	// ("break")
534	break;
535	}
536
537	for (bf3 = 0; bf3 < nr; bf3++) {
538	bf3_offset = 0;
539	if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];
540
541	for (bf4 = 0; bf4 < ns; bf4++) {
542
543	index = bf4 + ns*(bf3+bf3_offset +
544	basis()->shell(R).nfunction()(bf2 + nqbf1));
545
546	if (fabs(intbuf[index]) > 1.0e-15) {
547	pqrs = intbuf[index];
548
549	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
550	iprs = &trans_int1[((bf4nr + bf3)nbasis + p)*ni];
551
552	if (p == q) pqrs *= 0.5;
553	col_index = i_offset;
554	c_pi = &scf_vector[p][col_index];
555	c_qi = &scf_vector[q][col_index];
556
557	for (i=ni; i; i--) {
558	iqrs++ += pqrs *c_pi++;
559	iprs++ += pqrs *c_qi++;
560	}
561	}
562	} // exit bf4 loop
563	} // exit bf3 loop
564	} // exit bf2 loop
565	} // exit bf1 loop
566	tim_exit("1. quart. tr.");
567	} // exit Q loop
568	} // exit P loop
569	tim_exit("PQ loop");
570
571	// Begin second and third quarter transformations
572
573	for (bf3 = 0; bf3 < nr; bf3++) {
574	r = r_offset + bf3;
575
576	for (bf4 = 0; bf4 < ns; bf4++) {
577	s = basis()->shell_to_function(S) + bf4;
578
579	tim_enter("bzerofast trans_int2");
580	bzerofast(trans_int2,nvir*ni);
581	tim_exit("bzerofast trans_int2");
582
583	tim_enter("2. quart. tr.");
584
585	for (q = 0; q < nbasis; q++) {
586	iars_ptr = trans_int2;
587	iqrs = &trans_int1[((bf4nr + bf3)nbasis + q)*ni];
588	c_qa = &scf_vector[q][nocc];
589
590	for (a = 0; a < nvir; a++) {
591
592	for (i=ni; i; i--) {
593	iars_ptr++ += c_qa * *iqrs++;
594	}
595
596	iqrs -= ni;
597	c_qa++;
598	}
599	} // exit q loop
600	tim_exit("2. quart. tr.");
601
602	// Begin third quarter transformation
603
604	tim_enter("3. quart. tr.");
605
606	for (i=0; i<ni; i++) {
607	tmp_index = i(i+1)/2 + ii_offset;
608
609	for (a=0; a<nvir; a++) {
610	iars = trans_int2[a*ni + i];
611	c_sj = scf_vector[s];
612	iajr_ptr = &trans_int3[tmp_index + dim_ij(a + nvirr)];
613
614	for (j=0; j<=i+i_offset; j++) {
615	iajr_ptr++ += c_sj++ * iars;
616	}
617	}
618	} // exit i loop
619	tim_exit("3. quart. tr.");
620
621	} // exit bf4 loop
622	} // exit bf3 loop
623
624	} // exit S loop
625	r_offset += nr;
626	} // exit R loop
627	tim_exit("RS loop");
628
629	// Begin fourth quarter transformation;
630	// first tansform integrals with only s.o. indices;
631	// these integrals are needed to compute the denominators
632	// in the various terms contributing to the correlation energy
633	// and must all be computed in the first pass;
634	// the integrals are summed into the array socc_sum:
635	// socc_sum[isocc] = sum over asocc of (isocc asocc\|asocc isocc)
636	// (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)
637	// the individual integrals are not saved here, only the sums are kept
638
639	if (pass == 0) {
640	tim_enter("4. quart. tr.");
641	if (nsocc) bzerofast(socc_sum,nsocc);
642	for (isocc=0; isocc<nsocc; isocc++) {
643
644	index = 0;
645	for (i=0; i<nRshell; i++) {
646	for (j=0; j<myshellsizes[i]; j++) {
647	r = basis()->shell_to_function(myshells[i]) + j;
648	if (split_info[i] != -1) r += split_info[i];
649
650	for (asocc=0; asocc<nsocc; asocc++) {
651	socc_sum[isocc] += scf_vector[r][nocc+asocc]*
652	trans_int3[isocc(isocc+1)/2 + isocci_offset
653	+ isocc + dim_ij(asocc + nvirindex)];
654	}
655	index++;
656	}
657	}
658	} // exit i loop
659
660	tim_exit("4. quart. tr.");
661
662	// Sum socc_sum contributions from each node (only if nsocc > 0
663	// since gop1 will fail if nsocc = 0)
664	if (nsocc > 0) {
665	tim_enter("global sum socc_sum");
666	msg_->sum(socc_sum,nsocc,socc_sum_tmp);
667	tim_exit("global sum socc_sum");
668	}
669
670	}
671
672	// Now we have all the sums of integrals involving s.o.'s (socc_sum);
673	// begin fourth quarter transformation for all integrals (including
674	// integrals with only s.o. indices); use restriction j <= (i_offset+i)
675	// to save flops
676
677	compute_index = 0;
678
679	for (i=0; i<ni; i++) {
680
681	for (j=0; j <= (i_offset+i); j++) {
682
683	tim_enter("4. quart. tr.");
684
685	bzerofast(trans_int4,nvir*nvir);
686
687	index = 0;
688	for (k=0; k<nRshell; k++) {
689	for (l=0; l<myshellsizes[k]; l++) {
690	r = basis()->shell_to_function(myshells[k]) + l;
691	if (split_info[k] != -1) r += split_info[k];
692
693	for (a=0; a<nvir; a++) {
694	iajb = &trans_int4[a*nvir];
695	iajr = trans_int3[i(i+1)/2 + ii_offset + j + dim_ij(a+nvirindex)];
696	c_rb = &scf_vector[r][nocc];
697
698	for (b=0; b<nvir; b++) {
699	iajb++ += c_rb++ * iajr;
700	}
701	}
702	index++;
703	}
704	} // end of k loop
705
706	tim_exit("4. quart. tr.");
707
708	tim_enter("global sum trans_int4");
709	msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);
710	tim_exit("global sum trans_int4");
711
712	// We now have the fully transformed integrals (ia\|jb)
713	// for one i, one j (j <= i_offset+i), and all a and b;
714	// compute contribution to the OPT1 and OPT2 correlation
715	// energies; use restriction b <= a to save flops
716
717	tim_enter("compute ecorr");
718
719	for (a=0; a<nvir; a++) {
720	for (b=0; b<=a; b++) {
721	compute_index++;
722	if (compute_index%nproc != me) continue;
723
724	docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)
725	+ (j >= nsocc && j < nocc);
726	socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);
727	vir_index = (a >= nsocc) + (b >= nsocc);
728
729	if (socc_index >= 3) continue; // skip to next b value
730
731	delta_ijab = evals_open[i_offset+i] + evals_open[j]
732	- evals_open[nocc+a] - evals_open[nocc+b];
733
734	// Determine integral type and compute energy contribution
735	if (docc_index == 2 && vir_index == 2) {
736	if (i_offset+i == j && a == b) {
737	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
738	ecorr_opt2 += contrib1/delta_ijab;
739	ecorr_opt1 += contrib1/delta_ijab;
740	}
741	else if (i_offset+i == j \|\| a == b) {
742	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
743	ecorr_opt2 += 2*contrib1/delta_ijab;
744	ecorr_opt1 += 2*contrib1/delta_ijab;
745	}
746	else {
747	contrib1 = trans_int4[a*nvir + b];
748	contrib2 = trans_int4[b*nvir + a];
749	ecorr_opt2 += 4(contrib1contrib1 + contrib2*contrib2
750	- contrib1*contrib2)/delta_ijab;
751	ecorr_opt1 += 4(contrib1contrib1 + contrib2*contrib2
752	- contrib1*contrib2)/delta_ijab;
753	}
754	}
755	else if (docc_index == 2 && socc_index == 2) {
756	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
757	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
758	ecorr_opt2 += contrib1/
759	(delta_ijab - 0.5*(socc_sum[a]+socc_sum[b]));
760	ecorr_opt1 += contrib1/delta_ijab;
761	}
762	else if (socc_index == 2 && vir_index == 2) {
763	contrib1 = (trans_int4[anvir + b] - trans_int4[bnvir + a])*
764	(trans_int4[anvir + b] - trans_int4[bnvir + a]);
765	ecorr_opt2 += contrib1/
766	(delta_ijab - 0.5*(socc_sum[i_offset+i]+socc_sum[j]));
767	ecorr_opt1 += contrib1/delta_ijab;
768	}
769	else if (docc_index == 2 && socc_index == 1 && vir_index == 1) {
770	if (i_offset+i == j) {
771	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
772	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[b]);
773	ecorr_opt1 += contrib1/delta_ijab;
774	}
775	else {
776	contrib1 = trans_int4[a*nvir + b];
777	contrib2 = trans_int4[b*nvir + a];
778	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
779	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[b]);
780	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
781	- contrib1*contrib2)/delta_ijab;
782	}
783	}
784	else if (docc_index == 1 && socc_index == 2 && vir_index == 1) {
785	contrib1 = trans_int4[bnvir+a]trans_int4[b*nvir+a];
786	if (j == b) {
787	// To compute the energy contribution from an integral of the
788	// type (is1\|s1a) (i=d.o., s1=s.o., a=unocc.), we need the
789	// (is\|sa) integrals for all s=s.o.; these integrals are
790	// therefore stored here in the array mo_int_do_so_vir, and
791	// the energy contribution is computed after exiting the loop
792	// over i-batches (pass)
793	mo_int_do_so_vir[a-nsocc + (nvir-nsocc)*
794	(i_offset+i-nsocc + ndocc*b)] =
795	trans_int4[b*nvir + a];
796	ecorr_opt2_contrib += 1.5*contrib1/delta_ijab;
797	ecorr_opt1 += 1.5*contrib1/delta_ijab;
798	ecorr_zapt2_contrib += contrib1/
799	(delta_ijab - 0.5*(socc_sum[j]+socc_sum[b]))
800	+ 0.5*contrib1/delta_ijab;
801	}
802	else {
803	ecorr_opt2 += contrib1/
804	(delta_ijab - 0.5*(socc_sum[j] + socc_sum[b]));
805	ecorr_opt1 += contrib1/delta_ijab;
806	}
807	}
808	else if (docc_index == 1 && socc_index == 1 && vir_index == 2) {
809	if (a == b) {
810	contrib1 = trans_int4[anvir + b]trans_int4[a*nvir + b];
811	ecorr_opt2 += contrib1/(delta_ijab - 0.5*socc_sum[j]);
812	ecorr_opt1 += contrib1/delta_ijab;
813	}
814	else {
815	contrib1 = trans_int4[a*nvir + b];
816	contrib2 = trans_int4[b*nvir + a];
817	ecorr_opt2 += 2(contrib1contrib1 + contrib2*contrib2
818	- contrib1contrib2)/(delta_ijab - 0.5socc_sum[j]);
819	ecorr_opt1 += 2(contrib1contrib1 + contrib2*contrib2
820	- contrib1*contrib2)/delta_ijab;
821	}
822	}
823	} // exit b loop
824	} // exit a loop
825	tim_exit("compute ecorr");
826	} // exit j loop
827	} // exit i loop
828
829	if (nsocc == 0 && npass > 1 && pass < npass - 1) {
830	double passe = ecorr_opt2;
831	msg_->sum(passe);
832	ExEnv::out0() << indent
833	<< "Partial correlation energy for pass " << pass << ":" << endl;
834	ExEnv::out0() << indent
835	<< scprintf(" restart_ecorr = %14.10f", passe)
836	<< endl;
837	ExEnv::out0() << indent
838	<< scprintf(" restart_orbital_v2lb = %d", ((pass+1) * ni))
839	<< endl;
840	}
841	} // exit loop over i-batches (pass)
842
843
844
845	// Compute contribution from excitations of the type is1 -> s1a where
846	// i=d.o., s1=s.o. and a=unocc; single excitations of the type i -> a,
847	// where i and a have the same spin, contribute to this term;
848	// (Brillouin's theorem not satisfied for ROHF wave functions);
849	// do this only if nsocc > 0 since gop1 will fail otherwise
850
851	tim_enter("compute ecorr");
852
853	if (nsocc > 0) {
854	tim_enter("global sum mo_int_do_so_vir");
855	msg_->sum(mo_int_do_so_vir,ndoccnsocc(nvir-nsocc),mo_int_tmp);
856	tim_exit("global sum mo_int_do_so_vir");
857	}
858
859	// Add extra contribution for triplet and higher spin multiplicities
860	// contribution = sum over s1 and s2<s1 of (is1\|s1a)*(is2\|s2a)/delta
861
862	if (me == 0 && nsocc) {
863	for (i=0; i<ndocc; i++) {
864
865	for (a=0; a<nvir-nsocc; a++) {
866	delta = evals_open[nsocc+i] - evals_open[nocc+nsocc+a];
867
868	for (s1=0; s1<nsocc; s1++) {
869
870	for (s2=0; s2<s1; s2++) {
871	contrib1 = mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs1)]*
872	mo_int_do_so_vir[a + (nvir-nsocc)(i + ndoccs2)]/delta;
873	ecorr_opt2 += contrib1;
874	ecorr_opt1 += contrib1;
875	}
876	}
877	} // exit a loop
878	} // exit i loop
879	}
880
881	tim_exit("compute ecorr");
882
883	ecorr_zapt2 = ecorr_opt2 + ecorr_zapt2_contrib;
884	ecorr_opt2 += ecorr_opt2_contrib;
885	msg_->sum(ecorr_opt1);
886	msg_->sum(ecorr_opt2);
887	msg_->sum(ecorr_zapt2);
888	msg_->sum(aoint_computed);
889
890	escf = reference_->energy();
891	hf_energy_ = escf;
892
893	if (me == 0) {
894	eopt2 = escf + ecorr_opt2;
895	eopt1 = escf + ecorr_opt1;
896	ezapt2 = escf + ecorr_zapt2;
897
898	// Print out various energies etc.
899	ExEnv::out0() << indent
900	<< "Number of shell quartets for which AO integrals would" << endl
901	<< indent << "have been computed without bounds checking: "
902	<< npassnshellnshell(nshell+1)(nshell+1)/2 << endl;
903	ExEnv::out0() << indent
904	<< "Number of shell quartets for which AO integrals" << endl
905	<< indent << "were computed: " << aoint_computed << endl;
906
907	ExEnv::out0() << indent
908	<< scprintf("ROHF energy [au]: %17.12lf\n", escf);
909	ExEnv::out0() << indent
910	<< scprintf("OPT1 energy [au]: %17.12lf\n", eopt1);
911	ExEnv::out0() << indent
912	<< scprintf("OPT2 second order correction [au]: %17.12lf\n",
913	ecorr_opt2);
914	ExEnv::out0() << indent
915	<< scprintf("OPT2 energy [au]: %17.12lf\n", eopt2);
916	ExEnv::out0() << indent
917	<< scprintf("ZAPT2 correlation energy [au]: %17.12lf\n",
918	ecorr_zapt2);
919	ExEnv::out0() << indent
920	<< scprintf("ZAPT2 energy [au]: %17.12lf\n", ezapt2);
921	ExEnv::out0().flush();
922	}
923
924	msg_->bcast(eopt1);
925	msg_->bcast(eopt2);
926	msg_->bcast(ezapt2);
927
928	if (method_ && !strcmp(method_,"opt1")) {
929	set_energy(eopt1);
930	set_actual_value_accuracy(reference_->actual_value_accuracy()
931	*ref_to_mp2_acc);
932	}
933	else if (method_ && !strcmp(method_,"opt2")) {
934	set_energy(eopt2);
935	set_actual_value_accuracy(reference_->actual_value_accuracy()
936	*ref_to_mp2_acc);
937	}
938	else if (method_ && nsocc == 0 && !strcmp(method_,"mp")) {
939	set_energy(ezapt2);
940	set_actual_value_accuracy(reference_->actual_value_accuracy()
941	*ref_to_mp2_acc);
942	}
943	else {
944	if (!(!method_ \|\| !strcmp(method_,"zapt"))) {
945	ExEnv::out0() << indent
946	<< "MBPT2: bad method: " << method_ << ", using zapt" << endl;
947	}
948	set_energy(ezapt2);
949	set_actual_value_accuracy(reference_->actual_value_accuracy()
950	*ref_to_mp2_acc);
951	}
952
953	free(trans_int1);
954	free(trans_int2);
955	free(trans_int3);
956	free(trans_int4);
957	free(trans_int4_tmp);
958	if (nsocc) free(socc_sum);
959	if (nsocc) free(socc_sum_tmp);
960	if (nsocc) free(mo_int_do_so_vir);
961	if (nsocc) free(mo_int_tmp);
962	free(evals_open);
963	free(myshells);
964	free(shellsize);
965	free (myshellsizes);
966	free (split_info);
967	free(sorted_shells);
968	free(nbf);
969	free(proc);
970
971	delete[] scf_vector;
972	delete[] scf_vector_dat;
973
974	}
975
976	/////////////////////////////////////////////////////////////////
977	// Function iquicksort performs a quick sort (larger -> smaller)
978	// of the integer data in item by the integer indices in index;
979	// data in item remain unchanged
980	/////////////////////////////////////////////////////////////////
981	static void
982	iquicksort(int item,int index,int n)
983	{
984	int i;
985	if (n<=0) return;
986	for (i=0; i<n; i++) {
987	index[i] = i;
988	}
989	iqs(item,index,0,n-1);
990	}
991
992	static void
993	iqs(int item,int index,int left,int right)
994	{
995	register int i,j;
996	int x,y;
997
998	i=left; j=right;
999	x=item[index[(left+right)/2]];
1000
1001	do {
1002	while(item[index[i]]>x && i<right) i++;
1003	while(x>item[index[j]] && j>left) j--;
1004
1005	if (i<=j) {
1006	if (item[index[i]] != item[index[j]]) {
1007	y=index[i];
1008	index[i]=index[j];
1009	index[j]=y;
1010	}
1011	i++; j--;
1012	}
1013	} while(i<=j);
1014
1015	if (left<j) iqs(item,index,left,j);
1016	if (i<right) iqs(item,index,i,right);
1017	}
1018
1019	////////////////////////////////////////////////////////////////////
1020	// Function findprocminmax finds the processor with the most/fewest
1021	// basis functions and the corresponding number of basis functions
1022	////////////////////////////////////////////////////////////////////
1023	static void
1024	findprocminmax(int *nbf, int nproc,
1025	int procmin, int procmax, int minbf, int maxbf)
1026	{
1027	int i;
1028
1029	procmax = procmin = 0;
1030	*maxbf = nbf[0];
1031	*minbf = nbf[0];
1032
1033	for (i=1; i<nproc; i++) {
1034	if (nbf[i] > *maxbf) {
1035	*maxbf = nbf[i];
1036	*procmax = i;
1037	}
1038	if (nbf[i] < *minbf) {
1039	*minbf = nbf[i];
1040	*procmin = i;
1041	}
1042	}
1043	}
1044
1045	/////////////////////////////////////////////////////////////////
1046	// Function findshellmax finds the largest shell on a processor
1047	/////////////////////////////////////////////////////////////////
1048	static void
1049	findshellmax(int myshellsizes, int nRshell, int shellmax, int *shellmaxindex)
1050	{
1051	int i;
1052
1053	*shellmax = myshellsizes[0];
1054	*shellmaxindex = 0;
1055
1056	for (i=1; i<nRshell; i++) {
1057	if (myshellsizes[i] > *shellmax) {
1058	*shellmax = myshellsizes[i];
1059	*shellmaxindex = i;
1060	}
1061	}
1062	}
1063
1064	//////////////////////////////////////////////////////////////
1065	// Function expand_array expands the dimension of an array of
1066	// doubles by 1;
1067	// NB: THE ARRAY MUST HAVE BEEN ALLOCATED WITH MALLOC
1068	//////////////////////////////////////////////////////////////
1069	static void
1070	expandintarray(int *&a, int olddim)
1071	{
1072	int i;
1073	int *tmp;
1074
1075	tmp = (int) malloc((olddim+1)sizeof(int));
1076
1077	for (i=0; i<olddim; i++) {
1078	tmp[i] = a[i];
1079	}
1080	tmp[olddim] = 0;
1081
1082	free(a);
1083
1084	a = tmp;
1085	}
1086
1087	////////////////////////////////////////////////////////////////////////////
1088
1089	// Local Variables:
1090	// mode: c++
1091	// c-file-style: "CLJ-CONDENSED"
1092	// End:

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: ThirdParty/mpqc_open/src/lib/chemistry/qc/mbpt/hsosv2lb.cc@ 72461c

Download in other formats: