xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision c3eba89ad7dd2bb911ce41d64181a8292c3fd878)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw; /* dimensions in FFTW's native ptrdiff_t type, used for >3-D MPI transforms */
  fftw_iodim  *iodims;             /* guru-interface dimension descriptors for >3-D sequential transforms */
  PetscInt    partial_dim;         /* NOTE(review): presumably a partial product of the dimensions -- confirm against the FFTW matrix creation routine */
  fftw_plan   p_forward,p_backward;
  unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
21 
/* Forward declarations of the FFTW matrix/vector operations implemented in this file */
extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
28 
29 /* MatMult_SeqFFTW performs forward DFT in parallel
30    Input parameter:
31      A - the matrix
32      x - the vector on which FDFT will be performed
33 
34    Output parameter:
35      y - vector that stores result of FDFT
36 */
37 #undef __FUNCT__
38 #define __FUNCT__ "MatMult_SeqFFTW"
39 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
40 {
41   PetscErrorCode ierr;
42   Mat_FFT        *fft  = (Mat_FFT*)A->data;
43   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
44   PetscScalar    *x_array,*y_array;
45   fftw_iodim     *iodims;
46   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
47 
48   PetscFunctionBegin;
49   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51   if (!fftw->p_forward) { /* create a plan, then excute it */
52     switch (ndim) {
53     case 1:
54 #if defined(PETSC_USE_COMPLEX)
55       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56 #else
57       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
58 #endif
59       break;
60     case 2:
61 #if defined(PETSC_USE_COMPLEX)
62       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
63 #else
64       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
65 #endif
66       break;
67     case 3:
68 #if defined(PETSC_USE_COMPLEX)
69       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
70 #else
71       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
72 #endif
73       break;
74     default:
75 #if defined(PETSC_USE_COMPLEX)
76       iodims = (fftw_iodim*)fftw->iodims;
77       if (ndim) {
78         iodims[ndim-1].n = dim[ndim-1];
79         iodims[ndim-1].is = iodims[ndim-1].os = 1;
80         for (i=ndim-2; i>=0; --i) {
81           iodims[i].n = dim[i];
82           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
83         }
84       }
85       fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
86 #else
87       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
88 #endif
89       break;
90     }
91     fftw->finarray  = x_array;
92     fftw->foutarray = y_array;
93     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
94                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
95     fftw_execute(fftw->p_forward);
96   } else { /* use existing plan */
97     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
98       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
99     } else {
100       fftw_execute(fftw->p_forward);
101     }
102   }
103   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
104   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
105   PetscFunctionReturn(0);
106 }
107 
108 /* MatMultTranspose_SeqFFTW performs serial backward DFT
109    Input parameter:
110      A - the matrix
111      x - the vector on which BDFT will be performed
112 
113    Output parameter:
114      y - vector that stores result of BDFT
115 */
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
119 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
120 {
121   PetscErrorCode ierr;
122   Mat_FFT        *fft  = (Mat_FFT*)A->data;
123   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
124   PetscScalar    *x_array,*y_array;
125   PetscInt       ndim=fft->ndim,*dim=fft->dim;
126   fftw_iodim     *iodims=(fftw_iodim*)fftw->iodims;
127 
128   PetscFunctionBegin;
129   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
130   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
131   if (!fftw->p_backward) { /* create a plan, then excute it */
132     switch (ndim) {
133     case 1:
134 #if defined(PETSC_USE_COMPLEX)
135       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
136 #else
137       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
138 #endif
139       break;
140     case 2:
141 #if defined(PETSC_USE_COMPLEX)
142       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
143 #else
144       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
145 #endif
146       break;
147     case 3:
148 #if defined(PETSC_USE_COMPLEX)
149       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
150 #else
151       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
152 #endif
153       break;
154     default:
155 #if defined(PETSC_USE_COMPLEX)
156       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
157 #else
158       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
159 #endif
160       break;
161     }
162     fftw->binarray  = x_array;
163     fftw->boutarray = y_array;
164     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
165   } else { /* use existing plan */
166     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
167       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
168     } else {
169       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
170     }
171   }
172   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
173   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 /* MatMult_MPIFFTW performs forward DFT in parallel
178    Input parameter:
179      A - the matrix
180      x - the vector on which FDFT will be performed
181 
182    Output parameter:
183    y   - vector that stores result of FDFT
184 */
185 #undef __FUNCT__
186 #define __FUNCT__ "MatMult_MPIFFTW"
187 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
188 {
189   PetscErrorCode ierr;
190   Mat_FFT        *fft  = (Mat_FFT*)A->data;
191   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
192   PetscScalar    *x_array,*y_array;
193   PetscInt       ndim=fft->ndim,*dim=fft->dim;
194   MPI_Comm       comm;
195 
196   PetscFunctionBegin;
197   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
198   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
199   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
200   if (!fftw->p_forward) { /* create a plan, then excute it */
201     switch (ndim) {
202     case 1:
203 #if defined(PETSC_USE_COMPLEX)
204       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
205 #else
206       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
207 #endif
208       break;
209     case 2:
210 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
211       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
212 #else
213       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
214 #endif
215       break;
216     case 3:
217 #if defined(PETSC_USE_COMPLEX)
218       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
219 #else
220       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
221 #endif
222       break;
223     default:
224 #if defined(PETSC_USE_COMPLEX)
225       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
226 #else
227       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
228 #endif
229       break;
230     }
231     fftw->finarray  = x_array;
232     fftw->foutarray = y_array;
233     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
234                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
235     fftw_execute(fftw->p_forward);
236   } else { /* use existing plan */
237     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
238       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
239     } else {
240       fftw_execute(fftw->p_forward);
241     }
242   }
243   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
244   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
249    Input parameter:
250      A - the matrix
251      x - the vector on which BDFT will be performed
252 
253    Output parameter:
254      y - vector that stores result of BDFT
255 */
256 #undef __FUNCT__
257 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
258 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
259 {
260   PetscErrorCode ierr;
261   Mat_FFT        *fft  = (Mat_FFT*)A->data;
262   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
263   PetscScalar    *x_array,*y_array;
264   PetscInt       ndim=fft->ndim,*dim=fft->dim;
265   MPI_Comm       comm;
266 
267   PetscFunctionBegin;
268   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
269   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
270   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
271   if (!fftw->p_backward) { /* create a plan, then excute it */
272     switch (ndim) {
273     case 1:
274 #if defined(PETSC_USE_COMPLEX)
275       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
276 #else
277       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
278 #endif
279       break;
280     case 2:
281 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
282       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
283 #else
284       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
285 #endif
286       break;
287     case 3:
288 #if defined(PETSC_USE_COMPLEX)
289       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
290 #else
291       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
292 #endif
293       break;
294     default:
295 #if defined(PETSC_USE_COMPLEX)
296       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
297 #else
298       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
299 #endif
300       break;
301     }
302     fftw->binarray  = x_array;
303     fftw->boutarray = y_array;
304     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305   } else { /* use existing plan */
306     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
307       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
308     } else {
309       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
310     }
311   }
312   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
313   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatDestroy_FFTW"
319 PetscErrorCode MatDestroy_FFTW(Mat A)
320 {
321   Mat_FFT        *fft  = (Mat_FFT*)A->data;
322   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
323   PetscErrorCode ierr;
324 
325   PetscFunctionBegin;
326   fftw_destroy_plan(fftw->p_forward);
327   fftw_destroy_plan(fftw->p_backward);
328   if (fftw->iodims) {
329     /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */
330     free(fftw->iodims);
331   }
332   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
333   ierr = PetscFree(fft->data);CHKERRQ(ierr);
334   fftw_mpi_cleanup();
335   PetscFunctionReturn(0);
336 }
337 
338 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
339 #undef __FUNCT__
340 #define __FUNCT__ "VecDestroy_MPIFFTW"
341 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
342 {
343   PetscErrorCode ierr;
344   PetscScalar    *array;
345 
346   PetscFunctionBegin;
347   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
348   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
349   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
350   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
351   PetscFunctionReturn(0);
352 }
353 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW"
/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of the output of forward FFTW is always the same as the input
        of the backward FFTW. But the parallel layout of the input vector of forward
        FFTW might not be the same as the output of backward FFTW.
        Also note that we need to provide enough space while doing a parallel real transform.
        We need to pad extra zeros at the end of the last dimension. For this reason one needs to
        invoke the routine fftw_mpi_local_size_transposed. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on whether the last dimension is even or odd. If the last dimension is even one needs to pad two
        zeros; if it is odd only one zero is needed.
        Lastly one needs some scratch space at the end of the data set in each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
395 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* MatGetVecsFFTW_FFTW - creates work vectors whose local sizes match the data layout
   FFTW chooses for this transform.  Any of fin/fout/bout may be NULL to skip creation.
   In the parallel case each vector wraps an fftw_malloc'ed array (so its destroy op is
   replaced by VecDestroy_MPIFFTW); for real transforms extra padding/scratch space is
   included per the fftw_mpi_local_size*_transposed conventions. */
PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1: /* 1-D: forward and backward transforms may have different local layouts, hence two local_size queries */
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2: /* 2-D: real transforms pad the last dimension to 2*(dim[1]/2+1) doubles */
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3: /* 3-D: analogous to 2-D with padding on dim[2] for the real case */
#if !defined(PETSC_USE_COMPLEX)
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default: /* >3-D: use the ptrdiff_t copies of the dimensions kept in the FFTW context */
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last dimension by n/2+1 for the local_size query, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
611 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
648 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
649 {
650   PetscErrorCode ierr;
651   MPI_Comm       comm;
652   Mat_FFT        *fft  = (Mat_FFT*)A->data;
653   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
654   PetscInt       N     =fft->N;
655   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
656   PetscInt       low;
657   PetscMPIInt    rank,size;
658   PetscInt       vsize,vsize1;
659   ptrdiff_t      local_n0,local_0_start;
660   ptrdiff_t      local_n1,local_1_start;
661   VecScatter     vecscat;
662   IS             list1,list2;
663 #if !defined(PETSC_USE_COMPLEX)
664   PetscInt       i,j,k,partial_dim;
665   PetscInt       *indx1, *indx2, tempindx, tempindx1;
666   PetscInt       N1, n1,NM;
667   ptrdiff_t      temp;
668 #endif
669 
670   PetscFunctionBegin;
671   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
672   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
673   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
674   ierr = VecGetOwnershipRange(y,&low,NULL);
675 
676   if (size==1) {
677     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
678     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
679     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
680     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
681     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
682     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
683     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
684     ierr = ISDestroy(&list1);CHKERRQ(ierr);
685   } else {
686     switch (ndim) {
687     case 1:
688 #if defined(PETSC_USE_COMPLEX)
689       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
690 
691       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
692       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
693       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
694       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
697       ierr = ISDestroy(&list1);CHKERRQ(ierr);
698       ierr = ISDestroy(&list2);CHKERRQ(ierr);
699 #else
700       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
701 #endif
702       break;
703     case 2:
704 #if defined(PETSC_USE_COMPLEX)
705       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
706 
707       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
708       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
709       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
710       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
711       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
712       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
713       ierr = ISDestroy(&list1);CHKERRQ(ierr);
714       ierr = ISDestroy(&list2);CHKERRQ(ierr);
715 #else
716       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
717 
718       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
719       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
720       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
721 
722       if (dim[1]%2==0) {
723         NM = dim[1]+2;
724       } else {
725         NM = dim[1]+1;
726       }
727       for (i=0; i<local_n0; i++) {
728         for (j=0; j<dim[1]; j++) {
729           tempindx  = i*dim[1] + j;
730           tempindx1 = i*NM + j;
731 
732           indx1[tempindx]=local_0_start*dim[1]+tempindx;
733           indx2[tempindx]=low+tempindx1;
734         }
735       }
736 
737       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
738       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
739 
740       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
741       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
743       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
744       ierr = ISDestroy(&list1);CHKERRQ(ierr);
745       ierr = ISDestroy(&list2);CHKERRQ(ierr);
746       ierr = PetscFree(indx1);CHKERRQ(ierr);
747       ierr = PetscFree(indx2);CHKERRQ(ierr);
748 #endif
749       break;
750 
751     case 3:
752 #if defined(PETSC_USE_COMPLEX)
753       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
754 
755       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
756       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
757       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
758       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
759       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
760       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
761       ierr = ISDestroy(&list1);CHKERRQ(ierr);
762       ierr = ISDestroy(&list2);CHKERRQ(ierr);
763 #else
764       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
765 
766       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
767 
768       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
769       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
770 
771       if (dim[2]%2==0) NM = dim[2]+2;
772       else             NM = dim[2]+1;
773 
774       for (i=0; i<local_n0; i++) {
775         for (j=0; j<dim[1]; j++) {
776           for (k=0; k<dim[2]; k++) {
777             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
778             tempindx1 = i*dim[1]*NM + j*NM + k;
779 
780             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
781             indx2[tempindx]=low+tempindx1;
782           }
783         }
784       }
785 
786       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
787       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
788       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
789       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
790       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
791       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
792       ierr = ISDestroy(&list1);CHKERRQ(ierr);
793       ierr = ISDestroy(&list2);CHKERRQ(ierr);
794       ierr = PetscFree(indx1);CHKERRQ(ierr);
795       ierr = PetscFree(indx2);CHKERRQ(ierr);
796 #endif
797       break;
798 
799     default:
800 #if defined(PETSC_USE_COMPLEX)
801       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
802 
803       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
804       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
805       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
806       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
808       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
809       ierr = ISDestroy(&list1);CHKERRQ(ierr);
810       ierr = ISDestroy(&list2);CHKERRQ(ierr);
811 #else
812       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
813 
814       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
815 
816       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
817 
818       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
819 
820       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
821 
822       partial_dim = fftw->partial_dim;
823 
824       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
825       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
826 
827       if (dim[ndim-1]%2==0) NM = 2;
828       else                  NM = 1;
829 
830       j = low;
831       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
832         indx1[i] = local_0_start*partial_dim + i;
833         indx2[i] = j;
834         if (k%dim[ndim-1]==0) j+=NM;
835         j++;
836       }
837       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
838       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
839       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
840       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
841       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
843       ierr = ISDestroy(&list1);CHKERRQ(ierr);
844       ierr = ISDestroy(&list2);CHKERRQ(ierr);
845       ierr = PetscFree(indx1);CHKERRQ(ierr);
846       ierr = PetscFree(indx2);CHKERRQ(ierr);
847 #endif
848       break;
849     }
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "VecScatterFFTWToPetsc"
856 /*@
857    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
858 
859    Collective on Mat
860 
861     Input Parameters:
862 +   A - FFTW matrix
863 -   x - FFTW vector
864 
865    Output Parameters:
866 .  y - PETSc vector
867 
868    Level: intermediate
869 
   Note: When performing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension;
         VecScatterFFTWToPetsc removes those extra zeros.
872 
873 .seealso: VecScatterPetscToFFTW()
874 @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A (VecScatterFFTWToPetsc_FFTW for MATFFTW,
     composed in MatCreate_FFTW); PetscTryMethod is a no-op if A carries no such method. */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
886 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
887 {
888   PetscErrorCode ierr;
889   MPI_Comm       comm;
890   Mat_FFT        *fft  = (Mat_FFT*)A->data;
891   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
892   PetscInt       N     = fft->N;
893   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
894   PetscInt       low;
895   PetscMPIInt    rank,size;
896   ptrdiff_t      local_n0,local_0_start;
897   ptrdiff_t      local_n1,local_1_start;
898   VecScatter     vecscat;
899   IS             list1,list2;
900 #if !defined(PETSC_USE_COMPLEX)
901   PetscInt  i,j,k,partial_dim;
902   PetscInt  *indx1, *indx2, tempindx, tempindx1;
903   PetscInt  N1, n1,NM;
904   ptrdiff_t temp;
905 #endif
906 
907   PetscFunctionBegin;
908   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
909   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
910   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
911   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
912 
913   if (size==1) {
914     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
915     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
916     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
919     ierr = ISDestroy(&list1);CHKERRQ(ierr);
920 
921   } else {
922     switch (ndim) {
923     case 1:
924 #if defined(PETSC_USE_COMPLEX)
925       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
926 
927       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
928       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
929       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
930       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
933       ierr = ISDestroy(&list1);CHKERRQ(ierr);
934       ierr = ISDestroy(&list2);CHKERRQ(ierr);
935 #else
936       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
937 #endif
938       break;
939     case 2:
940 #if defined(PETSC_USE_COMPLEX)
941       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
942 
943       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
944       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
945       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
946       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
949       ierr = ISDestroy(&list1);CHKERRQ(ierr);
950       ierr = ISDestroy(&list2);CHKERRQ(ierr);
951 #else
952       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
953 
954       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
955 
956       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
957       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
958 
959       if (dim[1]%2==0) NM = dim[1]+2;
960       else             NM = dim[1]+1;
961 
962       for (i=0; i<local_n0; i++) {
963         for (j=0; j<dim[1]; j++) {
964           tempindx = i*dim[1] + j;
965           tempindx1 = i*NM + j;
966 
967           indx1[tempindx]=local_0_start*dim[1]+tempindx;
968           indx2[tempindx]=low+tempindx1;
969         }
970       }
971 
972       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
973       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
974 
975       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
976       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
979       ierr = ISDestroy(&list1);CHKERRQ(ierr);
980       ierr = ISDestroy(&list2);CHKERRQ(ierr);
981       ierr = PetscFree(indx1);CHKERRQ(ierr);
982       ierr = PetscFree(indx2);CHKERRQ(ierr);
983 #endif
984       break;
985     case 3:
986 #if defined(PETSC_USE_COMPLEX)
987       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
988 
989       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
990       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
991       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
992       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
995       ierr = ISDestroy(&list1);CHKERRQ(ierr);
996       ierr = ISDestroy(&list2);CHKERRQ(ierr);
997 #else
998 
999       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1000 
1001       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1002 
1003       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1004       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1005 
1006       if (dim[2]%2==0) NM = dim[2]+2;
1007       else             NM = dim[2]+1;
1008 
1009       for (i=0; i<local_n0; i++) {
1010         for (j=0; j<dim[1]; j++) {
1011           for (k=0; k<dim[2]; k++) {
1012             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1013             tempindx1 = i*dim[1]*NM + j*NM + k;
1014 
1015             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1016             indx2[tempindx]=low+tempindx1;
1017           }
1018         }
1019       }
1020 
1021       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1022       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1023 
1024       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1025       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1028       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1029       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1030       ierr = PetscFree(indx1);CHKERRQ(ierr);
1031       ierr = PetscFree(indx2);CHKERRQ(ierr);
1032 #endif
1033       break;
1034     default:
1035 #if defined(PETSC_USE_COMPLEX)
1036       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1037 
1038       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1039       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1040       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1041       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1043       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1044       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1045       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1046 #else
1047       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1048 
1049       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1050 
1051       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1052 
1053       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1054 
1055       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1056 
1057       partial_dim = fftw->partial_dim;
1058 
1059       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1060       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1061 
1062       if (dim[ndim-1]%2==0) NM = 2;
1063       else                  NM = 1;
1064 
1065       j = low;
1066       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1067         indx1[i] = local_0_start*partial_dim + i;
1068         indx2[i] = j;
1069         if (k%dim[ndim-1]==0) j+=NM;
1070         j++;
1071       }
1072       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1073       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1074 
1075       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1076       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1077       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1078       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1079       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1080       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1081       ierr = PetscFree(indx1);CHKERRQ(ierr);
1082       ierr = PetscFree(indx2);CHKERRQ(ierr);
1083 #endif
1084       break;
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatCreate_FFTW"
1092 /*
1093     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1094 
1095   Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags
1097 
1098    Level: intermediate
1099 
1100 */
1101 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1102 {
1103   PetscErrorCode ierr;
1104   MPI_Comm       comm;
1105   Mat_FFT        *fft=(Mat_FFT*)A->data;
1106   Mat_FFTW       *fftw;
1107   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1108   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1109   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1110   PetscBool      flg;
1111   PetscInt       p_flag,partial_dim=1,ctr;
1112   PetscMPIInt    size,rank;
1113   ptrdiff_t      *pdim;
1114   ptrdiff_t      local_n1,local_1_start;
1115 #if !defined(PETSC_USE_COMPLEX)
1116   ptrdiff_t temp;
1117   PetscInt  N1,tot_dim;
1118 #else
1119   PetscInt n1;
1120 #endif
1121 
1122   PetscFunctionBegin;
1123   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1125   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1126 
1127   fftw_mpi_init();
1128   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1129   pdim[0] = dim[0];
1130 #if !defined(PETSC_USE_COMPLEX)
1131   tot_dim = 2*dim[0];
1132 #endif
1133   for (ctr=1; ctr<ndim; ctr++) {
1134     partial_dim *= dim[ctr];
1135     pdim[ctr]    = dim[ctr];
1136 #if !defined(PETSC_USE_COMPLEX)
1137     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1138     else             tot_dim *= dim[ctr];
1139 #endif
1140   }
1141 
1142   if (size == 1) {
1143 #if defined(PETSC_USE_COMPLEX)
1144     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1145     n    = N;
1146 #else
1147     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1148     n    = tot_dim;
1149 #endif
1150 
1151   } else {
1152     ptrdiff_t local_n0,local_0_start;
1153     switch (ndim) {
1154     case 1:
1155 #if !defined(PETSC_USE_COMPLEX)
1156       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1157 #else
1158       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1159 
1160       n    = (PetscInt)local_n0;
1161       n1   = (PetscInt)local_n1;
1162       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1163 #endif
1164       break;
1165     case 2:
1166 #if defined(PETSC_USE_COMPLEX)
1167       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1168       /*
1169        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1170        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1171        */
1172       n    = (PetscInt)local_n0*dim[1];
1173       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1174 #else
1175       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1176 
1177       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1178       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1179 #endif
1180       break;
1181     case 3:
1182 #if defined(PETSC_USE_COMPLEX)
1183       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1184 
1185       n    = (PetscInt)local_n0*dim[1]*dim[2];
1186       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1187 #else
1188       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1189 
1190       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1191       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1192 #endif
1193       break;
1194     default:
1195 #if defined(PETSC_USE_COMPLEX)
1196       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1197 
1198       n    = (PetscInt)local_n0*partial_dim;
1199       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1200 #else
1201       temp = pdim[ndim-1];
1202 
1203       pdim[ndim-1] = temp/2 + 1;
1204 
1205       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1206 
1207       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1208       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1209 
1210       pdim[ndim-1] = temp;
1211 
1212       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1213 #endif
1214       break;
1215     }
1216   }
1217   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1218   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1219   fft->data = (void*)fftw;
1220 
1221   fft->n            = n;
1222   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1223   fftw->partial_dim = partial_dim;
1224 
1225   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1226   if (size == 1) {
1227     /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr);  error! */
1228     /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */
1229     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1230   }
1231 
1232   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1233 
1234   fftw->p_forward  = 0;
1235   fftw->p_backward = 0;
1236   fftw->p_flag     = FFTW_ESTIMATE;
1237 
1238   if (size == 1) {
1239     A->ops->mult          = MatMult_SeqFFTW;
1240     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1241   } else {
1242     A->ops->mult          = MatMult_MPIFFTW;
1243     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1244   }
1245   fft->matdestroy = MatDestroy_FFTW;
1246   A->assembled    = PETSC_TRUE;
1247   A->preallocated = PETSC_TRUE;
1248 
1249   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1250   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1251   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1252 
1253   /* get runtime options */
1254   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1255   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1256   if (flg) {
1257     fftw->p_flag = iplans[p_flag];
1258   }
1259   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 
1264 
1265 
1266