xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 5b6bfdb9644f185dbf5e5a09b808ec241507e1e7)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private data for MATFFTW matrices; hangs off Mat_FFT->data */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;  /* dimensions in FFTW's native ptrdiff_t form (for the MPI/guru interfaces) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* per-dimension size/stride descriptors for the 64-bit guru planner */
#else
  fftw_iodim   *iodims;              /* per-dimension size/stride descriptors for the guru planner */
#endif
  PetscInt     partial_dim;          /* NOTE(review): presumably a product of trailing dimensions used by the scatter routines -- TODO confirm */
  fftw_plan    p_forward,p_backward; /* cached plans; created lazily on first MatMult/MatMultTranspose */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                             executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
/* MatMult_SeqFFTW performs serial forward DFT
34    Input parameter:
35      A - the matrix
36      x - the vector on which FDFT will be performed
37 
38    Output parameter:
39      y - vector that stores result of FDFT
40 */
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims;
#else
  fftw_iodim     *iodims;
#endif
  PetscInt       i;
#endif
  PetscInt       ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        /* fill row-major (C order) strides for the guru interface: last dimension is contiguous */
        iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        /* fill row-major (C order) strides for the guru interface: last dimension is contiguous */
        iodims[ndim-1].n = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      /* NOTE(review): the (int*)dim cast assumes PetscInt is int; looks invalid with 64-bit indices -- TODO confirm */
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
                planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
134 
135 /* MatMultTranspose_SeqFFTW performs serial backward DFT
136    Input parameter:
137      A - the matrix
138      x - the vector on which BDFT will be performed
139 
140    Output parameter:
141      y - vector that stores result of BDFT
142 */
143 
144 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
145 {
146   PetscErrorCode ierr;
147   Mat_FFT        *fft  = (Mat_FFT*)A->data;
148   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
149   const PetscScalar *x_array;
150   PetscScalar    *y_array;
151   PetscInt       ndim=fft->ndim,*dim=fft->dim;
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_USE_64BIT_INDICES)
154   fftw_iodim64   *iodims=fftw->iodims;
155 #else
156   fftw_iodim     *iodims=fftw->iodims;
157 #endif
158 #endif
159 
160   PetscFunctionBegin;
161   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_backward) { /* create a plan, then excute it */
164     switch (ndim) {
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
168 #else
169       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175 #else
176       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
182 #else
183       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188 #if defined(PETSC_USE_64BIT_INDICES)
189       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190 #else
191       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
192 #endif
193 #else
194       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
195 #endif
196       break;
197     }
198     fftw->binarray  = (PetscScalar *) x_array;
199     fftw->boutarray = y_array;
200     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
201   } else { /* use existing plan */
202     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
203 #if defined(PETSC_USE_COMPLEX)
204       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
205 #else
206       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
207 #endif
208     } else {
209       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
210     }
211   }
212   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
213   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
214   PetscFunctionReturn(0);
215 }
216 
217 /* MatMult_MPIFFTW performs forward DFT in parallel
218    Input parameter:
219      A - the matrix
220      x - the vector on which FDFT will be performed
221 
222    Output parameter:
223    y   - vector that stores result of FDFT
224 */
225 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
226 {
227   PetscErrorCode ierr;
228   Mat_FFT        *fft  = (Mat_FFT*)A->data;
229   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
230   const PetscScalar *x_array;
231   PetscScalar    *y_array;
232   PetscInt       ndim=fft->ndim,*dim=fft->dim;
233   MPI_Comm       comm;
234 
235   PetscFunctionBegin;
236   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
237   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
238   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
239   if (!fftw->p_forward) { /* create a plan, then excute it */
240     switch (ndim) {
241     case 1:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
244 #else
245       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
246 #endif
247       break;
248     case 2:
249 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
250       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
251 #else
252       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     case 3:
256 #if defined(PETSC_USE_COMPLEX)
257       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
258 #else
259       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
260 #endif
261       break;
262     default:
263 #if defined(PETSC_USE_COMPLEX)
264       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
265 #else
266       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
267 #endif
268       break;
269     }
270     fftw->finarray  = (PetscScalar *) x_array;
271     fftw->foutarray = y_array;
272     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
273                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
274     fftw_execute(fftw->p_forward);
275   } else { /* use existing plan */
276     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
277       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
278     } else {
279       fftw_execute(fftw->p_forward);
280     }
281   }
282   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
283   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
288    Input parameter:
289      A - the matrix
290      x - the vector on which BDFT will be performed
291 
292    Output parameter:
293      y - vector that stores result of BDFT
294 */
295 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
296 {
297   PetscErrorCode ierr;
298   Mat_FFT        *fft  = (Mat_FFT*)A->data;
299   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
300   const PetscScalar *x_array;
301   PetscScalar    *y_array;
302   PetscInt       ndim=fft->ndim,*dim=fft->dim;
303   MPI_Comm       comm;
304 
305   PetscFunctionBegin;
306   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
307   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
308   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
309   if (!fftw->p_backward) { /* create a plan, then excute it */
310     switch (ndim) {
311     case 1:
312 #if defined(PETSC_USE_COMPLEX)
313       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
314 #else
315       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
316 #endif
317       break;
318     case 2:
319 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
320       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
321 #else
322       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
323 #endif
324       break;
325     case 3:
326 #if defined(PETSC_USE_COMPLEX)
327       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
328 #else
329       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
330 #endif
331       break;
332     default:
333 #if defined(PETSC_USE_COMPLEX)
334       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
335 #else
336       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
337 #endif
338       break;
339     }
340     fftw->binarray  = (PetscScalar *) x_array;
341     fftw->boutarray = y_array;
342     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
343   } else { /* use existing plan */
344     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
345       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
346     } else {
347       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
348     }
349   }
350   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
351   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 PetscErrorCode MatDestroy_FFTW(Mat A)
356 {
357   Mat_FFT        *fft  = (Mat_FFT*)A->data;
358   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
359   PetscErrorCode ierr;
360 
361   PetscFunctionBegin;
362   fftw_destroy_plan(fftw->p_forward);
363   fftw_destroy_plan(fftw->p_backward);
364   if (fftw->iodims) {
365     free(fftw->iodims);
366   }
367   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
368   ierr = PetscFree(fft->data);CHKERRQ(ierr);
369   fftw_mpi_cleanup();
370   PetscFunctionReturn(0);
371 }
372 
373 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
374 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375 {
376   PetscErrorCode ierr;
377   PetscScalar    *array;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
387 /*@
388    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
389      parallel layout determined by FFTW
390 
391    Collective on Mat
392 
393    Input Parameter:
394 .   A - the matrix
395 
   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW
400 
401   Level: advanced
402 
403   Note: The parallel layout of output of forward FFTW is always same as the input
404         of the backward FFTW. But parallel layout of the input vector of forward
405         FFTW might not be same as the output of backward FFTW.
406         Also note that we need to provide enough space while doing parallel real transform.
407         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
408         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
409         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
410         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
411         zeros if it is odd only one zero is needed.
412         Lastly one needs some scratch space at the end of data set in each process. alloc_local
413         figures out how much space is needed, i.e. it figures out the data+scratch space for
414         each processor and returns that.
415 
416 .seealso: MatCreateFFTW()
417 @*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation registered on the matrix (MatCreateVecsFFTW_FFTW for MATFFTW);
     errors if A has no such method attached. */
  ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
426 
/* MatCreateVecsFFTW_FFTW - implementation behind MatCreateVecsFFTW().

   Creates up to three vectors (any output pointer may be NULL):
     fin  - input of the forward transform
     fout - output of the forward transform
     bout - output of the backward transform
   Sequential case: plain PETSc vectors of the transform size.  Parallel case:
   the data arrays are allocated with fftw_malloc() using the sizes reported by
   the fftw_mpi_local_size_* routines (which include FFTW's scratch space and,
   for real transforms, the padded last dimension 2*(n/2+1)), and the vectors'
   destroy op is overridden with VecDestroy_MPIFFTW so the FFTW storage is
   released correctly. */
PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real case: fft->n already accounts for the r2c padding */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      /* the backward 1d transform may have a different data distribution: query it separately */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last dimension by n/2+1 to query the r2c data distribution, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
640 
641 /*@
642    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
643 
644    Collective on Mat
645 
646    Input Parameters:
647 +  A - FFTW matrix
648 -  x - the PETSc vector
649 
650    Output Parameters:
651 .  y - the FFTW vector
652 
653   Options Database Keys:
654 . -mat_fftw_plannerflags - set FFTW planner flags
655 
656    Level: intermediate
657 
   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.
661 
662 .seealso: VecScatterFFTWToPetsc()
663 @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation registered on the matrix (VecScatterPetscToFFTW_FFTW for MATFFTW);
     errors if A has no such method attached. */
  ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
672 
673 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
674 {
675   PetscErrorCode ierr;
676   MPI_Comm       comm;
677   Mat_FFT        *fft  = (Mat_FFT*)A->data;
678   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
679   PetscInt       N     =fft->N;
680   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
681   PetscInt       low;
682   PetscMPIInt    rank,size;
683   PetscInt       vsize,vsize1;
684   ptrdiff_t      local_n0,local_0_start;
685   ptrdiff_t      local_n1,local_1_start;
686   VecScatter     vecscat;
687   IS             list1,list2;
688 #if !defined(PETSC_USE_COMPLEX)
689   PetscInt       i,j,k,partial_dim;
690   PetscInt       *indx1, *indx2, tempindx, tempindx1;
691   PetscInt       NM;
692   ptrdiff_t      temp;
693 #endif
694 
695   PetscFunctionBegin;
696   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
697   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
698   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
699   ierr = VecGetOwnershipRange(y,&low,NULL);
700 
701   if (size==1) {
702     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
703     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
704     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
705     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
706     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
707     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
708     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
709     ierr = ISDestroy(&list1);CHKERRQ(ierr);
710   } else {
711     switch (ndim) {
712     case 1:
713 #if defined(PETSC_USE_COMPLEX)
714       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
715 
716       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
717       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
718       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
719       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
722       ierr = ISDestroy(&list1);CHKERRQ(ierr);
723       ierr = ISDestroy(&list2);CHKERRQ(ierr);
724 #else
725       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
726 #endif
727       break;
728     case 2:
729 #if defined(PETSC_USE_COMPLEX)
730       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
731 
732       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
733       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
734       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
735       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
736       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
737       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
738       ierr = ISDestroy(&list1);CHKERRQ(ierr);
739       ierr = ISDestroy(&list2);CHKERRQ(ierr);
740 #else
741       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
742 
743       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
744       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
745 
746       if (dim[1]%2==0) {
747         NM = dim[1]+2;
748       } else {
749         NM = dim[1]+1;
750       }
751       for (i=0; i<local_n0; i++) {
752         for (j=0; j<dim[1]; j++) {
753           tempindx  = i*dim[1] + j;
754           tempindx1 = i*NM + j;
755 
756           indx1[tempindx]=local_0_start*dim[1]+tempindx;
757           indx2[tempindx]=low+tempindx1;
758         }
759       }
760 
761       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
762       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
763 
764       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
765       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
766       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
767       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
768       ierr = ISDestroy(&list1);CHKERRQ(ierr);
769       ierr = ISDestroy(&list2);CHKERRQ(ierr);
770       ierr = PetscFree(indx1);CHKERRQ(ierr);
771       ierr = PetscFree(indx2);CHKERRQ(ierr);
772 #endif
773       break;
774 
775     case 3:
776 #if defined(PETSC_USE_COMPLEX)
777       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
778 
779       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
780       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
781       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
782       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
783       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
784       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
785       ierr = ISDestroy(&list1);CHKERRQ(ierr);
786       ierr = ISDestroy(&list2);CHKERRQ(ierr);
787 #else
788       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
789       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
790       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
791 
792       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
793       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
794 
795       if (dim[2]%2==0) NM = dim[2]+2;
796       else             NM = dim[2]+1;
797 
798       for (i=0; i<local_n0; i++) {
799         for (j=0; j<dim[1]; j++) {
800           for (k=0; k<dim[2]; k++) {
801             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
802             tempindx1 = i*dim[1]*NM + j*NM + k;
803 
804             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
805             indx2[tempindx]=low+tempindx1;
806           }
807         }
808       }
809 
810       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
811       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
812       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
813       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
814       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
816       ierr = ISDestroy(&list1);CHKERRQ(ierr);
817       ierr = ISDestroy(&list2);CHKERRQ(ierr);
818       ierr = PetscFree(indx1);CHKERRQ(ierr);
819       ierr = PetscFree(indx2);CHKERRQ(ierr);
820 #endif
821       break;
822 
823     default:
824 #if defined(PETSC_USE_COMPLEX)
825       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
826 
827       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
828       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
829       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
830       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
833       ierr = ISDestroy(&list1);CHKERRQ(ierr);
834       ierr = ISDestroy(&list2);CHKERRQ(ierr);
835 #else
836       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
837       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
838       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
839 
840       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
841 
842       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
843 
844       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
845 
846       partial_dim = fftw->partial_dim;
847 
848       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
849       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
850 
851       if (dim[ndim-1]%2==0) NM = 2;
852       else                  NM = 1;
853 
854       j = low;
855       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
856         indx1[i] = local_0_start*partial_dim + i;
857         indx2[i] = j;
858         if (k%dim[ndim-1]==0) j+=NM;
859         j++;
860       }
861       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
862       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
863       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
864       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
866       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
867       ierr = ISDestroy(&list1);CHKERRQ(ierr);
868       ierr = ISDestroy(&list2);CHKERRQ(ierr);
869       ierr = PetscFree(indx1);CHKERRQ(ierr);
870       ierr = PetscFree(indx2);CHKERRQ(ierr);
871 #endif
872       break;
873     }
874   }
875   PetscFunctionReturn(0);
876 }
877 
878 /*@
879    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
880 
881    Collective on Mat
882 
883     Input Parameters:
884 +   A - FFTW matrix
885 -   x - FFTW vector
886 
887    Output Parameters:
888 .  y - PETSc vector
889 
890    Level: intermediate
891 
   Note: When performing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.
894 
895 .seealso: VecScatterPetscToFFTW()
896 @*/
897 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
898 {
899   PetscErrorCode ierr;
900 
901   PetscFunctionBegin;
902   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
903   PetscFunctionReturn(0);
904 }
905 
906 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
907 {
908   PetscErrorCode ierr;
909   MPI_Comm       comm;
910   Mat_FFT        *fft  = (Mat_FFT*)A->data;
911   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
912   PetscInt       N     = fft->N;
913   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
914   PetscInt       low;
915   PetscMPIInt    rank,size;
916   ptrdiff_t      local_n0,local_0_start;
917   ptrdiff_t      local_n1,local_1_start;
918   VecScatter     vecscat;
919   IS             list1,list2;
920 #if !defined(PETSC_USE_COMPLEX)
921   PetscInt       i,j,k,partial_dim;
922   PetscInt       *indx1, *indx2, tempindx, tempindx1;
923   PetscInt       NM;
924   ptrdiff_t      temp;
925 #endif
926 
927   PetscFunctionBegin;
928   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
929   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
930   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
931   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
932 
933   if (size==1) {
934     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
935     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
936     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
938     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
939     ierr = ISDestroy(&list1);CHKERRQ(ierr);
940 
941   } else {
942     switch (ndim) {
943     case 1:
944 #if defined(PETSC_USE_COMPLEX)
945       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
946 
947       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
948       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
949       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
950       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
953       ierr = ISDestroy(&list1);CHKERRQ(ierr);
954       ierr = ISDestroy(&list2);CHKERRQ(ierr);
955 #else
956       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
957 #endif
958       break;
959     case 2:
960 #if defined(PETSC_USE_COMPLEX)
961       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
962 
963       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
964       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
965       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
966       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
969       ierr = ISDestroy(&list1);CHKERRQ(ierr);
970       ierr = ISDestroy(&list2);CHKERRQ(ierr);
971 #else
972       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
973 
974       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
975       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
976 
977       if (dim[1]%2==0) NM = dim[1]+2;
978       else             NM = dim[1]+1;
979 
980       for (i=0; i<local_n0; i++) {
981         for (j=0; j<dim[1]; j++) {
982           tempindx = i*dim[1] + j;
983           tempindx1 = i*NM + j;
984 
985           indx1[tempindx]=local_0_start*dim[1]+tempindx;
986           indx2[tempindx]=low+tempindx1;
987         }
988       }
989 
990       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
991       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
992 
993       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
994       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
997       ierr = ISDestroy(&list1);CHKERRQ(ierr);
998       ierr = ISDestroy(&list2);CHKERRQ(ierr);
999       ierr = PetscFree(indx1);CHKERRQ(ierr);
1000       ierr = PetscFree(indx2);CHKERRQ(ierr);
1001 #endif
1002       break;
1003     case 3:
1004 #if defined(PETSC_USE_COMPLEX)
1005       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1006 
1007       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1008       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1009       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1010       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1013       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1014       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1015 #else
1016       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1017 
1018       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1019       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1020 
1021       if (dim[2]%2==0) NM = dim[2]+2;
1022       else             NM = dim[2]+1;
1023 
1024       for (i=0; i<local_n0; i++) {
1025         for (j=0; j<dim[1]; j++) {
1026           for (k=0; k<dim[2]; k++) {
1027             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1028             tempindx1 = i*dim[1]*NM + j*NM + k;
1029 
1030             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1031             indx2[tempindx]=low+tempindx1;
1032           }
1033         }
1034       }
1035 
1036       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1037       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1038 
1039       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1040       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1042       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1043       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1044       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1045       ierr = PetscFree(indx1);CHKERRQ(ierr);
1046       ierr = PetscFree(indx2);CHKERRQ(ierr);
1047 #endif
1048       break;
1049     default:
1050 #if defined(PETSC_USE_COMPLEX)
1051       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1052 
1053       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1054       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1055       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1056       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1059       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1060       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1061 #else
1062       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1063 
1064       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1065 
1066       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1067 
1068       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1069 
1070       partial_dim = fftw->partial_dim;
1071 
1072       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1073       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1074 
1075       if (dim[ndim-1]%2==0) NM = 2;
1076       else                  NM = 1;
1077 
1078       j = low;
1079       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1080         indx1[i] = local_0_start*partial_dim + i;
1081         indx2[i] = j;
1082         if (k%dim[ndim-1]==0) j+=NM;
1083         j++;
1084       }
1085       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1087 
1088       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1089       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1092       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1093       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1094       ierr = PetscFree(indx1);CHKERRQ(ierr);
1095       ierr = PetscFree(indx2);CHKERRQ(ierr);
1096 #endif
1097       break;
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 /*
1104     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1105 
1106   Options Database Keys:
. -mat_fftw_plannerflags - set the FFTW planner flag: one of FFTW_ESTIMATE (default), FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE
1108 
1109    Level: intermediate
1110 
1111 */
1112 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1113 {
1114   PetscErrorCode ierr;
1115   MPI_Comm       comm;
1116   Mat_FFT        *fft=(Mat_FFT*)A->data;
1117   Mat_FFTW       *fftw;
1118   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1119   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1120   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1121   PetscBool      flg;
1122   PetscInt       p_flag,partial_dim=1,ctr;
1123   PetscMPIInt    size,rank;
1124   ptrdiff_t      *pdim;
1125   ptrdiff_t      local_n1,local_1_start;
1126 #if !defined(PETSC_USE_COMPLEX)
1127   ptrdiff_t      temp;
1128   PetscInt       N1,tot_dim;
1129 #else
1130   PetscInt       n1;
1131 #endif
1132 
1133   PetscFunctionBegin;
1134   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1135   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1136   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1137 
1138   fftw_mpi_init();
1139   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1140   pdim[0] = dim[0];
1141 #if !defined(PETSC_USE_COMPLEX)
1142   tot_dim = 2*dim[0];
1143 #endif
1144   for (ctr=1; ctr<ndim; ctr++) {
1145     partial_dim *= dim[ctr];
1146     pdim[ctr]    = dim[ctr];
1147 #if !defined(PETSC_USE_COMPLEX)
1148     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1149     else             tot_dim *= dim[ctr];
1150 #endif
1151   }
1152 
1153   if (size == 1) {
1154 #if defined(PETSC_USE_COMPLEX)
1155     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1156     n    = N;
1157 #else
1158     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1159     n    = tot_dim;
1160 #endif
1161 
1162   } else {
1163     ptrdiff_t local_n0,local_0_start;
1164     switch (ndim) {
1165     case 1:
1166 #if !defined(PETSC_USE_COMPLEX)
1167       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1168 #else
1169       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1170 
1171       n    = (PetscInt)local_n0;
1172       n1   = (PetscInt)local_n1;
1173       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1174 #endif
1175       break;
1176     case 2:
1177 #if defined(PETSC_USE_COMPLEX)
1178       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1179       /*
1180        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1181        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1182        */
1183       n    = (PetscInt)local_n0*dim[1];
1184       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1185 #else
1186       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1187 
1188       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1189       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1190 #endif
1191       break;
1192     case 3:
1193 #if defined(PETSC_USE_COMPLEX)
1194       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1195 
1196       n    = (PetscInt)local_n0*dim[1]*dim[2];
1197       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1198 #else
1199       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1200 
1201       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1202       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1203 #endif
1204       break;
1205     default:
1206 #if defined(PETSC_USE_COMPLEX)
1207       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1208 
1209       n    = (PetscInt)local_n0*partial_dim;
1210       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1211 #else
1212       temp = pdim[ndim-1];
1213 
1214       pdim[ndim-1] = temp/2 + 1;
1215 
1216       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1217 
1218       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1219       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1220 
1221       pdim[ndim-1] = temp;
1222 
1223       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1224 #endif
1225       break;
1226     }
1227   }
1228   free(pdim);
1229   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1230   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1231   fft->data = (void*)fftw;
1232 
1233   fft->n            = n;
1234   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1235   fftw->partial_dim = partial_dim;
1236 
1237   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1238   if (size == 1) {
1239 #if defined(PETSC_USE_64BIT_INDICES)
1240     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1241 #else
1242     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1243 #endif
1244   }
1245 
1246   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1247 
1248   fftw->p_forward  = 0;
1249   fftw->p_backward = 0;
1250   fftw->p_flag     = FFTW_ESTIMATE;
1251 
1252   if (size == 1) {
1253     A->ops->mult          = MatMult_SeqFFTW;
1254     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1255   } else {
1256     A->ops->mult          = MatMult_MPIFFTW;
1257     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1258   }
1259   fft->matdestroy = MatDestroy_FFTW;
1260   A->assembled    = PETSC_TRUE;
1261   A->preallocated = PETSC_TRUE;
1262 
1263   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1264   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1265   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1266 
1267   /* get runtime options */
1268   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1269   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1270   if (flg) {
1271     fftw->p_flag = iplans[p_flag];
1272   }
1273   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 
1278 
1279 
1280