xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 8f1a2a5e609bf3d0ec1c7e7bf09507165b68d274)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt    partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 
/* MatMult_SeqFFTW performs serial forward DFT

   Input parameter:
   mat - the matrix
   x   - the vector on which FDFT will be performed

   Output parameter:
   y   - vector that stores result of FDFT

*/
38 #undef __FUNCT__
39 #define __FUNCT__ "MatMult_SeqFFTW"
40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
41 {
42   PetscErrorCode ierr;
43   Mat_FFT        *fft  = (Mat_FFT*)A->data;
44   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
45   PetscScalar    *x_array,*y_array;
46   PetscInt       ndim=fft->ndim,*dim=fft->dim;
47 
48   PetscFunctionBegin;
49   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51   if (!fftw->p_forward){ /* create a plan, then excute it */
52     switch (ndim){
53     case 1:
54 #if defined(PETSC_USE_COMPLEX)
55       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56 #else
57       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
58 #endif
59       break;
60     case 2:
61 #if defined(PETSC_USE_COMPLEX)
62       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
63 #else
64       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
65 #endif
66       break;
67     case 3:
68 #if defined(PETSC_USE_COMPLEX)
69       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
70 #else
71       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
72 #endif
73       break;
74     default:
75 #if defined(PETSC_USE_COMPLEX)
76       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
77 #else
78       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
79 #endif
80       break;
81     }
82     fftw->finarray  = x_array;
83     fftw->foutarray = y_array;
84     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
85                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
86     fftw_execute(fftw->p_forward);
87   } else { /* use existing plan */
88     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
89       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
90     } else {
91       fftw_execute(fftw->p_forward);
92     }
93   }
94   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
95   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 /* MatMultTranspose_SeqFFTW performs serial backward DFT
100 
101    Input parameter:
102    mat - the matrix
103    x   - the vector on which BDFT will be performed
104 
105    Output parameter:
106    y   - vector that stores result of BDFT
107 
108 */
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
113 {
114   PetscErrorCode ierr;
115   Mat_FFT        *fft = (Mat_FFT*)A->data;
116   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
117   PetscScalar    *x_array,*y_array;
118   PetscInt       ndim=fft->ndim,*dim=fft->dim;
119 
120   PetscFunctionBegin;
121   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
122   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
123   if (!fftw->p_backward){ /* create a plan, then excute it */
124     switch (ndim){
125     case 1:
126 #if defined(PETSC_USE_COMPLEX)
127       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
128 #else
129       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
130 #endif
131       break;
132     case 2:
133 #if defined(PETSC_USE_COMPLEX)
134       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
135 #else
136       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
137 #endif
138       break;
139     case 3:
140 #if defined(PETSC_USE_COMPLEX)
141       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
142 #else
143       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
144 #endif
145       break;
146     default:
147 #if defined(PETSC_USE_COMPLEX)
148       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
149 #else
150       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
151 #endif
152       break;
153     }
154     fftw->binarray  = x_array;
155     fftw->boutarray = y_array;
156     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
157   } else { /* use existing plan */
158     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
159       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
160     } else {
161       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
162     }
163   }
164   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
165   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 /* MatMult_MPIFFTW performs forward DFT in parallel
170 
171    Input parameter:
172    mat - the matrix
173    x   - the vector on which FDFT will be performed
174 
175    Output parameter:
176    y   - vector that stores result of FDFT
177 
178 */
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMult_MPIFFTW"
182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
183 {
184   PetscErrorCode ierr;
185   Mat_FFT        *fft  = (Mat_FFT*)A->data;
186   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
187   PetscScalar    *x_array,*y_array;
188   PetscInt       ndim=fft->ndim,*dim=fft->dim;
189   MPI_Comm       comm=((PetscObject)A)->comm;
190 
191   PetscFunctionBegin;
192   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
193   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
194   if (!fftw->p_forward){ /* create a plan, then excute it */
195     switch (ndim){
196     case 1:
197 #if defined(PETSC_USE_COMPLEX)
198       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
199 #else
200       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
201 #endif
202       break;
203     case 2:
204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
205       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
206 #else
207       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
208 #endif
209       break;
210     case 3:
211 #if defined(PETSC_USE_COMPLEX)
212       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
213 #else
214       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
215 #endif
216       break;
217     default:
218 #if defined(PETSC_USE_COMPLEX)
219       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
220 #else
221       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
222 #endif
223       break;
224     }
225     fftw->finarray  = x_array;
226     fftw->foutarray = y_array;
227     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
228                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
229     fftw_execute(fftw->p_forward);
230   } else { /* use existing plan */
231     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
232       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
233     } else {
234       fftw_execute(fftw->p_forward);
235     }
236   }
237   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
238   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
243 
244    Input parameter:
245    mat - the matrix
246    x   - the vector on which BDFT will be performed
247 
248    Output parameter:
249    y   - vector that stores result of BDFT
250 
251 */
252 #undef __FUNCT__
253 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
255 {
256   PetscErrorCode ierr;
257   Mat_FFT        *fft  = (Mat_FFT*)A->data;
258   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
259   PetscScalar    *x_array,*y_array;
260   PetscInt       ndim=fft->ndim,*dim=fft->dim;
261   MPI_Comm       comm=((PetscObject)A)->comm;
262 
263   PetscFunctionBegin;
264   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
265   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
266   if (!fftw->p_backward){ /* create a plan, then excute it */
267     switch (ndim){
268     case 1:
269 #if defined(PETSC_USE_COMPLEX)
270       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
271 #else
272       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
273 #endif
274       break;
275     case 2:
276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
277       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
278 #else
279       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
280 #endif
281       break;
282     case 3:
283 #if defined(PETSC_USE_COMPLEX)
284       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
285 #else
286       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
287 #endif
288       break;
289     default:
290 #if defined(PETSC_USE_COMPLEX)
291       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
292 #else
293       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
294 #endif
295       break;
296     }
297     fftw->binarray  = x_array;
298     fftw->boutarray = y_array;
299     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
300   } else { /* use existing plan */
301     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
302       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
303     } else {
304       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305     }
306   }
307   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
308   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatDestroy_FFTW"
314 PetscErrorCode MatDestroy_FFTW(Mat A)
315 {
316   Mat_FFT        *fft = (Mat_FFT*)A->data;
317   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   fftw_destroy_plan(fftw->p_forward);
322   fftw_destroy_plan(fftw->p_backward);
323   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
324   ierr = PetscFree(fft->data);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
329 #undef __FUNCT__
330 #define __FUNCT__ "VecDestroy_MPIFFTW"
331 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
332 {
333   PetscErrorCode  ierr;
334   PetscScalar     *array;
335 
336   PetscFunctionBegin;
337   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
338   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
339   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
340   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 
345 #undef __FUNCT__
346 #define __FUNCT__ "MatGetVecsFFTW"
347 /*@
348    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
349      parallel layout determined by FFTW
350 
351    Collective on Mat
352 
353    Input Parameter:
354 .  mat - the matrix
355 
356    Output Parameter:
+   fin - (optional) input vector of forward FFTW
.   fout - (optional) output vector of forward FFTW
-   bout - (optional) output vector of backward FFTW
360 
361   Level: advanced
362   Note: The parallel layout of output of forward FFTW is always same as the input
363         of the backward FFTW. But parallel layout of the input vector of forward
364         FFTW might not be same as the output of backward FFTW.
365         Also note that we need to provide enough space while doing parallel real transform.
        We need to pad extra zeros at the end of last dimension. For this reason one needs to
        invoke the routine fftw_mpi_local_size_transposed. Remember one has to change the
368         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
369         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
370         zeros if it is odd only one zero is needed.
371         Lastly one needs some scratch space at the end of data set in each process. alloc_local
372         figures out how much space is needed, i.e. it figures out the data+scratch space for
373         each processor and returns that.
374 
375 .seealso: MatCreateFFTW()
376 @*/
377 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
378 {
379   PetscErrorCode ierr;
380 
381   PetscFunctionBegin;
382   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
383   PetscFunctionReturn(0);
384 }
385 
386 EXTERN_C_BEGIN
387 #undef __FUNCT__
388 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
389 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
390 {
391   PetscErrorCode ierr;
392   PetscMPIInt    size,rank;
393   MPI_Comm       comm=((PetscObject)A)->comm;
394   Mat_FFT        *fft = (Mat_FFT*)A->data;
395   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
396   PetscInt       N=fft->N;
397   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
398 
399   PetscFunctionBegin;
400   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
401   PetscValidType(A,1);
402 
403   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
404   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
405   if (size == 1){ /* sequential case */
406 #if defined(PETSC_USE_COMPLEX)
407     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
408     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
409     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
410 #else
411     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
412     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
413     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
414 #endif
415   } else { /* parallel cases */
416     ptrdiff_t      alloc_local,local_n0,local_0_start;
417     ptrdiff_t      local_n1;
418     fftw_complex   *data_fout;
419     ptrdiff_t      local_1_start;
420 #if defined(PETSC_USE_COMPLEX)
421     fftw_complex   *data_fin,*data_bout;
422 #else
423     double         *data_finr,*data_boutr;
424     PetscInt       n1,N1,vsize;
425     ptrdiff_t      temp;
426 #endif
427 
428     switch (ndim){
429           case 1:
430 #if !defined(PETSC_USE_COMPLEX)
431                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
432 #else
433                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
434                  if (fin) {
435                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
436                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
437                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
438                          }
439                  if (fout) {
440                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
441                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
442                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
443                          }
444                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
445                  if (bout) {
446                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
447                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
448                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
449                          }
450           break;
451 #endif
452           case 2:
453 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
454                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
455                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
456                  if (fin) {
457                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
458                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
459                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
460                           }
461                  if (bout) {
462                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
463                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
464                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
465                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
466                           }
467                  //n1 = 2*local_n1*dim[0];
468                  if (fout) {
469                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
470                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
471                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
472                            }
473 #else
474       /* Get local size */
475                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
476                  if (fin) {
477                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
478                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
479                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
480                           }
481                  if (fout) {
482                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
484                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
485                            }
486                  if (bout) {
487                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
488                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
489                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
490                           }
491 #endif
492           break;
493           case 3:
494 #if !defined(PETSC_USE_COMPLEX)
495                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
496                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
497                  if (fin) {
498                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
499                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
500                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
501                          }
502                  if (bout) {
503                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
504                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
505                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
506                           }
507                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
508                  if (fout) {
509                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
510                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
511                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
512                           }
513 #else
514                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
515                  if (fin) {
516                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
518                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
519                          }
520                  if (fout) {
521                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
522                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
523                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
524                          }
525                  if (bout) {
526                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
527                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
528                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
529                          }
530 #endif
531           break;
532           default:
533 #if !defined(PETSC_USE_COMPLEX)
534                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
535                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
536                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
537                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
538                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
539 
540                  if (fin) {
541                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
542                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
543                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
544                         }
545                  if (bout) {
546                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
547                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
548                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
549                         }
550                  //temp = fftw->partial_dim;
551                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
552                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
553                  if (fout) {
554                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
555                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
556                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
557                         }
558 #else
559                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
560                 if (fin) {
561                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
562                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
563                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
564                        }
565                 if (fout) {
566                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
567                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
568                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
569                        }
570                 if (bout) {
571                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
572                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
573                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
574                        }
575 #endif
576             break;
577           }
578   }
579   PetscFunctionReturn(0);
580 }
581 EXTERN_C_END
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "VecScatterPetscToFFTW"
585 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
586 {
587   PetscErrorCode ierr;
588   PetscFunctionBegin;
589   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 /*
594       VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block.
595   Input A, x, y
596   A - FFTW matrix
597   x - user data
598   Options Database Keys:
599 + -mat_fftw_plannerflags - set FFTW planner flags
600 
601    Level: intermediate
   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This is required even when
603          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
604          zeros. This routine does that job by scattering operation.
605 
606 
607 */
608 
609 EXTERN_C_BEGIN
610 #undef __FUNCT__
611 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
612 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
613 {
614   PetscErrorCode ierr;
615   MPI_Comm       comm=((PetscObject)A)->comm;
616   Mat_FFT        *fft = (Mat_FFT*)A->data;
617   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
618   PetscInt       N=fft->N;
619   PetscInt       ndim=fft->ndim,*dim=fft->dim;
620   PetscInt       low;
621   PetscInt       rank,size,vsize,vsize1;
622   ptrdiff_t      alloc_local,local_n0,local_0_start;
623   ptrdiff_t      local_n1,local_1_start;
624   VecScatter     vecscat;
625   IS             list1,list2;
626 #if !defined(PETSC_USE_COMPLEX)
627   PetscInt       i,j,k,partial_dim;
628   PetscInt       *indx1, *indx2, tempindx, tempindx1;
629   PetscInt       N1, n1 ,NM;
630   ptrdiff_t      temp;
631 #endif
632 
633   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
634   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
635   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
636 
637   if (size==1)
638     {
639           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
640           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
641           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
642           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
643           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
644           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
645           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
646           ierr = ISDestroy(&list1);CHKERRQ(ierr);
647     }
648 
649  else{
650 
651  switch (ndim){
652  case 1:
653 #if defined(PETSC_USE_COMPLEX)
654       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
655       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
656       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
657       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
658       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
659       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
660       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
661       ierr = ISDestroy(&list1);CHKERRQ(ierr);
662       ierr = ISDestroy(&list2);CHKERRQ(ierr);
663 #else
664       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
665 #endif
666  break;
667  case 2:
668 #if defined(PETSC_USE_COMPLEX)
669       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
670       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
671       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
672       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
673       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
674       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
675       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
676       ierr = ISDestroy(&list1);CHKERRQ(ierr);
677       ierr = ISDestroy(&list2);CHKERRQ(ierr);
678 #else
679       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
680       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
681       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
682       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
683       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
684       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
685 
686       if (dim[1]%2==0)
687         NM = dim[1]+2;
688       else
689         NM = dim[1]+1;
690 
691       for (i=0;i<local_n0;i++){
692          for (j=0;j<dim[1];j++){
693             tempindx = i*dim[1] + j;
694             tempindx1 = i*NM + j;
695             indx1[tempindx]=local_0_start*dim[1]+tempindx;
696             indx2[tempindx]=low+tempindx1;
697         }
698      }
699 
700       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
701       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
702 
703       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
704       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
705       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
706       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
707       ierr = ISDestroy(&list1);CHKERRQ(ierr);
708       ierr = ISDestroy(&list2);CHKERRQ(ierr);
709       ierr = PetscFree(indx1);CHKERRQ(ierr);
710       ierr = PetscFree(indx2);CHKERRQ(ierr);
711 #endif
712  break;
713 
714  case 3:
715 #if defined(PETSC_USE_COMPLEX)
716       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
717       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
718       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
719       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
720       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
723       ierr = ISDestroy(&list1);CHKERRQ(ierr);
724       ierr = ISDestroy(&list2);CHKERRQ(ierr);
725 #else
726       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
727       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
728 
729       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
730       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
731 
732       if (dim[2]%2==0)
733         NM = dim[2]+2;
734       else
735         NM = dim[2]+1;
736 
737       for (i=0;i<local_n0;i++){
738          for (j=0;j<dim[1];j++){
739             for (k=0;k<dim[2];k++){
740                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
741                tempindx1 = i*dim[1]*NM + j*NM + k;
742                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
743                indx2[tempindx]=low+tempindx1;
744             }
745          }
746       }
747 
748       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
749       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
750       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
751       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
754       ierr = ISDestroy(&list1);CHKERRQ(ierr);
755       ierr = ISDestroy(&list2);CHKERRQ(ierr);
756       ierr = PetscFree(indx1);CHKERRQ(ierr);
757       ierr = PetscFree(indx2);CHKERRQ(ierr);
758 #endif
759  break;
760 
761  default:
762 #if defined(PETSC_USE_COMPLEX)
763       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
764       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
765       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
766       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
767       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
768       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
769       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
770       ierr = ISDestroy(&list1);CHKERRQ(ierr);
771       ierr = ISDestroy(&list2);CHKERRQ(ierr);
772 #else
773       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
774       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
775       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
776       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
777       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
778 
779       partial_dim = fftw->partial_dim;
780 
781       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
782       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
783 
784       if (dim[ndim-1]%2==0)
785         NM = 2;
786       else
787         NM = 1;
788 
789       j = low;
790       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
791          {
792           indx1[i] = local_0_start*partial_dim + i;
793           indx2[i] = j;
794           if (k%dim[ndim-1]==0)
795             { j+=NM;}
796           j++;
797          }
798       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
799       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
800       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
801       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
802       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
803       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
804       ierr = ISDestroy(&list1);CHKERRQ(ierr);
805       ierr = ISDestroy(&list2);CHKERRQ(ierr);
806       ierr = PetscFree(indx1);CHKERRQ(ierr);
807       ierr = PetscFree(indx2);CHKERRQ(ierr);
808 #endif
809  break;
810   }
811  }
812  return(0);
813 }
814 EXTERN_C_END
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "VecScatterFFTWToPetsc"
/*
   VecScatterFFTWToPetsc - Public entry point that scatters an FFTW-ordered vector into
   a natural-ordering PETSc vector; dispatches to the implementation composed on A
   (VecScatterFFTWToPetsc_FFTW for MATFFTW matrices).
*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  /* PetscTryMethod looks up "VecScatterFFTWToPetsc_C" on A and calls it if present; it is
     a no-op (not an error) when the matrix type did not compose that function */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
825 
826 /*
827       VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use.
828   Input A, x, y
829   A - FFTW matrix
830   x - FFTW vector
831   y - PETSc vector that user can read
832 
833    Level: intermediate
834    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
835          VecScatterFFTWToPetsc removes those extra zeros.
836 
837 */
838 
839 EXTERN_C_BEGIN
840 #undef __FUNCT__
841 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
842 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
843 {
844   PetscErrorCode ierr;
845   MPI_Comm       comm=((PetscObject)A)->comm;
846   Mat_FFT        *fft = (Mat_FFT*)A->data;
847   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
848   PetscInt       N=fft->N;
849   PetscInt       ndim=fft->ndim,*dim=fft->dim;
850   PetscInt       low;
851   PetscInt       rank,size;
852   ptrdiff_t      alloc_local,local_n0,local_0_start;
853   ptrdiff_t      local_n1,local_1_start;
854   VecScatter     vecscat;
855   IS             list1,list2;
856 #if !defined(PETSC_USE_COMPLEX)
857   PetscInt       i,j,k,partial_dim;
858   PetscInt       *indx1, *indx2, tempindx, tempindx1;
859   PetscInt       N1, n1 ,NM;
860   ptrdiff_t      temp;
861 #endif
862 
863 
864   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
865   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
866   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
867 
868   if (size==1){
869     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
870     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
871     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
874     ierr = ISDestroy(&list1);CHKERRQ(ierr);
875   }
876   else{
877 
878   switch (ndim){
879   case 1:
880 #if defined(PETSC_USE_COMPLEX)
881       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
882       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
883       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
884       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
885       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
888       ierr = ISDestroy(&list1);CHKERRQ(ierr);
889       ierr = ISDestroy(&list2);CHKERRQ(ierr);
890 #else
891       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
892 #endif
893   break;
894   case 2:
895 #if defined(PETSC_USE_COMPLEX)
896       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
897       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
898       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
899       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
900       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
903       ierr = ISDestroy(&list1);CHKERRQ(ierr);
904       ierr = ISDestroy(&list2);CHKERRQ(ierr);
905 #else
906       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
907       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
908 
909       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
910       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
911 
912       if (dim[1]%2==0)
913         NM = dim[1]+2;
914       else
915         NM = dim[1]+1;
916 
917       for (i=0;i<local_n0;i++){
918          for (j=0;j<dim[1];j++){
919             tempindx = i*dim[1] + j;
920             tempindx1 = i*NM + j;
921             indx1[tempindx]=local_0_start*dim[1]+tempindx;
922             indx2[tempindx]=low+tempindx1;
923          }
924        }
925 
926        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
927        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
928 
929        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
930        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
933        ierr = ISDestroy(&list1);CHKERRQ(ierr);
934        ierr = ISDestroy(&list2);CHKERRQ(ierr);
935        ierr = PetscFree(indx1);CHKERRQ(ierr);
936        ierr = PetscFree(indx2);CHKERRQ(ierr);
937 #endif
938   break;
939   case 3:
940 #if defined(PETSC_USE_COMPLEX)
941        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
942        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
943        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
944        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
945        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
948        ierr = ISDestroy(&list1);CHKERRQ(ierr);
949        ierr = ISDestroy(&list2);CHKERRQ(ierr);
950 #else
951 
952        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
953        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
954 
955 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
956 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
957        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
958        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
959 
960        if (dim[2]%2==0)
961         NM = dim[2]+2;
962        else
963         NM = dim[2]+1;
964 
965        for (i=0;i<local_n0;i++){
966           for (j=0;j<dim[1];j++){
967              for (k=0;k<dim[2];k++){
968                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
969                 tempindx1 = i*dim[1]*NM + j*NM + k;
970                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
971                 indx2[tempindx]=low+tempindx1;
972              }
973           }
974        }
975 
976        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
977        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
978 
979        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
980        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
982        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
983        ierr = ISDestroy(&list1);CHKERRQ(ierr);
984        ierr = ISDestroy(&list2);CHKERRQ(ierr);
985        ierr = PetscFree(indx1);CHKERRQ(ierr);
986        ierr = PetscFree(indx2);CHKERRQ(ierr);
987 #endif
988   break;
989   default:
990 #if defined(PETSC_USE_COMPLEX)
991        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
992        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
993        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
994        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
995        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
998        ierr = ISDestroy(&list1);CHKERRQ(ierr);
999        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1000 #else
1001        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1002        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1003        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1004        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1005        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1006 
1007        partial_dim = fftw->partial_dim;
1008 
1009        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1010        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1011 
1012        if (dim[ndim-1]%2==0)
1013          NM = 2;
1014        else
1015          NM = 1;
1016 
1017        j = low;
1018        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1019           {
1020            indx1[i] = local_0_start*partial_dim + i;
1021            indx2[i] = j;
1022            if (k%dim[ndim-1]==0)
1023              { j+=NM;}
1024            j++;
1025           }
1026        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1027        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1028 
1029        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1030        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1033        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1034        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1035        ierr = PetscFree(indx1);CHKERRQ(ierr);
1036        ierr = PetscFree(indx2);CHKERRQ(ierr);
1037 #endif
1038   break;
1039   }
1040   }
1041   return 0;
1042 }
1043 EXTERN_C_END
1044 
1045 EXTERN_C_BEGIN
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatCreate_FFTW"
1048 /*
1049       MatCreate_FFTW - Creates a matrix object that provides FFT
1050   via the external package FFTW
1051   Options Database Keys:
1052 + -mat_fftw_plannerflags - set FFTW planner flags
1053 
1054    Level: intermediate
1055 
1056 */
1057 
1058 PetscErrorCode MatCreate_FFTW(Mat A)
1059 {
1060   PetscErrorCode ierr;
1061   MPI_Comm       comm=((PetscObject)A)->comm;
1062   Mat_FFT        *fft=(Mat_FFT*)A->data;
1063   Mat_FFTW       *fftw;
1064   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1065   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1066   PetscBool      flg;
1067   PetscInt       p_flag,partial_dim=1,ctr;
1068   PetscMPIInt    size,rank;
1069   ptrdiff_t      *pdim;
1070   ptrdiff_t      local_n1,local_1_start;
1071 #if !defined(PETSC_USE_COMPLEX)
1072    ptrdiff_t     temp;
1073    PetscInt      N1,tot_dim;
1074 #else
1075    PetscInt n1;
1076 #endif
1077 
1078   PetscFunctionBegin;
1079   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1080   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1081 
1082   fftw_mpi_init();
1083   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1084   pdim[0] = dim[0];
1085 #if !defined(PETSC_USE_COMPLEX)
1086   tot_dim = 2*dim[0];
1087 #endif
1088   for (ctr=1;ctr<ndim;ctr++)
1089      {
1090           partial_dim *= dim[ctr];
1091           pdim[ctr] = dim[ctr];
1092 #if !defined(PETSC_USE_COMPLEX)
1093           if (ctr==ndim-1)
1094             tot_dim *= (dim[ctr]/2+1);
1095           else
1096             tot_dim *= dim[ctr];
1097 #endif
1098      }
1099 
1100   if (size == 1) {
1101 #if defined(PETSC_USE_COMPLEX)
1102     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1103     n = N;
1104 #else
1105     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1106     n = tot_dim;
1107 #endif
1108 
1109   } else {
1110     ptrdiff_t alloc_local,local_n0,local_0_start;
1111     switch (ndim){
1112     case 1:
1113 #if !defined(PETSC_USE_COMPLEX)
1114    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1115 #else
1116       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1117       n = (PetscInt)local_n0;
1118       n1 = (PetscInt) local_n1;
1119       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1120 #endif
1121       break;
1122     case 2:
1123 #if defined(PETSC_USE_COMPLEX)
1124       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1125       /*
1126        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1127        PetscSynchronizedFlush(comm);
1128        */
1129       n = (PetscInt)local_n0*dim[1];
1130       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1131 #else
1132       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1133       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1134       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1135 #endif
1136       break;
1137     case 3:
1138 #if defined(PETSC_USE_COMPLEX)
1139       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1140       n = (PetscInt)local_n0*dim[1]*dim[2];
1141       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1142 #else
1143       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1144       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1145       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1146 #endif
1147       break;
1148     default:
1149 #if defined(PETSC_USE_COMPLEX)
1150       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1151       n = (PetscInt)local_n0*partial_dim;
1152       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1153 #else
1154       temp = pdim[ndim-1];
1155       pdim[ndim-1] = temp/2 + 1;
1156       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1157       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1158       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1159       pdim[ndim-1] = temp;
1160       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1161 #endif
1162       break;
1163     }
1164   }
1165   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1166   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1167   fft->data = (void*)fftw;
1168 
1169   fft->n           = n;
1170   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1171   fftw->partial_dim = partial_dim;
1172   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1173   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1174 
1175   fftw->p_forward  = 0;
1176   fftw->p_backward = 0;
1177   fftw->p_flag     = FFTW_ESTIMATE;
1178 
1179   if (size == 1){
1180     A->ops->mult          = MatMult_SeqFFTW;
1181     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1182   } else {
1183     A->ops->mult          = MatMult_MPIFFTW;
1184     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1185   }
1186   fft->matdestroy          = MatDestroy_FFTW;
1187   A->assembled          = PETSC_TRUE;
1188   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1189   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
1190   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1191 
1192   /* get runtime options */
1193   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1194     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1195     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1196   PetscOptionsEnd();
1197   PetscFunctionReturn(0);
1198 }
1199 EXTERN_C_END
1200 
1201 
1202 
1203 
1204