xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision bebe2cf65d55febe21a5af8db2bd2e168caaa2e7)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private data for MATFFTW matrices (shared by the sequential and MPI code paths) */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;  /* number of dimensions and their sizes, in FFTW's preferred ptrdiff_t form */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* guru-interface dimension descriptors, used for ndim > 3 transforms */
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward,p_backward; /* cached plans; created lazily on first MatMult()/MatMultTranspose() */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
33 /* MatMult_SeqFFTW performs forward DFT in parallel
34    Input parameter:
35      A - the matrix
36      x - the vector on which FDFT will be performed
37 
38    Output parameter:
39      y - vector that stores result of FDFT
40 */
41 #undef __FUNCT__
42 #define __FUNCT__ "MatMult_SeqFFTW"
43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44 {
45   PetscErrorCode ierr;
46   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48   const PetscScalar *x_array;
49   PetscScalar    *y_array;
50 #if defined(PETSC_USE_COMPLEX)
51 #if defined(PETSC_USE_64BIT_INDICES)
52   fftw_iodim64   *iodims;
53 #else
54   fftw_iodim     *iodims;
55 #endif
56   PetscInt       i;
57 #endif
58   PetscInt       ndim = fft->ndim,*dim = fft->dim;
59 
60   PetscFunctionBegin;
61   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
62   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
63   if (!fftw->p_forward) { /* create a plan, then excute it */
64     switch (ndim) {
65     case 1:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     case 2:
73 #if defined(PETSC_USE_COMPLEX)
74       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
75 #else
76       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
77 #endif
78       break;
79     case 3:
80 #if defined(PETSC_USE_COMPLEX)
81       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
82 #else
83       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
84 #endif
85       break;
86     default:
87 #if defined(PETSC_USE_COMPLEX)
88       iodims = fftw->iodims;
89 #if defined(PETSC_USE_64BIT_INDICES)
90       if (ndim) {
91         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
92         iodims[ndim-1].is = iodims[ndim-1].os = 1;
93         for (i=ndim-2; i>=0; --i) {
94           iodims[i].n = (ptrdiff_t)dim[i];
95           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
96         }
97       }
98       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
99 #else
100       if (ndim) {
101         iodims[ndim-1].n = (int)dim[ndim-1];
102         iodims[ndim-1].is = iodims[ndim-1].os = 1;
103         for (i=ndim-2; i>=0; --i) {
104           iodims[i].n = (int)dim[i];
105           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
106         }
107       }
108       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
109 #endif
110 
111 #else
112       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
113 #endif
114       break;
115     }
116     fftw->finarray  = (PetscScalar *) x_array;
117     fftw->foutarray = y_array;
118     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
119                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
120     fftw_execute(fftw->p_forward);
121   } else { /* use existing plan */
122     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
123       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
124     } else {
125       fftw_execute(fftw->p_forward);
126     }
127   }
128   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
129   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 /* MatMultTranspose_SeqFFTW performs serial backward DFT
134    Input parameter:
135      A - the matrix
136      x - the vector on which BDFT will be performed
137 
138    Output parameter:
139      y - vector that stores result of BDFT
140 */
141 
142 #undef __FUNCT__
143 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
144 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
145 {
146   PetscErrorCode ierr;
147   Mat_FFT        *fft  = (Mat_FFT*)A->data;
148   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
149   const PetscScalar *x_array;
150   PetscScalar    *y_array;
151   PetscInt       ndim=fft->ndim,*dim=fft->dim;
152 #if defined(PETSC_USE_COMPLEX)
153 #if defined(PETSC_USE_64BIT_INDICES)
154   fftw_iodim64   *iodims=fftw->iodims;
155 #else
156   fftw_iodim     *iodims=fftw->iodims;
157 #endif
158 #endif
159 
160   PetscFunctionBegin;
161   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_backward) { /* create a plan, then excute it */
164     switch (ndim) {
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
168 #else
169       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175 #else
176       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
182 #else
183       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188 #if defined(PETSC_USE_64BIT_INDICES)
189       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190 #else
191       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
192 #endif
193 #else
194       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
195 #endif
196       break;
197     }
198     fftw->binarray  = (PetscScalar *) x_array;
199     fftw->boutarray = y_array;
200     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
201   } else { /* use existing plan */
202     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
203       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
204     } else {
205       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
206     }
207   }
208   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
209   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
210   PetscFunctionReturn(0);
211 }
212 
213 /* MatMult_MPIFFTW performs forward DFT in parallel
214    Input parameter:
215      A - the matrix
216      x - the vector on which FDFT will be performed
217 
218    Output parameter:
219    y   - vector that stores result of FDFT
220 */
221 #undef __FUNCT__
222 #define __FUNCT__ "MatMult_MPIFFTW"
223 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
224 {
225   PetscErrorCode ierr;
226   Mat_FFT        *fft  = (Mat_FFT*)A->data;
227   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
228   const PetscScalar *x_array;
229   PetscScalar    *y_array;
230   PetscInt       ndim=fft->ndim,*dim=fft->dim;
231   MPI_Comm       comm;
232 
233   PetscFunctionBegin;
234   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
235   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
236   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
237   if (!fftw->p_forward) { /* create a plan, then excute it */
238     switch (ndim) {
239     case 1:
240 #if defined(PETSC_USE_COMPLEX)
241       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
242 #else
243       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
244 #endif
245       break;
246     case 2:
247 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
248       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
249 #else
250       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
251 #endif
252       break;
253     case 3:
254 #if defined(PETSC_USE_COMPLEX)
255       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
256 #else
257       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
258 #endif
259       break;
260     default:
261 #if defined(PETSC_USE_COMPLEX)
262       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
263 #else
264       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
265 #endif
266       break;
267     }
268     fftw->finarray  = (PetscScalar *) x_array;
269     fftw->foutarray = y_array;
270     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
271                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
272     fftw_execute(fftw->p_forward);
273   } else { /* use existing plan */
274     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
275       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
276     } else {
277       fftw_execute(fftw->p_forward);
278     }
279   }
280   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
281   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
286    Input parameter:
287      A - the matrix
288      x - the vector on which BDFT will be performed
289 
290    Output parameter:
291      y - vector that stores result of BDFT
292 */
293 #undef __FUNCT__
294 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
295 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
296 {
297   PetscErrorCode ierr;
298   Mat_FFT        *fft  = (Mat_FFT*)A->data;
299   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
300   const PetscScalar *x_array;
301   PetscScalar    *y_array;
302   PetscInt       ndim=fft->ndim,*dim=fft->dim;
303   MPI_Comm       comm;
304 
305   PetscFunctionBegin;
306   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
307   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
308   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
309   if (!fftw->p_backward) { /* create a plan, then excute it */
310     switch (ndim) {
311     case 1:
312 #if defined(PETSC_USE_COMPLEX)
313       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
314 #else
315       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
316 #endif
317       break;
318     case 2:
319 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
320       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
321 #else
322       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
323 #endif
324       break;
325     case 3:
326 #if defined(PETSC_USE_COMPLEX)
327       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
328 #else
329       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
330 #endif
331       break;
332     default:
333 #if defined(PETSC_USE_COMPLEX)
334       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
335 #else
336       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
337 #endif
338       break;
339     }
340     fftw->binarray  = (PetscScalar *) x_array;
341     fftw->boutarray = y_array;
342     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
343   } else { /* use existing plan */
344     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
345       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
346     } else {
347       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
348     }
349   }
350   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
351   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatDestroy_FFTW"
357 PetscErrorCode MatDestroy_FFTW(Mat A)
358 {
359   Mat_FFT        *fft  = (Mat_FFT*)A->data;
360   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
361   PetscErrorCode ierr;
362 
363   PetscFunctionBegin;
364   fftw_destroy_plan(fftw->p_forward);
365   fftw_destroy_plan(fftw->p_backward);
366   if (fftw->iodims) {
367     free(fftw->iodims);
368   }
369   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
370   ierr = PetscFree(fft->data);CHKERRQ(ierr);
371   fftw_mpi_cleanup();
372   PetscFunctionReturn(0);
373 }
374 
375 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
376 #undef __FUNCT__
377 #define __FUNCT__ "VecDestroy_MPIFFTW"
378 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
379 {
380   PetscErrorCode ierr;
381   PetscScalar    *array;
382 
383   PetscFunctionBegin;
384   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
385   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
386   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
387   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
#undef __FUNCT__
#define __FUNCT__ "MatCreateVecsFFTW"
/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of output of forward FFTW is always same as the input
        of the backward FFTW. But parallel layout of the input vector of forward
        FFTW might not be same as the output of backward FFTW.
        Also note that we need to provide enough space while doing parallel real transform.
        We need to pad extra zeros at the end of last dimension. For this reason the one needs to
        invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on if the last dimension is even or odd. If the last dimension is even need to pad two
        zeros if it is odd only one zero is needed.
        Lastly one needs some scratch space at the end of data set in each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatches to the implementation composed on A as "MatCreateVecsFFTW_C"
     (MatCreateVecsFFTW_FFTW below); a no-op for non-FFTW matrices */
  ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
432 
#undef __FUNCT__
#define __FUNCT__ "MatCreateVecsFFTW_FFTW"
/* Implementation of MatCreateVecsFFTW(), composed on the matrix as "MatCreateVecsFFTW_C".

   Creates up to three work vectors sized for FFTW's data distribution:
     fin  - input of the forward transform
     fout - output of the forward transform (same layout as the backward input)
     bout - output of the backward transform
   Any of fin/fout/bout may be NULL when not needed.

   In the parallel case the storage comes from fftw_malloc() using the local sizes
   FFTW reports (including padding/scratch space for real transforms), so each
   vector's destroy op is overridden with VecDestroy_MPIFFTW to release the
   memory via fftw_free(). */
PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;                           /* global transform size */
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n; /* dimensionality, sizes, local size */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real case: fft->n already accounts for the padded r2c layout */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D complex: forward and backward may distribute input/output differently,
         hence two local-size queries */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
      /* NOTE(review): this break sits inside the #else; in the real-scalar build
         the SETERRQ above returns before reaching case 2, so no fallthrough occurs */
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real 2D: last dimension padded to 2*(dim[1]/2+1) real values per r2c layout */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same padded-last-dimension scheme as the 2D real case */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* ndim > 3, real: temporarily replace the last dimension by its r2c size
         (temp/2+1) to query FFTW's local sizes, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
648 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scatter operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatches to the implementation composed on A as "VecScatterPetscToFFTW_C";
     a no-op for matrices that do not provide it */
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
682 
683 #undef __FUNCT__
684 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
685 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
686 {
687   PetscErrorCode ierr;
688   MPI_Comm       comm;
689   Mat_FFT        *fft  = (Mat_FFT*)A->data;
690   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
691   PetscInt       N     =fft->N;
692   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
693   PetscInt       low;
694   PetscMPIInt    rank,size;
695   PetscInt       vsize,vsize1;
696   ptrdiff_t      local_n0,local_0_start;
697   ptrdiff_t      local_n1,local_1_start;
698   VecScatter     vecscat;
699   IS             list1,list2;
700 #if !defined(PETSC_USE_COMPLEX)
701   PetscInt       i,j,k,partial_dim;
702   PetscInt       *indx1, *indx2, tempindx, tempindx1;
703   PetscInt       NM;
704   ptrdiff_t      temp;
705 #endif
706 
707   PetscFunctionBegin;
708   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
709   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
710   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
711   ierr = VecGetOwnershipRange(y,&low,NULL);
712 
713   if (size==1) {
714     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
715     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
716     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
717     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
718     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
719     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
721     ierr = ISDestroy(&list1);CHKERRQ(ierr);
722   } else {
723     switch (ndim) {
724     case 1:
725 #if defined(PETSC_USE_COMPLEX)
726       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
727 
728       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
729       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
730       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
731       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
732       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
733       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
734       ierr = ISDestroy(&list1);CHKERRQ(ierr);
735       ierr = ISDestroy(&list2);CHKERRQ(ierr);
736 #else
737       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
738 #endif
739       break;
740     case 2:
741 #if defined(PETSC_USE_COMPLEX)
742       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
743 
744       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
745       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
746       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
747       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
748       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
750       ierr = ISDestroy(&list1);CHKERRQ(ierr);
751       ierr = ISDestroy(&list2);CHKERRQ(ierr);
752 #else
753       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
754 
755       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
756       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
757 
758       if (dim[1]%2==0) {
759         NM = dim[1]+2;
760       } else {
761         NM = dim[1]+1;
762       }
763       for (i=0; i<local_n0; i++) {
764         for (j=0; j<dim[1]; j++) {
765           tempindx  = i*dim[1] + j;
766           tempindx1 = i*NM + j;
767 
768           indx1[tempindx]=local_0_start*dim[1]+tempindx;
769           indx2[tempindx]=low+tempindx1;
770         }
771       }
772 
773       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
774       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
775 
776       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
777       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
778       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
779       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
780       ierr = ISDestroy(&list1);CHKERRQ(ierr);
781       ierr = ISDestroy(&list2);CHKERRQ(ierr);
782       ierr = PetscFree(indx1);CHKERRQ(ierr);
783       ierr = PetscFree(indx2);CHKERRQ(ierr);
784 #endif
785       break;
786 
787     case 3:
788 #if defined(PETSC_USE_COMPLEX)
789       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
790 
791       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
792       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
793       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
794       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
795       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
797       ierr = ISDestroy(&list1);CHKERRQ(ierr);
798       ierr = ISDestroy(&list2);CHKERRQ(ierr);
799 #else
800       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
801       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
802       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
803 
804       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
805       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
806 
807       if (dim[2]%2==0) NM = dim[2]+2;
808       else             NM = dim[2]+1;
809 
810       for (i=0; i<local_n0; i++) {
811         for (j=0; j<dim[1]; j++) {
812           for (k=0; k<dim[2]; k++) {
813             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
814             tempindx1 = i*dim[1]*NM + j*NM + k;
815 
816             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
817             indx2[tempindx]=low+tempindx1;
818           }
819         }
820       }
821 
822       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
823       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
824       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
825       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
828       ierr = ISDestroy(&list1);CHKERRQ(ierr);
829       ierr = ISDestroy(&list2);CHKERRQ(ierr);
830       ierr = PetscFree(indx1);CHKERRQ(ierr);
831       ierr = PetscFree(indx2);CHKERRQ(ierr);
832 #endif
833       break;
834 
835     default:
836 #if defined(PETSC_USE_COMPLEX)
837       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
838 
839       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
840       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
841       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
842       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
845       ierr = ISDestroy(&list1);CHKERRQ(ierr);
846       ierr = ISDestroy(&list2);CHKERRQ(ierr);
847 #else
848       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
849       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
850       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
851 
852       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
853 
854       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
855 
856       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
857 
858       partial_dim = fftw->partial_dim;
859 
860       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
861       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
862 
863       if (dim[ndim-1]%2==0) NM = 2;
864       else                  NM = 1;
865 
866       j = low;
867       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
868         indx1[i] = local_0_start*partial_dim + i;
869         indx2[i] = j;
870         if (k%dim[ndim-1]==0) j+=NM;
871         j++;
872       }
873       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
874       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
875       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
876       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
877       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
879       ierr = ISDestroy(&list1);CHKERRQ(ierr);
880       ierr = ISDestroy(&list2);CHKERRQ(ierr);
881       ierr = PetscFree(indx1);CHKERRQ(ierr);
882       ierr = PetscFree(indx2);CHKERRQ(ierr);
883 #endif
884       break;
885     }
886   }
887   PetscFunctionReturn(0);
888 }
889 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts the FFTW-layout output vector back into a PETSc vector
   with the natural parallel layout.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (VecScatterFFTWToPetsc_FFTW)
     composed on A at MatCreate_FFTW() time; a no-op if A is not a MATFFTW. */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
922 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
923 {
924   PetscErrorCode ierr;
925   MPI_Comm       comm;
926   Mat_FFT        *fft  = (Mat_FFT*)A->data;
927   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
928   PetscInt       N     = fft->N;
929   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
930   PetscInt       low;
931   PetscMPIInt    rank,size;
932   ptrdiff_t      local_n0,local_0_start;
933   ptrdiff_t      local_n1,local_1_start;
934   VecScatter     vecscat;
935   IS             list1,list2;
936 #if !defined(PETSC_USE_COMPLEX)
937   PetscInt       i,j,k,partial_dim;
938   PetscInt       *indx1, *indx2, tempindx, tempindx1;
939   PetscInt       NM;
940   ptrdiff_t      temp;
941 #endif
942 
943   PetscFunctionBegin;
944   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
945   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
946   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
947   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
948 
949   if (size==1) {
950     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
951     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
952     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
955     ierr = ISDestroy(&list1);CHKERRQ(ierr);
956 
957   } else {
958     switch (ndim) {
959     case 1:
960 #if defined(PETSC_USE_COMPLEX)
961       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
962 
963       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
964       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
965       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
966       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
969       ierr = ISDestroy(&list1);CHKERRQ(ierr);
970       ierr = ISDestroy(&list2);CHKERRQ(ierr);
971 #else
972       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
973 #endif
974       break;
975     case 2:
976 #if defined(PETSC_USE_COMPLEX)
977       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
978 
979       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
980       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
981       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
982       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
984       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
985       ierr = ISDestroy(&list1);CHKERRQ(ierr);
986       ierr = ISDestroy(&list2);CHKERRQ(ierr);
987 #else
988       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
989 
990       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
991       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
992 
993       if (dim[1]%2==0) NM = dim[1]+2;
994       else             NM = dim[1]+1;
995 
996       for (i=0; i<local_n0; i++) {
997         for (j=0; j<dim[1]; j++) {
998           tempindx = i*dim[1] + j;
999           tempindx1 = i*NM + j;
1000 
1001           indx1[tempindx]=local_0_start*dim[1]+tempindx;
1002           indx2[tempindx]=low+tempindx1;
1003         }
1004       }
1005 
1006       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1007       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1008 
1009       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1010       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1012       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1013       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1014       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1015       ierr = PetscFree(indx1);CHKERRQ(ierr);
1016       ierr = PetscFree(indx2);CHKERRQ(ierr);
1017 #endif
1018       break;
1019     case 3:
1020 #if defined(PETSC_USE_COMPLEX)
1021       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1022 
1023       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1024       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1025       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1026       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1029       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1030       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1031 #else
1032       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1033 
1034       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1035       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1036 
1037       if (dim[2]%2==0) NM = dim[2]+2;
1038       else             NM = dim[2]+1;
1039 
1040       for (i=0; i<local_n0; i++) {
1041         for (j=0; j<dim[1]; j++) {
1042           for (k=0; k<dim[2]; k++) {
1043             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1044             tempindx1 = i*dim[1]*NM + j*NM + k;
1045 
1046             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1047             indx2[tempindx]=low+tempindx1;
1048           }
1049         }
1050       }
1051 
1052       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1053       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1054 
1055       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1056       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1059       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1060       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1061       ierr = PetscFree(indx1);CHKERRQ(ierr);
1062       ierr = PetscFree(indx2);CHKERRQ(ierr);
1063 #endif
1064       break;
1065     default:
1066 #if defined(PETSC_USE_COMPLEX)
1067       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1068 
1069       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1070       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1071       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1072       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1073       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1074       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1075       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1076       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1077 #else
1078       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1079 
1080       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1081 
1082       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1083 
1084       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1085 
1086       partial_dim = fftw->partial_dim;
1087 
1088       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1089       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1090 
1091       if (dim[ndim-1]%2==0) NM = 2;
1092       else                  NM = 1;
1093 
1094       j = low;
1095       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1096         indx1[i] = local_0_start*partial_dim + i;
1097         indx2[i] = j;
1098         if (k%dim[ndim-1]==0) j+=NM;
1099         j++;
1100       }
1101       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1102       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1103 
1104       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1105       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1106       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1108       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1109       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1110       ierr = PetscFree(indx1);CHKERRQ(ierr);
1111       ierr = PetscFree(indx2);CHKERRQ(ierr);
1112 #endif
1113       break;
1114     }
1115   }
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatCreate_FFTW"
1121 /*
1122     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1123 
1124   Options Database Keys:
1125 + -mat_fftw_plannerflags - set FFTW planner flags
1126 
1127    Level: intermediate
1128 
1129 */
1130 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1131 {
1132   PetscErrorCode ierr;
1133   MPI_Comm       comm;
1134   Mat_FFT        *fft=(Mat_FFT*)A->data;
1135   Mat_FFTW       *fftw;
1136   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1137   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1138   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1139   PetscBool      flg;
1140   PetscInt       p_flag,partial_dim=1,ctr;
1141   PetscMPIInt    size,rank;
1142   ptrdiff_t      *pdim;
1143   ptrdiff_t      local_n1,local_1_start;
1144 #if !defined(PETSC_USE_COMPLEX)
1145   ptrdiff_t      temp;
1146   PetscInt       N1,tot_dim;
1147 #else
1148   PetscInt       n1;
1149 #endif
1150 
1151   PetscFunctionBegin;
1152   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1153   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1154   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1155 
1156   fftw_mpi_init();
1157   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1158   pdim[0] = dim[0];
1159 #if !defined(PETSC_USE_COMPLEX)
1160   tot_dim = 2*dim[0];
1161 #endif
1162   for (ctr=1; ctr<ndim; ctr++) {
1163     partial_dim *= dim[ctr];
1164     pdim[ctr]    = dim[ctr];
1165 #if !defined(PETSC_USE_COMPLEX)
1166     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1167     else             tot_dim *= dim[ctr];
1168 #endif
1169   }
1170 
1171   if (size == 1) {
1172 #if defined(PETSC_USE_COMPLEX)
1173     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1174     n    = N;
1175 #else
1176     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1177     n    = tot_dim;
1178 #endif
1179 
1180   } else {
1181     ptrdiff_t local_n0,local_0_start;
1182     switch (ndim) {
1183     case 1:
1184 #if !defined(PETSC_USE_COMPLEX)
1185       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1186 #else
1187       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1188 
1189       n    = (PetscInt)local_n0;
1190       n1   = (PetscInt)local_n1;
1191       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1192 #endif
1193       break;
1194     case 2:
1195 #if defined(PETSC_USE_COMPLEX)
1196       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1197       /*
1198        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1199        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1200        */
1201       n    = (PetscInt)local_n0*dim[1];
1202       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1203 #else
1204       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1205 
1206       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1207       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1208 #endif
1209       break;
1210     case 3:
1211 #if defined(PETSC_USE_COMPLEX)
1212       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1213 
1214       n    = (PetscInt)local_n0*dim[1]*dim[2];
1215       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1216 #else
1217       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1218 
1219       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1220       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1221 #endif
1222       break;
1223     default:
1224 #if defined(PETSC_USE_COMPLEX)
1225       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1226 
1227       n    = (PetscInt)local_n0*partial_dim;
1228       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1229 #else
1230       temp = pdim[ndim-1];
1231 
1232       pdim[ndim-1] = temp/2 + 1;
1233 
1234       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1235 
1236       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1237       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1238 
1239       pdim[ndim-1] = temp;
1240 
1241       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1242 #endif
1243       break;
1244     }
1245   }
1246   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1247   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1248   fft->data = (void*)fftw;
1249 
1250   fft->n            = n;
1251   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1252   fftw->partial_dim = partial_dim;
1253 
1254   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1255   if (size == 1) {
1256 #if defined(PETSC_USE_64BIT_INDICES)
1257     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1258 #else
1259     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1260 #endif
1261   }
1262 
1263   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1264 
1265   fftw->p_forward  = 0;
1266   fftw->p_backward = 0;
1267   fftw->p_flag     = FFTW_ESTIMATE;
1268 
1269   if (size == 1) {
1270     A->ops->mult          = MatMult_SeqFFTW;
1271     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1272   } else {
1273     A->ops->mult          = MatMult_MPIFFTW;
1274     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1275   }
1276   fft->matdestroy = MatDestroy_FFTW;
1277   A->assembled    = PETSC_TRUE;
1278   A->preallocated = PETSC_TRUE;
1279 
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1283 
1284   /* get runtime options */
1285   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1286   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1287   if (flg) {
1288     fftw->p_flag = iplans[p_flag];
1289   }
1290   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 
1295 
1296 
1297