xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 44b85a236d0c752951b0573ba76bfb3134d48c1e)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
/* Private FFTW-specific data hung off Mat_FFT->data */
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;  /* dimensions in FFTW's preferred ptrdiff_t form (used by the MPI guru interfaces) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* per-dimension size/stride descriptors for the sequential guru planner */
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;          /* product of all but the first dimension (local layout helper) */
  fftw_plan    p_forward,p_backward; /* cached plans; created lazily on first MatMult/MatMultTranspose */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
/* MatMult_SeqFFTW performs the serial (sequential) forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT

   Notes: the plan is created lazily on first call and cached in the Mat;
   subsequent calls reuse it via the FFTW new-array execute interface when
   the array addresses have changed.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_SeqFFTW"
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscScalar    *x_array,*y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims;
#else
  fftw_iodim     *iodims;
#endif
  PetscInt       i;
#endif
  PetscInt       ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      /* Build row-major iodims: last dimension has stride 1, each earlier
         dimension's stride is the product of the sizes of later dimensions */
      if (ndim) {
        iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning!
                Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      /* NOTE(review): fftw_execute_dft is documented for complex plans; for the real (r2c)
         build this presumably relies on compatible layouts — confirm against FFTW sec 4.6 */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
131 
132 /* MatMultTranspose_SeqFFTW performs serial backward DFT
133    Input parameter:
134      A - the matrix
135      x - the vector on which BDFT will be performed
136 
137    Output parameter:
138      y - vector that stores result of BDFT
139 */
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
143 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
144 {
145   PetscErrorCode ierr;
146   Mat_FFT        *fft  = (Mat_FFT*)A->data;
147   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
148   PetscScalar    *x_array,*y_array;
149   PetscInt       ndim=fft->ndim,*dim=fft->dim;
150 #if defined(PETSC_USE_COMPLEX)
151 #if defined(PETSC_USE_64BIT_INDICES)
152   fftw_iodim64   *iodims=fftw->iodims;
153 #else
154   fftw_iodim     *iodims=fftw->iodims;
155 #endif
156 #endif
157 
158   PetscFunctionBegin;
159   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
160   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
161   if (!fftw->p_backward) { /* create a plan, then excute it */
162     switch (ndim) {
163     case 1:
164 #if defined(PETSC_USE_COMPLEX)
165       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
166 #else
167       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
168 #endif
169       break;
170     case 2:
171 #if defined(PETSC_USE_COMPLEX)
172       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
173 #else
174       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
175 #endif
176       break;
177     case 3:
178 #if defined(PETSC_USE_COMPLEX)
179       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
180 #else
181       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
182 #endif
183       break;
184     default:
185 #if defined(PETSC_USE_COMPLEX)
186 #if defined(PETSC_USE_64BIT_INDICES)
187       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
188 #else
189       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
190 #endif
191 #else
192       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
193 #endif
194       break;
195     }
196     fftw->binarray  = x_array;
197     fftw->boutarray = y_array;
198     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199   } else { /* use existing plan */
200     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
/* MatMult_MPIFFTW performs forward DFT in parallel
   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT

   Notes: the MPI plan is created lazily on the matrix communicator and
   cached; 1D parallel real transforms are not supported by FFTW.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIFFTW"
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscScalar    *x_array,*y_array;
  PetscInt       ndim=fft->ndim,*dim=fft->dim;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      /* NOTE(review): the real branches hard-code FFTW_ESTIMATE instead of fftw->p_flag,
         presumably to avoid the planner overwriting user data — confirm intent */
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten during planning!
                Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
281 
282 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
283    Input parameter:
284      A - the matrix
285      x - the vector on which BDFT will be performed
286 
287    Output parameter:
288      y - vector that stores result of BDFT
289 */
290 #undef __FUNCT__
291 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
292 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
293 {
294   PetscErrorCode ierr;
295   Mat_FFT        *fft  = (Mat_FFT*)A->data;
296   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
297   PetscScalar    *x_array,*y_array;
298   PetscInt       ndim=fft->ndim,*dim=fft->dim;
299   MPI_Comm       comm;
300 
301   PetscFunctionBegin;
302   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
303   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
304   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
305   if (!fftw->p_backward) { /* create a plan, then excute it */
306     switch (ndim) {
307     case 1:
308 #if defined(PETSC_USE_COMPLEX)
309       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
310 #else
311       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
312 #endif
313       break;
314     case 2:
315 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
316       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
317 #else
318       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
319 #endif
320       break;
321     case 3:
322 #if defined(PETSC_USE_COMPLEX)
323       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
324 #else
325       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
326 #endif
327       break;
328     default:
329 #if defined(PETSC_USE_COMPLEX)
330       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
331 #else
332       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
333 #endif
334       break;
335     }
336     fftw->binarray  = x_array;
337     fftw->boutarray = y_array;
338     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339   } else { /* use existing plan */
340     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
341       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
342     } else {
343       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
344     }
345   }
346   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
347   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatDestroy_FFTW"
353 PetscErrorCode MatDestroy_FFTW(Mat A)
354 {
355   Mat_FFT        *fft  = (Mat_FFT*)A->data;
356   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   fftw_destroy_plan(fftw->p_forward);
361   fftw_destroy_plan(fftw->p_backward);
362   if (fftw->iodims) {
363     free(fftw->iodims);
364   }
365   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
366   ierr = PetscFree(fft->data);CHKERRQ(ierr);
367   fftw_mpi_cleanup();
368   PetscFunctionReturn(0);
369 }
370 
371 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
372 #undef __FUNCT__
373 #define __FUNCT__ "VecDestroy_MPIFFTW"
374 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
375 {
376   PetscErrorCode ierr;
377   PetscScalar    *array;
378 
379   PetscFunctionBegin;
380   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
381   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
382   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
383   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
384   PetscFunctionReturn(0);
385 }
386 
#undef __FUNCT__
#define __FUNCT__ "MatCreateVecsFFTW"
/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of the output of forward FFTW is always the same as the input
        of the backward FFTW. But the parallel layout of the input vector of forward
        FFTW might not be the same as the output of backward FFTW.
        Also note that we need to provide enough space while doing a parallel real transform.
        We need to pad extra zeros at the end of the last dimension. For this reason one needs to
        invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on whether the last dimension is even or odd. If the last dimension is even one needs to pad two
        zeros; if it is odd only one zero is needed.
        Lastly one needs some scratch space at the end of the data set in each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (MatCreateVecsFFTW_FFTW) if it exists */
  ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
428 
#undef __FUNCT__
#define __FUNCT__ "MatCreateVecsFFTW_FFTW"
/* MatCreateVecsFFTW_FFTW - type-specific implementation of MatCreateVecsFFTW().

   Creates up to three vectors laid out as FFTW requires:
     fin  - input of the forward transform
     fout - output of the forward transform (= input of the backward transform)
     bout - output of the backward transform
   Any of the three output arguments may be NULL to skip that vector.

   In parallel, vector storage is obtained from fftw_malloc() (sized by the
   fftw_mpi_local_size* routines, which include FFTW's scratch space) and the
   destroy operation is overridden with VecDestroy_MPIFFTW so the storage is
   released with fftw_free(). For real (r2c/c2r) transforms the last dimension
   is padded to 2*(dim[last]/2+1) entries, hence the separate N1/n1 sizes. */
PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case: ordinary PETSc sequential vectors suffice */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1; /* padded real-transform sizes (global/local) */
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* forward and backward 1D transforms may have different layouts, so
         query the local sizes separately for each direction */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last dimension by dim[last]/2+1 to query the
         padded r2c layout, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
644 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation (VecScatterPetscToFFTW_FFTW) if it exists */
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
681 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
682 {
683   PetscErrorCode ierr;
684   MPI_Comm       comm;
685   Mat_FFT        *fft  = (Mat_FFT*)A->data;
686   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
687   PetscInt       N     =fft->N;
688   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
689   PetscInt       low;
690   PetscMPIInt    rank,size;
691   PetscInt       vsize,vsize1;
692   ptrdiff_t      local_n0,local_0_start;
693   ptrdiff_t      local_n1,local_1_start;
694   VecScatter     vecscat;
695   IS             list1,list2;
696 #if !defined(PETSC_USE_COMPLEX)
697   PetscInt       i,j,k,partial_dim;
698   PetscInt       *indx1, *indx2, tempindx, tempindx1;
699   PetscInt       NM;
700   ptrdiff_t      temp;
701 #endif
702 
703   PetscFunctionBegin;
704   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
705   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
706   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
707   ierr = VecGetOwnershipRange(y,&low,NULL);
708 
709   if (size==1) {
710     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
711     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
712     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
713     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
714     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
715     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
716     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
717     ierr = ISDestroy(&list1);CHKERRQ(ierr);
718   } else {
719     switch (ndim) {
720     case 1:
721 #if defined(PETSC_USE_COMPLEX)
722       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
723 
724       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
725       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
726       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
727       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
730       ierr = ISDestroy(&list1);CHKERRQ(ierr);
731       ierr = ISDestroy(&list2);CHKERRQ(ierr);
732 #else
733       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
734 #endif
735       break;
736     case 2:
737 #if defined(PETSC_USE_COMPLEX)
738       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
739 
740       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
741       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
742       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
743       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
744       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
745       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
746       ierr = ISDestroy(&list1);CHKERRQ(ierr);
747       ierr = ISDestroy(&list2);CHKERRQ(ierr);
748 #else
749       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
750 
751       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
752       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
753 
754       if (dim[1]%2==0) {
755         NM = dim[1]+2;
756       } else {
757         NM = dim[1]+1;
758       }
759       for (i=0; i<local_n0; i++) {
760         for (j=0; j<dim[1]; j++) {
761           tempindx  = i*dim[1] + j;
762           tempindx1 = i*NM + j;
763 
764           indx1[tempindx]=local_0_start*dim[1]+tempindx;
765           indx2[tempindx]=low+tempindx1;
766         }
767       }
768 
769       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
770       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
771 
772       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
773       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
774       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
776       ierr = ISDestroy(&list1);CHKERRQ(ierr);
777       ierr = ISDestroy(&list2);CHKERRQ(ierr);
778       ierr = PetscFree(indx1);CHKERRQ(ierr);
779       ierr = PetscFree(indx2);CHKERRQ(ierr);
780 #endif
781       break;
782 
783     case 3:
784 #if defined(PETSC_USE_COMPLEX)
785       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
786 
787       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
788       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
789       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
790       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
791       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
792       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
793       ierr = ISDestroy(&list1);CHKERRQ(ierr);
794       ierr = ISDestroy(&list2);CHKERRQ(ierr);
795 #else
796       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
797       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
798       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
799 
800       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
801       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
802 
803       if (dim[2]%2==0) NM = dim[2]+2;
804       else             NM = dim[2]+1;
805 
806       for (i=0; i<local_n0; i++) {
807         for (j=0; j<dim[1]; j++) {
808           for (k=0; k<dim[2]; k++) {
809             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
810             tempindx1 = i*dim[1]*NM + j*NM + k;
811 
812             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
813             indx2[tempindx]=low+tempindx1;
814           }
815         }
816       }
817 
818       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
819       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
820       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
821       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
824       ierr = ISDestroy(&list1);CHKERRQ(ierr);
825       ierr = ISDestroy(&list2);CHKERRQ(ierr);
826       ierr = PetscFree(indx1);CHKERRQ(ierr);
827       ierr = PetscFree(indx2);CHKERRQ(ierr);
828 #endif
829       break;
830 
831     default:
832 #if defined(PETSC_USE_COMPLEX)
833       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
834 
835       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
836       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
837       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
838       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
841       ierr = ISDestroy(&list1);CHKERRQ(ierr);
842       ierr = ISDestroy(&list2);CHKERRQ(ierr);
843 #else
844       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
845       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
846       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
847 
848       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
849 
850       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
851 
852       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
853 
854       partial_dim = fftw->partial_dim;
855 
856       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
857       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
858 
859       if (dim[ndim-1]%2==0) NM = 2;
860       else                  NM = 1;
861 
862       j = low;
863       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
864         indx1[i] = local_0_start*partial_dim + i;
865         indx2[i] = j;
866         if (k%dim[ndim-1]==0) j+=NM;
867         j++;
868       }
869       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
870       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
871       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
872       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
875       ierr = ISDestroy(&list1);CHKERRQ(ierr);
876       ierr = ISDestroy(&list2);CHKERRQ(ierr);
877       ierr = PetscFree(indx1);CHKERRQ(ierr);
878       ierr = PetscFree(indx2);CHKERRQ(ierr);
879 #endif
880       break;
881     }
882   }
883   PetscFunctionReturn(0);
884 }
885 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note: While doing a real transform the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on A (a no-op if A is not a FFTW matrix) */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
918 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
919 {
920   PetscErrorCode ierr;
921   MPI_Comm       comm;
922   Mat_FFT        *fft  = (Mat_FFT*)A->data;
923   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
924   PetscInt       N     = fft->N;
925   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
926   PetscInt       low;
927   PetscMPIInt    rank,size;
928   ptrdiff_t      local_n0,local_0_start;
929   ptrdiff_t      local_n1,local_1_start;
930   VecScatter     vecscat;
931   IS             list1,list2;
932 #if !defined(PETSC_USE_COMPLEX)
933   PetscInt       i,j,k,partial_dim;
934   PetscInt       *indx1, *indx2, tempindx, tempindx1;
935   PetscInt       NM;
936   ptrdiff_t      temp;
937 #endif
938 
939   PetscFunctionBegin;
940   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
942   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
943   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
944 
945   if (size==1) {
946     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
947     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
948     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
949     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
951     ierr = ISDestroy(&list1);CHKERRQ(ierr);
952 
953   } else {
954     switch (ndim) {
955     case 1:
956 #if defined(PETSC_USE_COMPLEX)
957       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
958 
959       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
960       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
961       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
962       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
965       ierr = ISDestroy(&list1);CHKERRQ(ierr);
966       ierr = ISDestroy(&list2);CHKERRQ(ierr);
967 #else
968       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
969 #endif
970       break;
971     case 2:
972 #if defined(PETSC_USE_COMPLEX)
973       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
974 
975       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
976       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
977       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
978       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
979       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
980       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
981       ierr = ISDestroy(&list1);CHKERRQ(ierr);
982       ierr = ISDestroy(&list2);CHKERRQ(ierr);
983 #else
984       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
985 
986       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
987       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
988 
989       if (dim[1]%2==0) NM = dim[1]+2;
990       else             NM = dim[1]+1;
991 
992       for (i=0; i<local_n0; i++) {
993         for (j=0; j<dim[1]; j++) {
994           tempindx = i*dim[1] + j;
995           tempindx1 = i*NM + j;
996 
997           indx1[tempindx]=local_0_start*dim[1]+tempindx;
998           indx2[tempindx]=low+tempindx1;
999         }
1000       }
1001 
1002       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1003       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1004 
1005       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1006       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1009       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1010       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1011       ierr = PetscFree(indx1);CHKERRQ(ierr);
1012       ierr = PetscFree(indx2);CHKERRQ(ierr);
1013 #endif
1014       break;
1015     case 3:
1016 #if defined(PETSC_USE_COMPLEX)
1017       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1018 
1019       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1020       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1021       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1022       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1025       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1026       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1027 #else
1028       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1029 
1030       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1031       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1032 
1033       if (dim[2]%2==0) NM = dim[2]+2;
1034       else             NM = dim[2]+1;
1035 
1036       for (i=0; i<local_n0; i++) {
1037         for (j=0; j<dim[1]; j++) {
1038           for (k=0; k<dim[2]; k++) {
1039             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1040             tempindx1 = i*dim[1]*NM + j*NM + k;
1041 
1042             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1043             indx2[tempindx]=low+tempindx1;
1044           }
1045         }
1046       }
1047 
1048       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1049       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1050 
1051       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1052       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057       ierr = PetscFree(indx1);CHKERRQ(ierr);
1058       ierr = PetscFree(indx2);CHKERRQ(ierr);
1059 #endif
1060       break;
1061     default:
1062 #if defined(PETSC_USE_COMPLEX)
1063       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1064 
1065       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1066       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1067       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1068       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1071       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1072       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1073 #else
1074       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1075 
1076       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1077 
1078       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1079 
1080       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1081 
1082       partial_dim = fftw->partial_dim;
1083 
1084       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1085       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1086 
1087       if (dim[ndim-1]%2==0) NM = 2;
1088       else                  NM = 1;
1089 
1090       j = low;
1091       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1092         indx1[i] = local_0_start*partial_dim + i;
1093         indx2[i] = j;
1094         if (k%dim[ndim-1]==0) j+=NM;
1095         j++;
1096       }
1097       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1098       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1099 
1100       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1101       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1102       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1103       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1104       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1105       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1106       ierr = PetscFree(indx1);CHKERRQ(ierr);
1107       ierr = PetscFree(indx2);CHKERRQ(ierr);
1108 #endif
1109       break;
1110     }
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatCreate_FFTW"
1117 /*
1118     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1119 
1120   Options Database Keys:
1121 + -mat_fftw_plannerflags - set FFTW planner flags
1122 
1123    Level: intermediate
1124 
1125 */
1126 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1127 {
1128   PetscErrorCode ierr;
1129   MPI_Comm       comm;
1130   Mat_FFT        *fft=(Mat_FFT*)A->data;
1131   Mat_FFTW       *fftw;
1132   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1133   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1134   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1135   PetscBool      flg;
1136   PetscInt       p_flag,partial_dim=1,ctr;
1137   PetscMPIInt    size,rank;
1138   ptrdiff_t      *pdim;
1139   ptrdiff_t      local_n1,local_1_start;
1140 #if !defined(PETSC_USE_COMPLEX)
1141   ptrdiff_t      temp;
1142   PetscInt       N1,tot_dim;
1143 #else
1144   PetscInt       n1;
1145 #endif
1146 
1147   PetscFunctionBegin;
1148   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1149   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1150   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1151 
1152   fftw_mpi_init();
1153   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1154   pdim[0] = dim[0];
1155 #if !defined(PETSC_USE_COMPLEX)
1156   tot_dim = 2*dim[0];
1157 #endif
1158   for (ctr=1; ctr<ndim; ctr++) {
1159     partial_dim *= dim[ctr];
1160     pdim[ctr]    = dim[ctr];
1161 #if !defined(PETSC_USE_COMPLEX)
1162     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1163     else             tot_dim *= dim[ctr];
1164 #endif
1165   }
1166 
1167   if (size == 1) {
1168 #if defined(PETSC_USE_COMPLEX)
1169     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1170     n    = N;
1171 #else
1172     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1173     n    = tot_dim;
1174 #endif
1175 
1176   } else {
1177     ptrdiff_t local_n0,local_0_start;
1178     switch (ndim) {
1179     case 1:
1180 #if !defined(PETSC_USE_COMPLEX)
1181       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1182 #else
1183       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1184 
1185       n    = (PetscInt)local_n0;
1186       n1   = (PetscInt)local_n1;
1187       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1188 #endif
1189       break;
1190     case 2:
1191 #if defined(PETSC_USE_COMPLEX)
1192       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1193       /*
1194        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1195        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1196        */
1197       n    = (PetscInt)local_n0*dim[1];
1198       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1199 #else
1200       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1201 
1202       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1203       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1204 #endif
1205       break;
1206     case 3:
1207 #if defined(PETSC_USE_COMPLEX)
1208       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1209 
1210       n    = (PetscInt)local_n0*dim[1]*dim[2];
1211       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1212 #else
1213       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1214 
1215       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1216       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1217 #endif
1218       break;
1219     default:
1220 #if defined(PETSC_USE_COMPLEX)
1221       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1222 
1223       n    = (PetscInt)local_n0*partial_dim;
1224       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1225 #else
1226       temp = pdim[ndim-1];
1227 
1228       pdim[ndim-1] = temp/2 + 1;
1229 
1230       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1231 
1232       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1233       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1234 
1235       pdim[ndim-1] = temp;
1236 
1237       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1238 #endif
1239       break;
1240     }
1241   }
1242   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1243   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1244   fft->data = (void*)fftw;
1245 
1246   fft->n            = n;
1247   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1248   fftw->partial_dim = partial_dim;
1249 
1250   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1251   if (size == 1) {
1252 #if defined(PETSC_USE_64BIT_INDICES)
1253     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1254 #else
1255     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1256 #endif
1257   }
1258 
1259   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1260 
1261   fftw->p_forward  = 0;
1262   fftw->p_backward = 0;
1263   fftw->p_flag     = FFTW_ESTIMATE;
1264 
1265   if (size == 1) {
1266     A->ops->mult          = MatMult_SeqFFTW;
1267     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1268   } else {
1269     A->ops->mult          = MatMult_MPIFFTW;
1270     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1271   }
1272   fft->matdestroy = MatDestroy_FFTW;
1273   A->assembled    = PETSC_TRUE;
1274   A->preallocated = PETSC_TRUE;
1275 
1276   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1279 
1280   /* get runtime options */
1281   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1282   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1283   if (flg) {
1284     fftw->p_flag = iplans[p_flag];
1285   }
1286   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 
1291 
1292 
1293