xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 2b8d69ca7ea5fe9190df62c1dce3bbd66fce84dd)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;    /* number of dimensions and dimension sizes in FFTW's native ptrdiff_t type */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;                /* dimension descriptors for FFTW's 64-bit guru planner interface */
#else
  fftw_iodim   *iodims;                /* dimension descriptors for FFTW's guru planner interface */
#endif
  PetscInt     partial_dim;            /* NOTE(review): presumably a product of a subset of the dimensions — confirm against MatCreate_FFTW */
  fftw_plan    p_forward,p_backward;   /* cached plans; NULL until first MatMult/MatMultTranspose creates them lazily */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
/* MatMult_SeqFFTW performs serial forward DFT
   Input parameters:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores result of FDFT
*/
41 #undef __FUNCT__
42 #define __FUNCT__ "MatMult_SeqFFTW"
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims;
#else
  fftw_iodim     *iodims;
#endif
  PetscInt       i;
#endif
  PetscInt       ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
  ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
  if (!fftw->p_forward) { /* create a plan, then execute it */
    /* Complex builds plan a c2c transform (FFTW_FORWARD); real builds plan r2c.
       1D/2D/3D use the basic planner API; higher dimensions use the guru API. */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        /* Fill strides for a contiguous row-major array: the last dimension is
           unit-stride; each earlier stride is the product of the later extents. */
        iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* Remember which arrays the plan was created for; FFTW plans are only
       guaranteed for those arrays unless the new-array execute API is used. */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
                planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
136 
137 /* MatMultTranspose_SeqFFTW performs serial backward DFT
138    Input parameter:
139      A - the matrix
140      x - the vector on which BDFT will be performed
141 
142    Output parameter:
143      y - vector that stores result of BDFT
144 */
145 
146 #undef __FUNCT__
147 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
148 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
149 {
150   PetscErrorCode ierr;
151   Mat_FFT        *fft  = (Mat_FFT*)A->data;
152   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
153   const PetscScalar *x_array;
154   PetscScalar    *y_array;
155   PetscInt       ndim=fft->ndim,*dim=fft->dim;
156 #if defined(PETSC_USE_COMPLEX)
157 #if defined(PETSC_USE_64BIT_INDICES)
158   fftw_iodim64   *iodims=fftw->iodims;
159 #else
160   fftw_iodim     *iodims=fftw->iodims;
161 #endif
162 #endif
163 
164   PetscFunctionBegin;
165   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
166   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
167   if (!fftw->p_backward) { /* create a plan, then excute it */
168     switch (ndim) {
169     case 1:
170 #if defined(PETSC_USE_COMPLEX)
171       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
172 #else
173       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
174 #endif
175       break;
176     case 2:
177 #if defined(PETSC_USE_COMPLEX)
178       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
179 #else
180       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
181 #endif
182       break;
183     case 3:
184 #if defined(PETSC_USE_COMPLEX)
185       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
186 #else
187       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
188 #endif
189       break;
190     default:
191 #if defined(PETSC_USE_COMPLEX)
192 #if defined(PETSC_USE_64BIT_INDICES)
193       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
194 #else
195       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
196 #endif
197 #else
198       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
199 #endif
200       break;
201     }
202     fftw->binarray  = (PetscScalar *) x_array;
203     fftw->boutarray = y_array;
204     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
205   } else { /* use existing plan */
206     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
207 #if defined(PETSC_USE_COMPLEX)
208       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
209 #else
210       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
211 #endif
212     } else {
213       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
214     }
215   }
216   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
217   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
218   PetscFunctionReturn(0);
219 }
220 
221 /* MatMult_MPIFFTW performs forward DFT in parallel
222    Input parameter:
223      A - the matrix
224      x - the vector on which FDFT will be performed
225 
226    Output parameter:
227    y   - vector that stores result of FDFT
228 */
229 #undef __FUNCT__
230 #define __FUNCT__ "MatMult_MPIFFTW"
231 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
232 {
233   PetscErrorCode ierr;
234   Mat_FFT        *fft  = (Mat_FFT*)A->data;
235   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
236   const PetscScalar *x_array;
237   PetscScalar    *y_array;
238   PetscInt       ndim=fft->ndim,*dim=fft->dim;
239   MPI_Comm       comm;
240 
241   PetscFunctionBegin;
242   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
243   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
244   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
245   if (!fftw->p_forward) { /* create a plan, then excute it */
246     switch (ndim) {
247     case 1:
248 #if defined(PETSC_USE_COMPLEX)
249       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
250 #else
251       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
252 #endif
253       break;
254     case 2:
255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
257 #else
258       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
259 #endif
260       break;
261     case 3:
262 #if defined(PETSC_USE_COMPLEX)
263       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
264 #else
265       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
266 #endif
267       break;
268     default:
269 #if defined(PETSC_USE_COMPLEX)
270       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
271 #else
272       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
273 #endif
274       break;
275     }
276     fftw->finarray  = (PetscScalar *) x_array;
277     fftw->foutarray = y_array;
278     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
279                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280     fftw_execute(fftw->p_forward);
281   } else { /* use existing plan */
282     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
284     } else {
285       fftw_execute(fftw->p_forward);
286     }
287   }
288   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
289   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
294    Input parameter:
295      A - the matrix
296      x - the vector on which BDFT will be performed
297 
298    Output parameter:
299      y - vector that stores result of BDFT
300 */
301 #undef __FUNCT__
302 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
303 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
304 {
305   PetscErrorCode ierr;
306   Mat_FFT        *fft  = (Mat_FFT*)A->data;
307   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
308   const PetscScalar *x_array;
309   PetscScalar    *y_array;
310   PetscInt       ndim=fft->ndim,*dim=fft->dim;
311   MPI_Comm       comm;
312 
313   PetscFunctionBegin;
314   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
315   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
316   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
317   if (!fftw->p_backward) { /* create a plan, then excute it */
318     switch (ndim) {
319     case 1:
320 #if defined(PETSC_USE_COMPLEX)
321       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
322 #else
323       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
324 #endif
325       break;
326     case 2:
327 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
328       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
329 #else
330       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
331 #endif
332       break;
333     case 3:
334 #if defined(PETSC_USE_COMPLEX)
335       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
336 #else
337       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
338 #endif
339       break;
340     default:
341 #if defined(PETSC_USE_COMPLEX)
342       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
343 #else
344       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
345 #endif
346       break;
347     }
348     fftw->binarray  = (PetscScalar *) x_array;
349     fftw->boutarray = y_array;
350     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
351   } else { /* use existing plan */
352     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
354     } else {
355       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
356     }
357   }
358   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
359   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatDestroy_FFTW"
365 PetscErrorCode MatDestroy_FFTW(Mat A)
366 {
367   Mat_FFT        *fft  = (Mat_FFT*)A->data;
368   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
369   PetscErrorCode ierr;
370 
371   PetscFunctionBegin;
372   fftw_destroy_plan(fftw->p_forward);
373   fftw_destroy_plan(fftw->p_backward);
374   if (fftw->iodims) {
375     free(fftw->iodims);
376   }
377   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
378   ierr = PetscFree(fft->data);CHKERRQ(ierr);
379   fftw_mpi_cleanup();
380   PetscFunctionReturn(0);
381 }
382 
383 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
384 #undef __FUNCT__
385 #define __FUNCT__ "VecDestroy_MPIFFTW"
386 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
387 {
388   PetscErrorCode ierr;
389   PetscScalar    *array;
390 
391   PetscFunctionBegin;
392   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
393   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
394   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
395   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 #undef __FUNCT__
400 #define __FUNCT__ "MatCreateVecsFFTW"
401 /*@
402    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
403      parallel layout determined by FFTW
404 
405    Collective on Mat
406 
407    Input Parameter:
408 .   A - the matrix
409 
   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW
414 
415   Level: advanced
416 
417   Note: The parallel layout of output of forward FFTW is always same as the input
418         of the backward FFTW. But parallel layout of the input vector of forward
419         FFTW might not be same as the output of backward FFTW.
420         Also note that we need to provide enough space while doing parallel real transform.
421         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
422         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
423         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
424         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
425         zeros if it is odd only one zero is needed.
426         Lastly one needs some scratch space at the end of data set in each process. alloc_local
427         figures out how much space is needed, i.e. it figures out the data+scratch space for
428         each processor and returns that.
429 
430 .seealso: MatCreateFFTW()
431 @*/
/* Dispatches to the implementation registered as "MatCreateVecsFFTW_C"
   (MatCreateVecsFFTW_FFTW below); PetscTryMethod is a silent no-op when the
   matrix type does not provide the method. See the manual page above. */
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatCreateVecsFFTW_FFTW"
/* MatCreateVecsFFTW_FFTW - creates vectors laid out to match FFTW's (possibly
   padded and transposed) data distribution.

   Input parameter:
     A - the FFTW matrix

   Output parameters (each optional, may be NULL):
     fin  - input vector of the forward transform
     fout - output vector of the forward transform
     bout - output vector of the backward transform

   In parallel, storage is obtained with fftw_malloc (sized by the
   fftw_mpi_local_size_* routines, which include scratch space) and wrapped with
   VecCreateMPIWithArray; the destroy hook is replaced by VecDestroy_MPIFFTW so
   the array is released with fftw_free. For real (r2c/c2r) transforms the last
   dimension is padded to 2*(dim[last]/2+1) doubles per the FFTW MPI convention. */
PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case: plain Seq vectors, no FFTW-managed storage needed */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real case: n (not N) accounts for r2c padding of the last dimension */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D c2c: forward and backward transforms may have different output
         distributions (local_n1), so query local sizes for each direction */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* 2D r2c: last dimension padded to 2*(dim[1]/2+1) doubles; *_transposed
         local-size routine also yields the transposed-output distribution */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* 2D c2c: same local size n / global size N for all three vectors */
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* 3D r2c: analogous to the 2D real case with the padded last dimension */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* n-D r2c: temporarily substitute the padded last dimension
         (temp/2 + 1) to query local sizes, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "VecScatterPetscToFFTW"
659 /*@
660    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
661 
662    Collective on Mat
663 
664    Input Parameters:
665 +  A - FFTW matrix
666 -  x - the PETSc vector
667 
668    Output Parameters:
669 .  y - the FFTW vector
670 
671   Options Database Keys:
672 . -mat_fftw_plannerflags - set FFTW planner flags
673 
674    Level: intermediate
   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.
679 
680 .seealso: VecScatterFFTWToPetsc()
681 @*/
/* Dispatches to the implementation registered as "VecScatterPetscToFFTW_C"
   (VecScatterPetscToFFTW_FFTW below); PetscTryMethod is a silent no-op when the
   matrix type does not provide the method. See the manual page above. */
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
693 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
694 {
695   PetscErrorCode ierr;
696   MPI_Comm       comm;
697   Mat_FFT        *fft  = (Mat_FFT*)A->data;
698   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
699   PetscInt       N     =fft->N;
700   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
701   PetscInt       low;
702   PetscMPIInt    rank,size;
703   PetscInt       vsize,vsize1;
704   ptrdiff_t      local_n0,local_0_start;
705   ptrdiff_t      local_n1,local_1_start;
706   VecScatter     vecscat;
707   IS             list1,list2;
708 #if !defined(PETSC_USE_COMPLEX)
709   PetscInt       i,j,k,partial_dim;
710   PetscInt       *indx1, *indx2, tempindx, tempindx1;
711   PetscInt       NM;
712   ptrdiff_t      temp;
713 #endif
714 
715   PetscFunctionBegin;
716   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
717   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
718   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
719   ierr = VecGetOwnershipRange(y,&low,NULL);
720 
721   if (size==1) {
722     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
723     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
724     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
725     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
726     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
729     ierr = ISDestroy(&list1);CHKERRQ(ierr);
730   } else {
731     switch (ndim) {
732     case 1:
733 #if defined(PETSC_USE_COMPLEX)
734       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
735 
736       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
737       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
738       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
739       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
741       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
742       ierr = ISDestroy(&list1);CHKERRQ(ierr);
743       ierr = ISDestroy(&list2);CHKERRQ(ierr);
744 #else
745       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
746 #endif
747       break;
748     case 2:
749 #if defined(PETSC_USE_COMPLEX)
750       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
751 
752       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
753       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
754       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
755       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
757       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
758       ierr = ISDestroy(&list1);CHKERRQ(ierr);
759       ierr = ISDestroy(&list2);CHKERRQ(ierr);
760 #else
761       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
762 
763       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
764       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
765 
766       if (dim[1]%2==0) {
767         NM = dim[1]+2;
768       } else {
769         NM = dim[1]+1;
770       }
771       for (i=0; i<local_n0; i++) {
772         for (j=0; j<dim[1]; j++) {
773           tempindx  = i*dim[1] + j;
774           tempindx1 = i*NM + j;
775 
776           indx1[tempindx]=local_0_start*dim[1]+tempindx;
777           indx2[tempindx]=low+tempindx1;
778         }
779       }
780 
781       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
782       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
783 
784       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
785       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
786       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
787       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
788       ierr = ISDestroy(&list1);CHKERRQ(ierr);
789       ierr = ISDestroy(&list2);CHKERRQ(ierr);
790       ierr = PetscFree(indx1);CHKERRQ(ierr);
791       ierr = PetscFree(indx2);CHKERRQ(ierr);
792 #endif
793       break;
794 
795     case 3:
796 #if defined(PETSC_USE_COMPLEX)
797       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
798 
799       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
800       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
801       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
802       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
803       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
804       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
805       ierr = ISDestroy(&list1);CHKERRQ(ierr);
806       ierr = ISDestroy(&list2);CHKERRQ(ierr);
807 #else
808       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
809       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
810       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
811 
812       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
813       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
814 
815       if (dim[2]%2==0) NM = dim[2]+2;
816       else             NM = dim[2]+1;
817 
818       for (i=0; i<local_n0; i++) {
819         for (j=0; j<dim[1]; j++) {
820           for (k=0; k<dim[2]; k++) {
821             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
822             tempindx1 = i*dim[1]*NM + j*NM + k;
823 
824             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
825             indx2[tempindx]=low+tempindx1;
826           }
827         }
828       }
829 
830       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
831       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
832       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
833       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
836       ierr = ISDestroy(&list1);CHKERRQ(ierr);
837       ierr = ISDestroy(&list2);CHKERRQ(ierr);
838       ierr = PetscFree(indx1);CHKERRQ(ierr);
839       ierr = PetscFree(indx2);CHKERRQ(ierr);
840 #endif
841       break;
842 
843     default:
844 #if defined(PETSC_USE_COMPLEX)
845       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
846 
847       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
848       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
849       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
850       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
851       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
852       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
853       ierr = ISDestroy(&list1);CHKERRQ(ierr);
854       ierr = ISDestroy(&list2);CHKERRQ(ierr);
855 #else
856       /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
857       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
858       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
859 
860       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
861 
862       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
863 
864       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
865 
866       partial_dim = fftw->partial_dim;
867 
868       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
869       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
870 
871       if (dim[ndim-1]%2==0) NM = 2;
872       else                  NM = 1;
873 
874       j = low;
875       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
876         indx1[i] = local_0_start*partial_dim + i;
877         indx2[i] = j;
878         if (k%dim[ndim-1]==0) j+=NM;
879         j++;
880       }
881       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
882       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
883       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
884       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
887       ierr = ISDestroy(&list1);CHKERRQ(ierr);
888       ierr = ISDestroy(&list2);CHKERRQ(ierr);
889       ierr = PetscFree(indx1);CHKERRQ(ierr);
890       ierr = PetscFree(indx2);CHKERRQ(ierr);
891 #endif
892       break;
893     }
894   }
895   PetscFunctionReturn(0);
896 }
897 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Scatters the FFTW-layout vector produced by an FFTW transform back into a PETSc vector with the natural (unpadded) layout.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on A; silently a no-op if A is not a MATFFTW matrix. */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
930 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
931 {
932   PetscErrorCode ierr;
933   MPI_Comm       comm;
934   Mat_FFT        *fft  = (Mat_FFT*)A->data;
935   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
936   PetscInt       N     = fft->N;
937   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
938   PetscInt       low;
939   PetscMPIInt    rank,size;
940   ptrdiff_t      local_n0,local_0_start;
941   ptrdiff_t      local_n1,local_1_start;
942   VecScatter     vecscat;
943   IS             list1,list2;
944 #if !defined(PETSC_USE_COMPLEX)
945   PetscInt       i,j,k,partial_dim;
946   PetscInt       *indx1, *indx2, tempindx, tempindx1;
947   PetscInt       NM;
948   ptrdiff_t      temp;
949 #endif
950 
951   PetscFunctionBegin;
952   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
953   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
954   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
955   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
956 
957   if (size==1) {
958     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
959     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
960     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
963     ierr = ISDestroy(&list1);CHKERRQ(ierr);
964 
965   } else {
966     switch (ndim) {
967     case 1:
968 #if defined(PETSC_USE_COMPLEX)
969       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
970 
971       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
972       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
973       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979 #else
980       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
981 #endif
982       break;
983     case 2:
984 #if defined(PETSC_USE_COMPLEX)
985       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
986 
987       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
988       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
989       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
990       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995 #else
996       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
997 
998       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
999       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
1000 
1001       if (dim[1]%2==0) NM = dim[1]+2;
1002       else             NM = dim[1]+1;
1003 
1004       for (i=0; i<local_n0; i++) {
1005         for (j=0; j<dim[1]; j++) {
1006           tempindx = i*dim[1] + j;
1007           tempindx1 = i*NM + j;
1008 
1009           indx1[tempindx]=local_0_start*dim[1]+tempindx;
1010           indx2[tempindx]=low+tempindx1;
1011         }
1012       }
1013 
1014       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1015       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1016 
1017       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1018       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1020       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1021       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1022       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1023       ierr = PetscFree(indx1);CHKERRQ(ierr);
1024       ierr = PetscFree(indx2);CHKERRQ(ierr);
1025 #endif
1026       break;
1027     case 3:
1028 #if defined(PETSC_USE_COMPLEX)
1029       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1030 
1031       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1032       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1033       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1034       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1037       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1038       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1039 #else
1040       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1041 
1042       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1043       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1044 
1045       if (dim[2]%2==0) NM = dim[2]+2;
1046       else             NM = dim[2]+1;
1047 
1048       for (i=0; i<local_n0; i++) {
1049         for (j=0; j<dim[1]; j++) {
1050           for (k=0; k<dim[2]; k++) {
1051             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1052             tempindx1 = i*dim[1]*NM + j*NM + k;
1053 
1054             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1055             indx2[tempindx]=low+tempindx1;
1056           }
1057         }
1058       }
1059 
1060       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1061       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1062 
1063       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1064       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1065       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1067       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1068       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1069       ierr = PetscFree(indx1);CHKERRQ(ierr);
1070       ierr = PetscFree(indx2);CHKERRQ(ierr);
1071 #endif
1072       break;
1073     default:
1074 #if defined(PETSC_USE_COMPLEX)
1075       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1076 
1077       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1078       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1079       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1080       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1082       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1083       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1084       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1085 #else
1086       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1087 
1088       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1089 
1090       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1091 
1092       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1093 
1094       partial_dim = fftw->partial_dim;
1095 
1096       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1097       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1098 
1099       if (dim[ndim-1]%2==0) NM = 2;
1100       else                  NM = 1;
1101 
1102       j = low;
1103       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1104         indx1[i] = local_0_start*partial_dim + i;
1105         indx2[i] = j;
1106         if (k%dim[ndim-1]==0) j+=NM;
1107         j++;
1108       }
1109       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1110       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1111 
1112       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1113       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1115       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1116       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1117       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1118       ierr = PetscFree(indx1);CHKERRQ(ierr);
1119       ierr = PetscFree(indx2);CHKERRQ(ierr);
1120 #endif
1121       break;
1122     }
1123   }
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatCreate_FFTW"
1129 /*
1130     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1131 
1132   Options Database Keys:
1133 + -mat_fftw_plannerflags - set FFTW planner flags
1134 
1135    Level: intermediate
1136 
1137 */
1138 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1139 {
1140   PetscErrorCode ierr;
1141   MPI_Comm       comm;
1142   Mat_FFT        *fft=(Mat_FFT*)A->data;
1143   Mat_FFTW       *fftw;
1144   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1145   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1146   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1147   PetscBool      flg;
1148   PetscInt       p_flag,partial_dim=1,ctr;
1149   PetscMPIInt    size,rank;
1150   ptrdiff_t      *pdim;
1151   ptrdiff_t      local_n1,local_1_start;
1152 #if !defined(PETSC_USE_COMPLEX)
1153   ptrdiff_t      temp;
1154   PetscInt       N1,tot_dim;
1155 #else
1156   PetscInt       n1;
1157 #endif
1158 
1159   PetscFunctionBegin;
1160   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1161   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1162   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1163 
1164   fftw_mpi_init();
1165   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1166   pdim[0] = dim[0];
1167 #if !defined(PETSC_USE_COMPLEX)
1168   tot_dim = 2*dim[0];
1169 #endif
1170   for (ctr=1; ctr<ndim; ctr++) {
1171     partial_dim *= dim[ctr];
1172     pdim[ctr]    = dim[ctr];
1173 #if !defined(PETSC_USE_COMPLEX)
1174     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1175     else             tot_dim *= dim[ctr];
1176 #endif
1177   }
1178 
1179   if (size == 1) {
1180 #if defined(PETSC_USE_COMPLEX)
1181     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1182     n    = N;
1183 #else
1184     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1185     n    = tot_dim;
1186 #endif
1187 
1188   } else {
1189     ptrdiff_t local_n0,local_0_start;
1190     switch (ndim) {
1191     case 1:
1192 #if !defined(PETSC_USE_COMPLEX)
1193       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1194 #else
1195       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1196 
1197       n    = (PetscInt)local_n0;
1198       n1   = (PetscInt)local_n1;
1199       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1200 #endif
1201       break;
1202     case 2:
1203 #if defined(PETSC_USE_COMPLEX)
1204       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1205       /*
1206        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1207        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1208        */
1209       n    = (PetscInt)local_n0*dim[1];
1210       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1211 #else
1212       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1213 
1214       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1215       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1216 #endif
1217       break;
1218     case 3:
1219 #if defined(PETSC_USE_COMPLEX)
1220       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1221 
1222       n    = (PetscInt)local_n0*dim[1]*dim[2];
1223       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1224 #else
1225       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1226 
1227       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1228       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1229 #endif
1230       break;
1231     default:
1232 #if defined(PETSC_USE_COMPLEX)
1233       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1234 
1235       n    = (PetscInt)local_n0*partial_dim;
1236       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1237 #else
1238       temp = pdim[ndim-1];
1239 
1240       pdim[ndim-1] = temp/2 + 1;
1241 
1242       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1243 
1244       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1245       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1246 
1247       pdim[ndim-1] = temp;
1248 
1249       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1250 #endif
1251       break;
1252     }
1253   }
1254   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1255   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1256   fft->data = (void*)fftw;
1257 
1258   fft->n            = n;
1259   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1260   fftw->partial_dim = partial_dim;
1261 
1262   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1263   if (size == 1) {
1264 #if defined(PETSC_USE_64BIT_INDICES)
1265     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1266 #else
1267     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1268 #endif
1269   }
1270 
1271   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1272 
1273   fftw->p_forward  = 0;
1274   fftw->p_backward = 0;
1275   fftw->p_flag     = FFTW_ESTIMATE;
1276 
1277   if (size == 1) {
1278     A->ops->mult          = MatMult_SeqFFTW;
1279     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1280   } else {
1281     A->ops->mult          = MatMult_MPIFFTW;
1282     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1283   }
1284   fft->matdestroy = MatDestroy_FFTW;
1285   A->assembled    = PETSC_TRUE;
1286   A->preallocated = PETSC_TRUE;
1287 
1288   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1291 
1292   /* get runtime options */
1293   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1294   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1295   if (flg) {
1296     fftw->p_flag = iplans[p_flag];
1297   }
1298   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 
1304 
1305