xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision a1b6d50c236cc8b4c78decb1d66d5347aeec3d46)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw;   /* number of dimensions and dimension sizes in FFTW's ptrdiff_t form */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* guru-interface dimension descriptors (64-bit index build) */
#else
  fftw_iodim   *iodims;              /* guru-interface dimension descriptors */
#endif
  PetscInt    partial_dim;           /* NOTE(review): set outside this chunk; not referenced here — confirm meaning at creation site */
  fftw_plan   p_forward,p_backward;  /* cached plans, created lazily on first MatMult/MatMultTranspose */
  unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
25 
26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30 extern PetscErrorCode MatDestroy_FFTW(Mat);
31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
32 
/* MatMult_SeqFFTW performs serial forward DFT
   Input parameter:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores result of FDFT
*/
41 #undef __FUNCT__
42 #define __FUNCT__ "MatMult_SeqFFTW"
43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
44 {
45   PetscErrorCode ierr;
46   Mat_FFT        *fft  = (Mat_FFT*)A->data;
47   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
48   PetscScalar    *x_array,*y_array;
49 #if defined(PETSC_USE_64BIT_INDICES)
50   fftw_iodim64   *iodims;
51 #else
52   fftw_iodim     *iodims;
53 #endif
54   PetscInt       ndim = fft->ndim,*dim = fft->dim,i;
55 
56   PetscFunctionBegin;
57   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
58   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
59   if (!fftw->p_forward) { /* create a plan, then excute it */
60     switch (ndim) {
61     case 1:
62 #if defined(PETSC_USE_COMPLEX)
63       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
64 #else
65       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
66 #endif
67       break;
68     case 2:
69 #if defined(PETSC_USE_COMPLEX)
70       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
71 #else
72       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
73 #endif
74       break;
75     case 3:
76 #if defined(PETSC_USE_COMPLEX)
77       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
78 #else
79       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
80 #endif
81       break;
82     default:
83 #if defined(PETSC_USE_COMPLEX)
84       iodims = fftw->iodims;
85 #if defined(PETSC_USE_64BIT_INDICES)
86       if (ndim) {
87         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
88         iodims[ndim-1].is = iodims[ndim-1].os = 1;
89         for (i=ndim-2; i>=0; --i) {
90           iodims[i].n = (ptrdiff_t)dim[i];
91           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
92         }
93       }
94       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
95 #else
96       if (ndim) {
97         iodims[ndim-1].n = (int)dim[ndim-1];
98         iodims[ndim-1].is = iodims[ndim-1].os = 1;
99         for (i=ndim-2; i>=0; --i) {
100           iodims[i].n = (int)dim[i];
101           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
102         }
103       }
104       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
105 #endif
106 
107 #else
108       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
109 #endif
110       break;
111     }
112     fftw->finarray  = x_array;
113     fftw->foutarray = y_array;
114     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
115                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
116     fftw_execute(fftw->p_forward);
117   } else { /* use existing plan */
118     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
119       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
120     } else {
121       fftw_execute(fftw->p_forward);
122     }
123   }
124   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
125   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /* MatMultTranspose_SeqFFTW performs serial backward DFT
130    Input parameter:
131      A - the matrix
132      x - the vector on which BDFT will be performed
133 
134    Output parameter:
135      y - vector that stores result of BDFT
136 */
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
140 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
141 {
142   PetscErrorCode ierr;
143   Mat_FFT        *fft  = (Mat_FFT*)A->data;
144   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
145   PetscScalar    *x_array,*y_array;
146   PetscInt       ndim=fft->ndim,*dim=fft->dim;
147 #if defined(PETSC_USE_64BIT_INDICES)
148   fftw_iodim64   *iodims=fftw->iodims;
149 #else
150   fftw_iodim     *iodims=fftw->iodims;
151 #endif
152 
153   PetscFunctionBegin;
154   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
155   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
156   if (!fftw->p_backward) { /* create a plan, then excute it */
157     switch (ndim) {
158     case 1:
159 #if defined(PETSC_USE_COMPLEX)
160       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
161 #else
162       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
163 #endif
164       break;
165     case 2:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
168 #else
169       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
170 #endif
171       break;
172     case 3:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
175 #else
176       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
177 #endif
178       break;
179     default:
180 #if defined(PETSC_USE_COMPLEX)
181 #if defined(PETSC_USE_64BIT_INDICES)
182       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
183 #else
184       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
185 #endif
186 #else
187       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
188 #endif
189       break;
190     }
191     fftw->binarray  = x_array;
192     fftw->boutarray = y_array;
193     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
194   } else { /* use existing plan */
195     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
196       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
197     } else {
198       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
199     }
200   }
201   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
202   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 /* MatMult_MPIFFTW performs forward DFT in parallel
207    Input parameter:
208      A - the matrix
209      x - the vector on which FDFT will be performed
210 
211    Output parameter:
212    y   - vector that stores result of FDFT
213 */
214 #undef __FUNCT__
215 #define __FUNCT__ "MatMult_MPIFFTW"
216 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
217 {
218   PetscErrorCode ierr;
219   Mat_FFT        *fft  = (Mat_FFT*)A->data;
220   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
221   PetscScalar    *x_array,*y_array;
222   PetscInt       ndim=fft->ndim,*dim=fft->dim;
223   MPI_Comm       comm;
224 
225   PetscFunctionBegin;
226   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
227   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
228   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
229   if (!fftw->p_forward) { /* create a plan, then excute it */
230     switch (ndim) {
231     case 1:
232 #if defined(PETSC_USE_COMPLEX)
233       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
234 #else
235       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
236 #endif
237       break;
238     case 2:
239 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
240       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
241 #else
242       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
243 #endif
244       break;
245     case 3:
246 #if defined(PETSC_USE_COMPLEX)
247       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
248 #else
249       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
250 #endif
251       break;
252     default:
253 #if defined(PETSC_USE_COMPLEX)
254       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
255 #else
256       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
257 #endif
258       break;
259     }
260     fftw->finarray  = x_array;
261     fftw->foutarray = y_array;
262     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
263                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
264     fftw_execute(fftw->p_forward);
265   } else { /* use existing plan */
266     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
267       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
268     } else {
269       fftw_execute(fftw->p_forward);
270     }
271   }
272   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
273   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
278    Input parameter:
279      A - the matrix
280      x - the vector on which BDFT will be performed
281 
282    Output parameter:
283      y - vector that stores result of BDFT
284 */
285 #undef __FUNCT__
286 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
287 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
288 {
289   PetscErrorCode ierr;
290   Mat_FFT        *fft  = (Mat_FFT*)A->data;
291   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
292   PetscScalar    *x_array,*y_array;
293   PetscInt       ndim=fft->ndim,*dim=fft->dim;
294   MPI_Comm       comm;
295 
296   PetscFunctionBegin;
297   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
298   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
299   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
300   if (!fftw->p_backward) { /* create a plan, then excute it */
301     switch (ndim) {
302     case 1:
303 #if defined(PETSC_USE_COMPLEX)
304       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
305 #else
306       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
307 #endif
308       break;
309     case 2:
310 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
311       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
312 #else
313       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
314 #endif
315       break;
316     case 3:
317 #if defined(PETSC_USE_COMPLEX)
318       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
319 #else
320       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
321 #endif
322       break;
323     default:
324 #if defined(PETSC_USE_COMPLEX)
325       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
326 #else
327       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
328 #endif
329       break;
330     }
331     fftw->binarray  = x_array;
332     fftw->boutarray = y_array;
333     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
334   } else { /* use existing plan */
335     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
336       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
337     } else {
338       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
339     }
340   }
341   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
342   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatDestroy_FFTW"
348 PetscErrorCode MatDestroy_FFTW(Mat A)
349 {
350   Mat_FFT        *fft  = (Mat_FFT*)A->data;
351   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
352   PetscErrorCode ierr;
353 
354   PetscFunctionBegin;
355   fftw_destroy_plan(fftw->p_forward);
356   fftw_destroy_plan(fftw->p_backward);
357   if (fftw->iodims) {
358     free(fftw->iodims);
359   }
360   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
361   ierr = PetscFree(fft->data);CHKERRQ(ierr);
362   fftw_mpi_cleanup();
363   PetscFunctionReturn(0);
364 }
365 
366 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
367 #undef __FUNCT__
368 #define __FUNCT__ "VecDestroy_MPIFFTW"
369 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
370 {
371   PetscErrorCode ierr;
372   PetscScalar    *array;
373 
374   PetscFunctionBegin;
375   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
376   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
377   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
378   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
379   PetscFunctionReturn(0);
380 }
381 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW"
/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameter:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of output of forward FFTW is always same as the input
        of the backward FFTW. But parallel layout of the input vector of forward
        FFTW might not be same as the output of backward FFTW.
        Also note that we need to provide enough space while doing parallel real transform.
        We need to pad extra zeros at the end of last dimension. For this reason one needs to
        invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on if the last dimension is even or odd. If the last dimension is even need to pad two
        zeros if it is odd only one zero is needed.
        Lastly one needs some scratch space at the end of data set in each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatGetVecsFFTW_FFTW) if it is registered */
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
423 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* Type-specific implementation of MatGetVecsFFTW: creates (optionally) the
   forward-input, forward-output and backward-output vectors with the data
   layout FFTW expects.  In the parallel case the data arrays come from
   fftw_malloc() (sized by fftw_mpi_local_size*), so each vector's destroy
   op is overridden with VecDestroy_MPIFFTW to release them with fftw_free(). */
PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    /* complex: vectors hold exactly N entries */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real: n (= fft->n) already accounts for the r2c padding of the last dimension */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      /* SETERRQ returns immediately, so the missing break in this branch is unreachable */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* forward and backward 1D distributions differ, so query each separately */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of individual dimensions */
      /* real 2D: last dimension is padded to 2*(dim[1]/2+1) doubles for the r2c transform */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same padding scheme as 2D, applied to the last of three dimensions */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last dimension by its r2c-padded size to query local sizes,
         then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
639 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This is required even when
         one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by scattering operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (VecScatterPetscToFFTW_FFTW) if registered */
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
676 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
677 {
678   PetscErrorCode ierr;
679   MPI_Comm       comm;
680   Mat_FFT        *fft  = (Mat_FFT*)A->data;
681   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
682   PetscInt       N     =fft->N;
683   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
684   PetscInt       low;
685   PetscMPIInt    rank,size;
686   PetscInt       vsize,vsize1;
687   ptrdiff_t      local_n0,local_0_start;
688   ptrdiff_t      local_n1,local_1_start;
689   VecScatter     vecscat;
690   IS             list1,list2;
691 #if !defined(PETSC_USE_COMPLEX)
692   PetscInt       i,j,k,partial_dim;
693   PetscInt       *indx1, *indx2, tempindx, tempindx1;
694   PetscInt       N1, n1,NM;
695   ptrdiff_t      temp;
696 #endif
697 
698   PetscFunctionBegin;
699   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
700   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
701   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
702   ierr = VecGetOwnershipRange(y,&low,NULL);
703 
704   if (size==1) {
705     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
706     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
707     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
708     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
709     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
710     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
711     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
712     ierr = ISDestroy(&list1);CHKERRQ(ierr);
713   } else {
714     switch (ndim) {
715     case 1:
716 #if defined(PETSC_USE_COMPLEX)
717       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
718 
719       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
720       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
721       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
722       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
723       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
724       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
725       ierr = ISDestroy(&list1);CHKERRQ(ierr);
726       ierr = ISDestroy(&list2);CHKERRQ(ierr);
727 #else
728       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
729 #endif
730       break;
731     case 2:
732 #if defined(PETSC_USE_COMPLEX)
733       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
734 
735       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
736       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
737       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
738       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
739       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
741       ierr = ISDestroy(&list1);CHKERRQ(ierr);
742       ierr = ISDestroy(&list2);CHKERRQ(ierr);
743 #else
744       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
745 
746       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
747       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
748       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
749 
750       if (dim[1]%2==0) {
751         NM = dim[1]+2;
752       } else {
753         NM = dim[1]+1;
754       }
755       for (i=0; i<local_n0; i++) {
756         for (j=0; j<dim[1]; j++) {
757           tempindx  = i*dim[1] + j;
758           tempindx1 = i*NM + j;
759 
760           indx1[tempindx]=local_0_start*dim[1]+tempindx;
761           indx2[tempindx]=low+tempindx1;
762         }
763       }
764 
765       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
766       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
767 
768       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
769       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
770       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
771       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
772       ierr = ISDestroy(&list1);CHKERRQ(ierr);
773       ierr = ISDestroy(&list2);CHKERRQ(ierr);
774       ierr = PetscFree(indx1);CHKERRQ(ierr);
775       ierr = PetscFree(indx2);CHKERRQ(ierr);
776 #endif
777       break;
778 
779     case 3:
780 #if defined(PETSC_USE_COMPLEX)
781       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
782 
783       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
784       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
785       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
786       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
787       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
788       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
789       ierr = ISDestroy(&list1);CHKERRQ(ierr);
790       ierr = ISDestroy(&list2);CHKERRQ(ierr);
791 #else
792       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
793 
794       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
795 
796       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
797       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
798 
799       if (dim[2]%2==0) NM = dim[2]+2;
800       else             NM = dim[2]+1;
801 
802       for (i=0; i<local_n0; i++) {
803         for (j=0; j<dim[1]; j++) {
804           for (k=0; k<dim[2]; k++) {
805             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
806             tempindx1 = i*dim[1]*NM + j*NM + k;
807 
808             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
809             indx2[tempindx]=low+tempindx1;
810           }
811         }
812       }
813 
814       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
815       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
816       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
817       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
818       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
819       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
820       ierr = ISDestroy(&list1);CHKERRQ(ierr);
821       ierr = ISDestroy(&list2);CHKERRQ(ierr);
822       ierr = PetscFree(indx1);CHKERRQ(ierr);
823       ierr = PetscFree(indx2);CHKERRQ(ierr);
824 #endif
825       break;
826 
827     default:
828 #if defined(PETSC_USE_COMPLEX)
829       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
830 
831       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
832       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
833       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
834       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
835       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
836       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
837       ierr = ISDestroy(&list1);CHKERRQ(ierr);
838       ierr = ISDestroy(&list2);CHKERRQ(ierr);
839 #else
840       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
841 
842       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
843 
844       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
845 
846       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
847 
848       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
849 
850       partial_dim = fftw->partial_dim;
851 
852       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
853       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
854 
855       if (dim[ndim-1]%2==0) NM = 2;
856       else                  NM = 1;
857 
858       j = low;
859       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
860         indx1[i] = local_0_start*partial_dim + i;
861         indx2[i] = j;
862         if (k%dim[ndim-1]==0) j+=NM;
863         j++;
864       }
865       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
866       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
867       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
868       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
870       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
871       ierr = ISDestroy(&list1);CHKERRQ(ierr);
872       ierr = ISDestroy(&list2);CHKERRQ(ierr);
873       ierr = PetscFree(indx1);CHKERRQ(ierr);
874       ierr = PetscFree(indx2);CHKERRQ(ierr);
875 #endif
876       break;
877     }
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "VecScatterFFTWToPetsc"
884 /*@
885    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
886 
887    Collective on Mat
888 
889     Input Parameters:
890 +   A - FFTW matrix
891 -   x - FFTW vector
892 
893    Output Parameters:
894 .  y - PETSc vector
895 
896    Level: intermediate
897 
898    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
899          VecScatterFFTWToPetsc removes those extra zeros.
900 
901 .seealso: VecScatterPetscToFFTW()
902 @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A ("VecScatterFFTWToPetsc_C",
     i.e. VecScatterFFTWToPetsc_FFTW for MATFFTW); PetscTryMethod is a no-op
     if no such method was composed on the matrix. */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
914 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
915 {
916   PetscErrorCode ierr;
917   MPI_Comm       comm;
918   Mat_FFT        *fft  = (Mat_FFT*)A->data;
919   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
920   PetscInt       N     = fft->N;
921   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
922   PetscInt       low;
923   PetscMPIInt    rank,size;
924   ptrdiff_t      local_n0,local_0_start;
925   ptrdiff_t      local_n1,local_1_start;
926   VecScatter     vecscat;
927   IS             list1,list2;
928 #if !defined(PETSC_USE_COMPLEX)
929   PetscInt  i,j,k,partial_dim;
930   PetscInt  *indx1, *indx2, tempindx, tempindx1;
931   PetscInt  N1, n1,NM;
932   ptrdiff_t temp;
933 #endif
934 
935   PetscFunctionBegin;
936   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
937   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
938   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
939   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
940 
941   if (size==1) {
942     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
943     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
944     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
947     ierr = ISDestroy(&list1);CHKERRQ(ierr);
948 
949   } else {
950     switch (ndim) {
951     case 1:
952 #if defined(PETSC_USE_COMPLEX)
953       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
954 
955       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
956       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
957       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
958       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
961       ierr = ISDestroy(&list1);CHKERRQ(ierr);
962       ierr = ISDestroy(&list2);CHKERRQ(ierr);
963 #else
964       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
965 #endif
966       break;
967     case 2:
968 #if defined(PETSC_USE_COMPLEX)
969       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
970 
971       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
972       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
973       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979 #else
980       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
981 
982       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
983 
984       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
985       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
986 
987       if (dim[1]%2==0) NM = dim[1]+2;
988       else             NM = dim[1]+1;
989 
990       for (i=0; i<local_n0; i++) {
991         for (j=0; j<dim[1]; j++) {
992           tempindx = i*dim[1] + j;
993           tempindx1 = i*NM + j;
994 
995           indx1[tempindx]=local_0_start*dim[1]+tempindx;
996           indx2[tempindx]=low+tempindx1;
997         }
998       }
999 
1000       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1001       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1002 
1003       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1004       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1007       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1008       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1009       ierr = PetscFree(indx1);CHKERRQ(ierr);
1010       ierr = PetscFree(indx2);CHKERRQ(ierr);
1011 #endif
1012       break;
1013     case 3:
1014 #if defined(PETSC_USE_COMPLEX)
1015       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1016 
1017       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1018       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
1019       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1020       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1022       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1023       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1024       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1025 #else
1026 
1027       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1028 
1029       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1030 
1031       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1032       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1033 
1034       if (dim[2]%2==0) NM = dim[2]+2;
1035       else             NM = dim[2]+1;
1036 
1037       for (i=0; i<local_n0; i++) {
1038         for (j=0; j<dim[1]; j++) {
1039           for (k=0; k<dim[2]; k++) {
1040             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1041             tempindx1 = i*dim[1]*NM + j*NM + k;
1042 
1043             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1044             indx2[tempindx]=low+tempindx1;
1045           }
1046         }
1047       }
1048 
1049       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1050       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1051 
1052       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1053       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058       ierr = PetscFree(indx1);CHKERRQ(ierr);
1059       ierr = PetscFree(indx2);CHKERRQ(ierr);
1060 #endif
1061       break;
1062     default:
1063 #if defined(PETSC_USE_COMPLEX)
1064       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1065 
1066       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1067       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1068       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1069       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1072       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1073       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1074 #else
1075       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1076 
1077       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1078 
1079       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1080 
1081       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1082 
1083       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1084 
1085       partial_dim = fftw->partial_dim;
1086 
1087       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1088       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1089 
1090       if (dim[ndim-1]%2==0) NM = 2;
1091       else                  NM = 1;
1092 
1093       j = low;
1094       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1095         indx1[i] = local_0_start*partial_dim + i;
1096         indx2[i] = j;
1097         if (k%dim[ndim-1]==0) j+=NM;
1098         j++;
1099       }
1100       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1101       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1102 
1103       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1104       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1106       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1107       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1108       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1109       ierr = PetscFree(indx1);CHKERRQ(ierr);
1110       ierr = PetscFree(indx2);CHKERRQ(ierr);
1111 #endif
1112       break;
1113     }
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatCreate_FFTW"
1120 /*
1121     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1122 
1123   Options Database Keys:
1124 . -mat_fftw_plannerflags - set FFTW planner flags
1125 
1126    Level: intermediate
1127 
1128 */
1129 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1130 {
1131   PetscErrorCode ierr;
1132   MPI_Comm       comm;
1133   Mat_FFT        *fft=(Mat_FFT*)A->data;
1134   Mat_FFTW       *fftw;
1135   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1136   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1137   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1138   PetscBool      flg;
1139   PetscInt       p_flag,partial_dim=1,ctr;
1140   PetscMPIInt    size,rank;
1141   ptrdiff_t      *pdim;
1142   ptrdiff_t      local_n1,local_1_start;
1143 #if !defined(PETSC_USE_COMPLEX)
1144   ptrdiff_t temp;
1145   PetscInt  N1,tot_dim;
1146 #else
1147   PetscInt n1;
1148 #endif
1149 
1150   PetscFunctionBegin;
1151   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1152   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1153   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1154 
1155   fftw_mpi_init();
1156   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1157   pdim[0] = dim[0];
1158 #if !defined(PETSC_USE_COMPLEX)
1159   tot_dim = 2*dim[0];
1160 #endif
1161   for (ctr=1; ctr<ndim; ctr++) {
1162     partial_dim *= dim[ctr];
1163     pdim[ctr]    = dim[ctr];
1164 #if !defined(PETSC_USE_COMPLEX)
1165     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1166     else             tot_dim *= dim[ctr];
1167 #endif
1168   }
1169 
1170   if (size == 1) {
1171 #if defined(PETSC_USE_COMPLEX)
1172     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1173     n    = N;
1174 #else
1175     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1176     n    = tot_dim;
1177 #endif
1178 
1179   } else {
1180     ptrdiff_t local_n0,local_0_start;
1181     switch (ndim) {
1182     case 1:
1183 #if !defined(PETSC_USE_COMPLEX)
1184       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1185 #else
1186       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1187 
1188       n    = (PetscInt)local_n0;
1189       n1   = (PetscInt)local_n1;
1190       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1191 #endif
1192       break;
1193     case 2:
1194 #if defined(PETSC_USE_COMPLEX)
1195       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1196       /*
1197        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1198        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1199        */
1200       n    = (PetscInt)local_n0*dim[1];
1201       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1202 #else
1203       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1204 
1205       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1206       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1207 #endif
1208       break;
1209     case 3:
1210 #if defined(PETSC_USE_COMPLEX)
1211       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1212 
1213       n    = (PetscInt)local_n0*dim[1]*dim[2];
1214       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1215 #else
1216       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1217 
1218       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1219       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1220 #endif
1221       break;
1222     default:
1223 #if defined(PETSC_USE_COMPLEX)
1224       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1225 
1226       n    = (PetscInt)local_n0*partial_dim;
1227       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1228 #else
1229       temp = pdim[ndim-1];
1230 
1231       pdim[ndim-1] = temp/2 + 1;
1232 
1233       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1234 
1235       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1236       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1237 
1238       pdim[ndim-1] = temp;
1239 
1240       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1241 #endif
1242       break;
1243     }
1244   }
1245   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1246   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1247   fft->data = (void*)fftw;
1248 
1249   fft->n            = n;
1250   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1251   fftw->partial_dim = partial_dim;
1252 
1253   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1254   if (size == 1) {
1255 #if defined(PETSC_USE_64BIT_INDICES)
1256     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1257 #else
1258     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1259 #endif
1260   }
1261 
1262   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1263 
1264   fftw->p_forward  = 0;
1265   fftw->p_backward = 0;
1266   fftw->p_flag     = FFTW_ESTIMATE;
1267 
1268   if (size == 1) {
1269     A->ops->mult          = MatMult_SeqFFTW;
1270     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1271   } else {
1272     A->ops->mult          = MatMult_MPIFFTW;
1273     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1274   }
1275   fft->matdestroy = MatDestroy_FFTW;
1276   A->assembled    = PETSC_TRUE;
1277   A->preallocated = PETSC_TRUE;
1278 
1279   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1282 
1283   /* get runtime options */
1284   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1285   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1286   if (flg) {
1287     fftw->p_flag = iplans[p_flag];
1288   }
1289   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 
1294 
1295 
1296