xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision daa037dfd3c3bec8dc8659548d2b20b07c1dc6de)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #if !PetscDefined(HAVE_MPIUNI)
10 #include <fftw3-mpi.h>
11 #else
12 #include <fftw3.h>
13 #endif
14 EXTERN_C_END
15 
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw; /* number of dimensions and their sizes, in FFTW's native ptrdiff_t type */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;            /* dimension descriptors for FFTW's 64-bit guru planner interface */
#else
  fftw_iodim   *iodims;            /* dimension descriptors for FFTW's guru planner interface */
#endif
  PetscInt     partial_dim;        /* NOTE(review): set/used outside this file; presumably the product of the non-distributed dimensions -- confirm in MatCreate_FFTW */
  fftw_plan    p_forward,p_backward; /* plans are created lazily on the first MatMult/MatMultTranspose and cached */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
29 
30 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
32 extern PetscErrorCode MatDestroy_FFTW(Mat);
33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34 #if !PetscDefined(HAVE_MPIUNI)
35 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38 #endif
39 
40 /*
41    MatMult_SeqFFTW performs forward DFT
42    Input parameter:
43      A - the matrix
44      x - the vector on which FDFT will be performed
45 
46    Output parameter:
47      y - vector that stores result of FDFT
48 */
49 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
50 {
51   Mat_FFT        *fft  = (Mat_FFT*)A->data;
52   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
53   const PetscScalar *x_array;
54   PetscScalar    *y_array;
55 #if defined(PETSC_USE_COMPLEX)
56 #if defined(PETSC_USE_64BIT_INDICES)
57   fftw_iodim64   *iodims;
58 #else
59   fftw_iodim     *iodims;
60 #endif
61   PetscInt       i;
62 #endif
63   PetscInt       ndim = fft->ndim,*dim = fft->dim;
64 
65   PetscFunctionBegin;
66   PetscCall(VecGetArrayRead(x,&x_array));
67   PetscCall(VecGetArray(y,&y_array));
68   if (!fftw->p_forward) { /* create a plan, then excute it */
69     switch (ndim) {
70     case 1:
71 #if defined(PETSC_USE_COMPLEX)
72       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
73 #else
74       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
75 #endif
76       break;
77     case 2:
78 #if defined(PETSC_USE_COMPLEX)
79       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
80 #else
81       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
82 #endif
83       break;
84     case 3:
85 #if defined(PETSC_USE_COMPLEX)
86       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
87 #else
88       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
89 #endif
90       break;
91     default:
92 #if defined(PETSC_USE_COMPLEX)
93       iodims = fftw->iodims;
94 #if defined(PETSC_USE_64BIT_INDICES)
95       if (ndim) {
96         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
97         iodims[ndim-1].is = iodims[ndim-1].os = 1;
98         for (i=ndim-2; i>=0; --i) {
99           iodims[i].n = (ptrdiff_t)dim[i];
100           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
101         }
102       }
103       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
104 #else
105       if (ndim) {
106         iodims[ndim-1].n = (int)dim[ndim-1];
107         iodims[ndim-1].is = iodims[ndim-1].os = 1;
108         for (i=ndim-2; i>=0; --i) {
109           iodims[i].n = (int)dim[i];
110           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
111         }
112       }
113       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
114 #endif
115 
116 #else
117       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
118 #endif
119       break;
120     }
121     fftw->finarray  = (PetscScalar *) x_array;
122     fftw->foutarray = y_array;
123     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
124                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
125     fftw_execute(fftw->p_forward);
126   } else { /* use existing plan */
127     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
128 #if defined(PETSC_USE_COMPLEX)
129       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
130 #else
131       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
132 #endif
133     } else {
134       fftw_execute(fftw->p_forward);
135     }
136   }
137   PetscCall(VecRestoreArray(y,&y_array));
138   PetscCall(VecRestoreArrayRead(x,&x_array));
139   PetscFunctionReturn(0);
140 }
141 
/* MatMultTranspose_SeqFFTW performs serial backward DFT
   Input parameter:
     A - the matrix
     x - the vector on which BDFT will be performed

   Output parameter:
     y - vector that stores result of BDFT
*/

PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
  PetscInt       ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims = fftw->iodims;
#else
  fftw_iodim     *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* no cached plan yet: create one for these arrays, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* NOTE(review): iodims strides are filled by the default case of MatMult_SeqFFTW;
         presumably they are also initialized when the matrix is created -- confirm that a
         backward transform without a prior forward one sees valid strides */
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was created for; plain fftw_execute() is only
       valid for exactly these arrays */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* new-array execute: arrays differ from the ones the plan was made with */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
222 
223 #if !PetscDefined(HAVE_MPIUNI)
224 /* MatMult_MPIFFTW performs forward DFT in parallel
225    Input parameter:
226      A - the matrix
227      x - the vector on which FDFT will be performed
228 
229    Output parameter:
230    y   - vector that stores result of FDFT
231 */
232 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
233 {
234   Mat_FFT        *fft  = (Mat_FFT*)A->data;
235   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
236   const PetscScalar *x_array;
237   PetscScalar    *y_array;
238   PetscInt       ndim = fft->ndim,*dim = fft->dim;
239   MPI_Comm       comm;
240 
241   PetscFunctionBegin;
242   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
243   PetscCall(VecGetArrayRead(x,&x_array));
244   PetscCall(VecGetArray(y,&y_array));
245   if (!fftw->p_forward) { /* create a plan, then excute it */
246     switch (ndim) {
247     case 1:
248 #if defined(PETSC_USE_COMPLEX)
249       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
250 #else
251       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
252 #endif
253       break;
254     case 2:
255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
256       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
257 #else
258       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
259 #endif
260       break;
261     case 3:
262 #if defined(PETSC_USE_COMPLEX)
263       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
264 #else
265       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
266 #endif
267       break;
268     default:
269 #if defined(PETSC_USE_COMPLEX)
270       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
271 #else
272       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
273 #endif
274       break;
275     }
276     fftw->finarray  = (PetscScalar *) x_array;
277     fftw->foutarray = y_array;
278     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
279                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
280     fftw_execute(fftw->p_forward);
281   } else { /* use existing plan */
282     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
283       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
284     } else {
285       fftw_execute(fftw->p_forward);
286     }
287   }
288   PetscCall(VecRestoreArray(y,&y_array));
289   PetscCall(VecRestoreArrayRead(x,&x_array));
290   PetscFunctionReturn(0);
291 }
292 
293 /*
294    MatMultTranspose_MPIFFTW performs parallel backward DFT
295    Input parameter:
296      A - the matrix
297      x - the vector on which BDFT will be performed
298 
299    Output parameter:
300      y - vector that stores result of BDFT
301 */
302 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
303 {
304   Mat_FFT        *fft  = (Mat_FFT*)A->data;
305   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
306   const PetscScalar *x_array;
307   PetscScalar    *y_array;
308   PetscInt       ndim = fft->ndim,*dim = fft->dim;
309   MPI_Comm       comm;
310 
311   PetscFunctionBegin;
312   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
313   PetscCall(VecGetArrayRead(x,&x_array));
314   PetscCall(VecGetArray(y,&y_array));
315   if (!fftw->p_backward) { /* create a plan, then excute it */
316     switch (ndim) {
317     case 1:
318 #if defined(PETSC_USE_COMPLEX)
319       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
320 #else
321       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
322 #endif
323       break;
324     case 2:
325 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
326       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
327 #else
328       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
329 #endif
330       break;
331     case 3:
332 #if defined(PETSC_USE_COMPLEX)
333       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
334 #else
335       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
336 #endif
337       break;
338     default:
339 #if defined(PETSC_USE_COMPLEX)
340       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
341 #else
342       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
343 #endif
344       break;
345     }
346     fftw->binarray  = (PetscScalar *) x_array;
347     fftw->boutarray = y_array;
348     fftw_execute(fftw->p_backward);
349   } else { /* use existing plan */
350     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
351       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
352     } else {
353       fftw_execute(fftw->p_backward);
354     }
355   }
356   PetscCall(VecRestoreArray(y,&y_array));
357   PetscCall(VecRestoreArrayRead(x,&x_array));
358   PetscFunctionReturn(0);
359 }
360 #endif
361 
362 PetscErrorCode MatDestroy_FFTW(Mat A)
363 {
364   Mat_FFT        *fft  = (Mat_FFT*)A->data;
365   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
366 
367   PetscFunctionBegin;
368   fftw_destroy_plan(fftw->p_forward);
369   fftw_destroy_plan(fftw->p_backward);
370   if (fftw->iodims) {
371     free(fftw->iodims);
372   }
373   PetscCall(PetscFree(fftw->dim_fftw));
374   PetscCall(PetscFree(fft->data));
375 #if !PetscDefined(HAVE_MPIUNI)
376   fftw_mpi_cleanup();
377 #endif
378   PetscFunctionReturn(0);
379 }
380 
381 #if !PetscDefined(HAVE_MPIUNI)
382 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
/* VecDestroy_MPIFFTW - Destroys a vector whose data array came from fftw_malloc().
   The array was allocated by fftw_malloc() in MatCreateVecsFFTW_FFTW(), so it must be
   released with fftw_free() rather than by PETSc; VecDestroy_MPI() then frees the Vec
   itself (v->array_allocated is NULL for these vectors, so PETSc will not attempt to
   free the array again). */
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar    *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v,&array));
  fftw_free((fftw_complex*)array);
  PetscCall(VecRestoreArray(v,&array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(0);
}
394 #endif
395 
396 #if !PetscDefined(HAVE_MPIUNI)
397 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
398 {
399   Mat            A;
400 
401   PetscFunctionBegin;
402   PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A));
403   PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL));
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
408 {
409   Mat            A;
410 
411   PetscFunctionBegin;
412   PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A));
413   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL));
414   PetscFunctionReturn(0);
415 }
416 
417 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
418 {
419   Mat            A;
420 
421   PetscFunctionBegin;
422   PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A));
423   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new));
424   PetscFunctionReturn(0);
425 }
426 #endif
427 
428 /*@
429    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
430      parallel layout determined by FFTW
431 
432    Collective on Mat
433 
434    Input Parameter:
435 .   A - the matrix
436 
437    Output Parameters:
438 +   x - (optional) input vector of forward FFTW
439 .   y - (optional) output vector of forward FFTW
440 -   z - (optional) output vector of backward FFTW
441 
442   Level: advanced
443 
444   Note: The parallel layout of output of forward FFTW is always same as the input
445         of the backward FFTW. But parallel layout of the input vector of forward
446         FFTW might not be same as the output of backward FFTW.
447         Also note that we need to provide enough space while doing parallel real transform.
        We need to pad extra zeros at the end of the last dimension. For this reason one needs to
        invoke the fftw_mpi_local_size_transposed routine. Remember one has to change the
450         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
451         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
452         zeros if it is odd only one zero is needed.
453         Lastly one needs some scratch space at the end of data set in each process. alloc_local
454         figures out how much space is needed, i.e. it figures out the data+scratch space for
455         each processor and returns that.
456 
457 .seealso: MatCreateFFT()
458 @*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscFunctionBegin;
  /* dispatch to the implementation registered for A's matrix type (MatCreateVecsFFTW_FFTW) */
  PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  PetscFunctionReturn(0);
}
465 
466 PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
467 {
468   PetscMPIInt    size,rank;
469   MPI_Comm       comm;
470   Mat_FFT        *fft  = (Mat_FFT*)A->data;
471 
472   PetscFunctionBegin;
473   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
474   PetscValidType(A,1);
475   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
476 
477   PetscCallMPI(MPI_Comm_size(comm, &size));
478   PetscCallMPI(MPI_Comm_rank(comm, &rank));
479   if (size == 1) { /* sequential case */
480 #if defined(PETSC_USE_COMPLEX)
481     if (fin)  PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin));
482     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout));
483     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout));
484 #else
485     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin));
486     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout));
487     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout));
488 #endif
489 #if !PetscDefined(HAVE_MPIUNI)
490   } else { /* parallel cases */
491     Mat_FFTW     *fftw = (Mat_FFTW*)fft->data;
492     PetscInt     ndim  = fft->ndim,*dim = fft->dim;
493     ptrdiff_t    alloc_local,local_n0,local_0_start;
494     ptrdiff_t    local_n1;
495     fftw_complex *data_fout;
496     ptrdiff_t    local_1_start;
497 #if defined(PETSC_USE_COMPLEX)
498     fftw_complex *data_fin,*data_bout;
499 #else
500     double       *data_finr,*data_boutr;
501     PetscInt     n1,N1;
502     ptrdiff_t    temp;
503 #endif
504 
505     switch (ndim) {
506     case 1:
507 #if !defined(PETSC_USE_COMPLEX)
508       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
509 #else
510       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
511       if (fin) {
512         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
513         PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin));
514         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
515         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
516         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
517       }
518       if (fout) {
519         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
520         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout));
521         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
522         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
523         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
524       }
525       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
526       if (bout) {
527         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout));
529         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
530         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
531         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
532       }
533       break;
534 #endif
535     case 2:
536 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
537       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
538       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
539       if (fin) {
540         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
541         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
542         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
543         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
544         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
545       }
546       if (fout) {
547         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
548         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout));
549         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
550         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
551         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
552       }
553       if (bout) {
554         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
555         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
556         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
557         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
558         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
559       }
560 #else
561       /* Get local size */
562       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
563       if (fin) {
564         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
565         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
566         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
567         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
568         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
569       }
570       if (fout) {
571         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
572         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
573         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
574         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
575         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
576       }
577       if (bout) {
578         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
579         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
580         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
581         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
582         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
583       }
584 #endif
585       break;
586     case 3:
587 #if !defined(PETSC_USE_COMPLEX)
588       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
589       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
590       if (fin) {
591         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
592         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
593         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
594         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
595         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
596       }
597       if (fout) {
598         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
599         PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout));
600         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
601         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
602         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
603       }
604       if (bout) {
605         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
606         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
607         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
608         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
609         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
610       }
611 #else
612       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
613       if (fin) {
614         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
615         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
616         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
617         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
618         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
619       }
620       if (fout) {
621         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
622         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
623         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
624         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
625         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
626       }
627       if (bout) {
628         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
629         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
630         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
631         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
632         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
633       }
634 #endif
635       break;
636     default:
637 #if !defined(PETSC_USE_COMPLEX)
638       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
639       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
640       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
641       N1          = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
642       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
643 
644       if (fin) {
645         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
646         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin));
647         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
648         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
649         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
650       }
651       if (fout) {
652         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
653         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout));
654         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
655         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
656         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
657       }
658       if (bout) {
659         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
660         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout));
661         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
662         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
663         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
664       }
665 #else
666       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
667       if (fin) {
668         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
669         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
670         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
671         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
672         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
673       }
674       if (fout) {
675         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
676         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
677         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
678         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
679         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
680       }
681       if (bout) {
682         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
683         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
684         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
685         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
686         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
687       }
688 #endif
689       break;
690     }
691     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
692        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
693        memory leaks. We void these pointers here to avoid acident uses.
694     */
695     if (fin)  (*fin)->ops->replacearray = NULL;
696     if (fout) (*fout)->ops->replacearray = NULL;
697     if (bout) (*bout)->ops->replacearray = NULL;
698 #endif
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 /*@
704    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
705 
706    Collective on Mat
707 
708    Input Parameters:
709 +  A - FFTW matrix
710 -  x - the PETSc vector
711 
712    Output Parameters:
713 .  y - the FFTW vector
714 
715   Options Database Keys:
716 . -mat_fftw_plannerflags - set FFTW planner flags
717 
718    Level: intermediate
719 
   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.
723 
724 .seealso: VecScatterFFTWToPetsc()
725 @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A; for MATFFTW this is
     VecScatterPetscToFFTW_FFTW(). PetscUseMethod errors if A does not
     provide "VecScatterPetscToFFTW_C". */
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}
732 
733 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
734 {
735   MPI_Comm       comm;
736   Mat_FFT        *fft  = (Mat_FFT*)A->data;
737   PetscInt       low;
738   PetscMPIInt    rank,size;
739   PetscInt       vsize,vsize1;
740   VecScatter     vecscat;
741   IS             list1;
742 
743   PetscFunctionBegin;
744   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
745   PetscCallMPI(MPI_Comm_size(comm, &size));
746   PetscCallMPI(MPI_Comm_rank(comm, &rank));
747   PetscCall(VecGetOwnershipRange(y,&low,NULL));
748 
749   if (size==1) {
750     PetscCall(VecGetSize(x,&vsize));
751     PetscCall(VecGetSize(y,&vsize1));
752     PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1));
753     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
754     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
755     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
756     PetscCall(VecScatterDestroy(&vecscat));
757     PetscCall(ISDestroy(&list1));
758 #if !PetscDefined(HAVE_MPIUNI)
759   } else {
760     Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
761     PetscInt   ndim  = fft->ndim,*dim = fft->dim;
762     ptrdiff_t  local_n0,local_0_start;
763     ptrdiff_t  local_n1,local_1_start;
764     IS         list2;
765 #if !defined(PETSC_USE_COMPLEX)
766     PetscInt   i,j,k,partial_dim;
767     PetscInt   *indx1, *indx2, tempindx, tempindx1;
768     PetscInt   NM;
769     ptrdiff_t  temp;
770 #endif
771 
772     switch (ndim) {
773     case 1:
774 #if defined(PETSC_USE_COMPLEX)
775       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
776 
777       PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1));
778       PetscCall(ISCreateStride(comm,local_n0,low,1,&list2));
779       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
780       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
781       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
782       PetscCall(VecScatterDestroy(&vecscat));
783       PetscCall(ISDestroy(&list1));
784       PetscCall(ISDestroy(&list2));
785 #else
786       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
787 #endif
788       break;
789     case 2:
790 #if defined(PETSC_USE_COMPLEX)
791       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
792 
793       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
794       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
795       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
796       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
797       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
798       PetscCall(VecScatterDestroy(&vecscat));
799       PetscCall(ISDestroy(&list1));
800       PetscCall(ISDestroy(&list2));
801 #else
802       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
803 
804       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
805       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
806 
807       if (dim[1]%2==0) {
808         NM = dim[1]+2;
809       } else {
810         NM = dim[1]+1;
811       }
812       for (i=0; i<local_n0; i++) {
813         for (j=0; j<dim[1]; j++) {
814           tempindx  = i*dim[1] + j;
815           tempindx1 = i*NM + j;
816 
817           indx1[tempindx]=local_0_start*dim[1]+tempindx;
818           indx2[tempindx]=low+tempindx1;
819         }
820       }
821 
822       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
823       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
824 
825       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
826       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
827       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
828       PetscCall(VecScatterDestroy(&vecscat));
829       PetscCall(ISDestroy(&list1));
830       PetscCall(ISDestroy(&list2));
831       PetscCall(PetscFree(indx1));
832       PetscCall(PetscFree(indx2));
833 #endif
834       break;
835 
836     case 3:
837 #if defined(PETSC_USE_COMPLEX)
838       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
839 
840       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
841       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
842       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
843       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
844       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
845       PetscCall(VecScatterDestroy(&vecscat));
846       PetscCall(ISDestroy(&list1));
847       PetscCall(ISDestroy(&list2));
848 #else
849       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
850       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
851       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
852 
853       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
854       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
855 
856       if (dim[2]%2==0) NM = dim[2]+2;
857       else             NM = dim[2]+1;
858 
859       for (i=0; i<local_n0; i++) {
860         for (j=0; j<dim[1]; j++) {
861           for (k=0; k<dim[2]; k++) {
862             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
863             tempindx1 = i*dim[1]*NM + j*NM + k;
864 
865             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
866             indx2[tempindx]=low+tempindx1;
867           }
868         }
869       }
870 
871       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
872       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
873       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
874       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
875       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
876       PetscCall(VecScatterDestroy(&vecscat));
877       PetscCall(ISDestroy(&list1));
878       PetscCall(ISDestroy(&list2));
879       PetscCall(PetscFree(indx1));
880       PetscCall(PetscFree(indx2));
881 #endif
882       break;
883 
884     default:
885 #if defined(PETSC_USE_COMPLEX)
886       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
887 
888       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
889       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
890       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
891       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
892       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
893       PetscCall(VecScatterDestroy(&vecscat));
894       PetscCall(ISDestroy(&list1));
895       PetscCall(ISDestroy(&list2));
896 #else
897       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
898       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
899       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
900 
901       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
902 
903       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
904 
905       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
906 
907       partial_dim = fftw->partial_dim;
908 
909       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
910       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
911 
912       if (dim[ndim-1]%2==0) NM = 2;
913       else                  NM = 1;
914 
915       j = low;
916       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
917         indx1[i] = local_0_start*partial_dim + i;
918         indx2[i] = j;
919         if (k%dim[ndim-1]==0) j+=NM;
920         j++;
921       }
922       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
923       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
924       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
925       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
926       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
927       PetscCall(VecScatterDestroy(&vecscat));
928       PetscCall(ISDestroy(&list1));
929       PetscCall(ISDestroy(&list2));
930       PetscCall(PetscFree(indx1));
931       PetscCall(PetscFree(indx2));
932 #endif
933       break;
934     }
935 #endif
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 /*@
941    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
942 
943    Collective on Mat
944 
945     Input Parameters:
946 +   A - FFTW matrix
947 -   x - FFTW vector
948 
949    Output Parameters:
950 .  y - PETSc vector
951 
952    Level: intermediate
953 
   Note: While doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.
956 
957 .seealso: VecScatterPetscToFFTW()
958 @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A; for MATFFTW this is
     VecScatterFFTWToPetsc_FFTW(). PetscUseMethod errors if A does not
     provide "VecScatterFFTWToPetsc_C". */
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}
965 
966 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
967 {
968   MPI_Comm       comm;
969   Mat_FFT        *fft  = (Mat_FFT*)A->data;
970   PetscInt       low;
971   PetscMPIInt    rank,size;
972   VecScatter     vecscat;
973   IS             list1;
974 
975   PetscFunctionBegin;
976   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
977   PetscCallMPI(MPI_Comm_size(comm, &size));
978   PetscCallMPI(MPI_Comm_rank(comm, &rank));
979   PetscCall(VecGetOwnershipRange(x,&low,NULL));
980 
981   if (size==1) {
982     PetscCall(ISCreateStride(comm,fft->N,0,1,&list1));
983     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
984     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
985     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
986     PetscCall(VecScatterDestroy(&vecscat));
987     PetscCall(ISDestroy(&list1));
988 
989 #if !PetscDefined(HAVE_MPIUNI)
990   } else {
991     Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
992     PetscInt       ndim  = fft->ndim,*dim = fft->dim;
993     ptrdiff_t      local_n0,local_0_start;
994     ptrdiff_t      local_n1,local_1_start;
995     IS             list2;
996 #if !defined(PETSC_USE_COMPLEX)
997     PetscInt       i,j,k,partial_dim;
998     PetscInt       *indx1, *indx2, tempindx, tempindx1;
999     PetscInt       NM;
1000     ptrdiff_t      temp;
1001 #endif
1002     switch (ndim) {
1003     case 1:
1004 #if defined(PETSC_USE_COMPLEX)
1005       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1006 
1007       PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1));
1008       PetscCall(ISCreateStride(comm,local_n1,low,1,&list2));
1009       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1010       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1011       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1012       PetscCall(VecScatterDestroy(&vecscat));
1013       PetscCall(ISDestroy(&list1));
1014       PetscCall(ISDestroy(&list2));
1015 #else
1016       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1017 #endif
1018       break;
1019     case 2:
1020 #if defined(PETSC_USE_COMPLEX)
1021       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1022 
1023       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
1024       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
1025       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1026       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1027       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1028       PetscCall(VecScatterDestroy(&vecscat));
1029       PetscCall(ISDestroy(&list1));
1030       PetscCall(ISDestroy(&list2));
1031 #else
1032       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1033 
1034       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
1035       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
1036 
1037       if (dim[1]%2==0) NM = dim[1]+2;
1038       else             NM = dim[1]+1;
1039 
1040       for (i=0; i<local_n0; i++) {
1041         for (j=0; j<dim[1]; j++) {
1042           tempindx = i*dim[1] + j;
1043           tempindx1 = i*NM + j;
1044 
1045           indx1[tempindx]=local_0_start*dim[1]+tempindx;
1046           indx2[tempindx]=low+tempindx1;
1047         }
1048       }
1049 
1050       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
1051       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
1052 
1053       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1054       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1055       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1056       PetscCall(VecScatterDestroy(&vecscat));
1057       PetscCall(ISDestroy(&list1));
1058       PetscCall(ISDestroy(&list2));
1059       PetscCall(PetscFree(indx1));
1060       PetscCall(PetscFree(indx2));
1061 #endif
1062       break;
1063     case 3:
1064 #if defined(PETSC_USE_COMPLEX)
1065       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1066 
1067       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
1068       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
1069       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1070       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1071       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1072       PetscCall(VecScatterDestroy(&vecscat));
1073       PetscCall(ISDestroy(&list1));
1074       PetscCall(ISDestroy(&list2));
1075 #else
1076       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1077 
1078       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
1079       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
1080 
1081       if (dim[2]%2==0) NM = dim[2]+2;
1082       else             NM = dim[2]+1;
1083 
1084       for (i=0; i<local_n0; i++) {
1085         for (j=0; j<dim[1]; j++) {
1086           for (k=0; k<dim[2]; k++) {
1087             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1088             tempindx1 = i*dim[1]*NM + j*NM + k;
1089 
1090             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1091             indx2[tempindx]=low+tempindx1;
1092           }
1093         }
1094       }
1095 
1096       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
1097       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
1098 
1099       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1100       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1101       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1102       PetscCall(VecScatterDestroy(&vecscat));
1103       PetscCall(ISDestroy(&list1));
1104       PetscCall(ISDestroy(&list2));
1105       PetscCall(PetscFree(indx1));
1106       PetscCall(PetscFree(indx2));
1107 #endif
1108       break;
1109     default:
1110 #if defined(PETSC_USE_COMPLEX)
1111       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1112 
1113       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
1114       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
1115       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1116       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1117       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1118       PetscCall(VecScatterDestroy(&vecscat));
1119       PetscCall(ISDestroy(&list1));
1120       PetscCall(ISDestroy(&list2));
1121 #else
1122       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1123 
1124       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1125 
1126       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1127 
1128       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1129 
1130       partial_dim = fftw->partial_dim;
1131 
1132       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
1133       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
1134 
1135       if (dim[ndim-1]%2==0) NM = 2;
1136       else                  NM = 1;
1137 
1138       j = low;
1139       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1140         indx1[i] = local_0_start*partial_dim + i;
1141         indx2[i] = j;
1142         if (k%dim[ndim-1]==0) j+=NM;
1143         j++;
1144       }
1145       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
1146       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
1147 
1148       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1149       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1150       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1151       PetscCall(VecScatterDestroy(&vecscat));
1152       PetscCall(ISDestroy(&list1));
1153       PetscCall(ISDestroy(&list2));
1154       PetscCall(PetscFree(indx1));
1155       PetscCall(PetscFree(indx2));
1156 #endif
1157       break;
1158     }
1159 #endif
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1166 
1167   Options Database Keys:
1168 + -mat_fftw_plannerflags - set FFTW planner flags
1169 
1170    Level: intermediate
1171 
1172 */
1173 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1174 {
1175   MPI_Comm       comm;
1176   Mat_FFT        *fft = (Mat_FFT*)A->data;
1177   Mat_FFTW       *fftw;
1178   PetscInt       ndim = fft->ndim,*dim = fft->dim;
1179   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1180   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1181   PetscBool      flg;
1182   PetscInt       p_flag,partial_dim=1,ctr;
1183   PetscMPIInt    size,rank;
1184   ptrdiff_t      *pdim;
1185   PetscErrorCode ierr;
1186 #if !defined(PETSC_USE_COMPLEX)
1187   PetscInt       tot_dim;
1188 #endif
1189 
1190   PetscFunctionBegin;
1191   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1192   PetscCallMPI(MPI_Comm_size(comm, &size));
1193   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1194 
1195 #if !PetscDefined(HAVE_MPIUNI)
1196   fftw_mpi_init();
1197 #endif
1198   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1199   pdim[0] = dim[0];
1200 #if !defined(PETSC_USE_COMPLEX)
1201   tot_dim = 2*dim[0];
1202 #endif
1203   for (ctr=1; ctr<ndim; ctr++) {
1204     partial_dim *= dim[ctr];
1205     pdim[ctr]    = dim[ctr];
1206 #if !defined(PETSC_USE_COMPLEX)
1207     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1208     else             tot_dim *= dim[ctr];
1209 #endif
1210   }
1211 
1212   if (size == 1) {
1213 #if defined(PETSC_USE_COMPLEX)
1214     PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N));
1215     fft->n = fft->N;
1216 #else
1217     PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim));
1218     fft->n = tot_dim;
1219 #endif
1220 #if !PetscDefined(HAVE_MPIUNI)
1221   } else {
1222     ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
1223 #if !defined(PETSC_USE_COMPLEX)
1224     ptrdiff_t temp;
1225     PetscInt  N1;
1226 #endif
1227 
1228     switch (ndim) {
1229     case 1:
1230 #if !defined(PETSC_USE_COMPLEX)
1231       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1232 #else
1233       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1234       fft->n = (PetscInt)local_n0;
1235       PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N));
1236 #endif
1237       break;
1238     case 2:
1239 #if defined(PETSC_USE_COMPLEX)
1240       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1241       fft->n    = (PetscInt)local_n0*dim[1];
1242       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1243 #else
1244       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1245 
1246       fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1247       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)));
1248 #endif
1249       break;
1250     case 3:
1251 #if defined(PETSC_USE_COMPLEX)
1252       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1253 
1254       fft->n = (PetscInt)local_n0*dim[1]*dim[2];
1255       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1256 #else
1257       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1258 
1259       fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1260       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)));
1261 #endif
1262       break;
1263     default:
1264 #if defined(PETSC_USE_COMPLEX)
1265       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1266 
1267       fft->n = (PetscInt)local_n0*partial_dim;
1268       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1269 #else
1270       temp = pdim[ndim-1];
1271 
1272       pdim[ndim-1] = temp/2 + 1;
1273 
1274       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1275 
1276       fft->n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1277       N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1278 
1279       pdim[ndim-1] = temp;
1280 
1281       PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1));
1282 #endif
1283       break;
1284     }
1285 #endif
1286   }
1287   free(pdim);
1288   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW));
1289   PetscCall(PetscNewLog(A,&fftw));
1290   fft->data = (void*)fftw;
1291 
1292   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1293   fftw->partial_dim = partial_dim;
1294 
1295   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1296   if (size == 1) {
1297 #if defined(PETSC_USE_64BIT_INDICES)
1298     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1299 #else
1300     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1301 #endif
1302   }
1303 
1304   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1305 
1306   fftw->p_forward  = NULL;
1307   fftw->p_backward = NULL;
1308   fftw->p_flag     = FFTW_ESTIMATE;
1309 
1310   if (size == 1) {
1311     A->ops->mult          = MatMult_SeqFFTW;
1312     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1313 #if !PetscDefined(HAVE_MPIUNI)
1314   } else {
1315     A->ops->mult          = MatMult_MPIFFTW;
1316     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1317 #endif
1318   }
1319   fft->matdestroy = MatDestroy_FFTW;
1320   A->assembled    = PETSC_TRUE;
1321   A->preallocated = PETSC_TRUE;
1322 
1323   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW));
1324   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW));
1325   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW));
1326 
1327   /* get runtime options */
1328   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");PetscCall(ierr);
1329   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg));
1330   if (flg) fftw->p_flag = iplans[p_flag];
1331   ierr = PetscOptionsEnd();PetscCall(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334