xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #if !PetscDefined(HAVE_MPIUNI)
10 #include <fftw3-mpi.h>
11 #else
12 #include <fftw3.h>
13 #endif
14 EXTERN_C_END
15 
typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;  /* dimensions in FFTW's own ptrdiff_t type (MPI interface requirement) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;              /* guru-interface dimension descriptors (64-bit sizes/strides) */
#else
  fftw_iodim   *iodims;              /* guru-interface dimension descriptors */
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward,p_backward; /* cached plans, created lazily on first MatMult/MatMultTranspose */
  unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                             executed for the arrays with which the plan was created */
} Mat_FFTW;
29 
30 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
32 extern PetscErrorCode MatDestroy_FFTW(Mat);
33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
34 #if !PetscDefined(HAVE_MPIUNI)
35 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
38 #endif
39 
40 /*
41    MatMult_SeqFFTW performs forward DFT
42    Input parameter:
43      A - the matrix
44      x - the vector on which FDFT will be performed
45 
46    Output parameter:
47      y - vector that stores result of FDFT
48 */
/*
   MatMult_SeqFFTW - sequential forward DFT.

   A forward plan is created lazily on the first call, for the exact arrays of
   x and y, and cached in fftw->p_forward. Later calls reuse the cached plan;
   if the caller supplies different arrays, the new-array execute functions
   (fftw_execute_dft / fftw_execute_dft_r2c) are used instead of fftw_execute.
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims;
#else
  fftw_iodim     *iodims;
#endif
  PetscInt       i;
#endif
  PetscInt       ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3: use the guru interface. Build row-major iodims where the
         last (fastest-varying) dimension has stride 1 and each earlier
         dimension's stride is the product of the sizes after it. */
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      /* NOTE(review): (int*)dim casts PetscInt* to int*; with 64-bit indices
         these types differ in size -- confirm this path is exercised/safe */
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was built for (const cast only for bookkeeping) */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if fftw->p_flag != FFTW_ESTIMATE the data in the in/out arrays is overwritten
                during planning! Planning should be done before x is initialized; see FFTW manual sec 2.1 or sec 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
141 
142 /* MatMultTranspose_SeqFFTW performs serial backward DFT
143    Input parameter:
144      A - the matrix
145      x - the vector on which BDFT will be performed
146 
147    Output parameter:
148      y - vector that stores result of BDFT
149 */
150 
/*
   MatMultTranspose_SeqFFTW - sequential backward DFT.

   Mirrors MatMult_SeqFFTW: a backward plan is created lazily for the exact
   arrays of x and y and cached in fftw->p_backward; subsequent calls reuse
   it, switching to the new-array execute functions when different arrays
   are supplied. The iodims descriptors used by the guru path are the ones
   filled in by MatMult_SeqFFTW's default case.
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
  PetscInt       ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64   *iodims = fftw->iodims;
#else
  fftw_iodim     *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was built for (const cast only for bookkeeping) */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
222 
223 #if !PetscDefined(HAVE_MPIUNI)
224 /* MatMult_MPIFFTW performs forward DFT in parallel
225    Input parameter:
226      A - the matrix
227      x - the vector on which FDFT will be performed
228 
229    Output parameter:
230    y   - vector that stores result of FDFT
231 */
/*
   MatMult_MPIFFTW - parallel forward DFT using FFTW's MPI interface.

   Follows the same lazy plan-creation/caching scheme as MatMult_SeqFFTW.
   Parallel real 1D transforms are not supported by FFTW and error out.
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
  PetscInt       ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      /* NOTE(review): r2c paths hard-code FFTW_ESTIMATE instead of fftw->p_flag,
         presumably to avoid overwriting user data during planning -- confirm intent */
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the plan was built for (const cast only for bookkeeping) */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if fftw->p_flag != FFTW_ESTIMATE the data in the in/out arrays is overwritten
                during planning! Planning should be done before x is initialized; see FFTW manual sec 2.1 or sec 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      /* NOTE(review): fftw_execute_dft is used unconditionally here, even for r2c
         plans in real builds; the sequential version uses fftw_execute_dft_r2c --
         confirm this is intended */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
292 
293 /*
294    MatMultTranspose_MPIFFTW performs parallel backward DFT
295    Input parameter:
296      A - the matrix
297      x - the vector on which BDFT will be performed
298 
299    Output parameter:
300      y - vector that stores result of BDFT
301 */
/*
   MatMultTranspose_MPIFFTW - parallel backward DFT using FFTW's MPI interface.

   Mirrors MatMult_MPIFFTW with FFTW_BACKWARD / c2r plans, caching the plan
   in fftw->p_backward on first use.
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar    *y_array;
  PetscInt       ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm       comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      /* NOTE(review): c2r paths hard-code FFTW_ESTIMATE instead of fftw->p_flag -- confirm intent */
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the plan was built for (const cast only for bookkeeping) */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      /* NOTE(review): complex execute variant used even for c2r plans in real
         builds; sequential version uses fftw_execute_dft_c2r -- confirm intended */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
360 #endif
361 
362 PetscErrorCode MatDestroy_FFTW(Mat A)
363 {
364   Mat_FFT        *fft  = (Mat_FFT*)A->data;
365   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
366 
367   PetscFunctionBegin;
368   fftw_destroy_plan(fftw->p_forward);
369   fftw_destroy_plan(fftw->p_backward);
370   if (fftw->iodims) {
371     free(fftw->iodims);
372   }
373   PetscCall(PetscFree(fftw->dim_fftw));
374   PetscCall(PetscFree(fft->data));
375 #if !PetscDefined(HAVE_MPIUNI)
376   fftw_mpi_cleanup();
377 #endif
378   PetscFunctionReturn(0);
379 }
380 
381 #if !PetscDefined(HAVE_MPIUNI)
382 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
383 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
384 {
385   PetscScalar    *array;
386 
387   PetscFunctionBegin;
388   PetscCall(VecGetArray(v,&array));
389   fftw_free((fftw_complex*)array);
390   PetscCall(VecRestoreArray(v,&array));
391   PetscCall(VecDestroy_MPI(v));
392   PetscFunctionReturn(0);
393 }
394 #endif
395 
396 #if !PetscDefined(HAVE_MPIUNI)
397 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
398 {
399   Mat            A;
400 
401   PetscFunctionBegin;
402   PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A));
403   PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL));
404   PetscFunctionReturn(0);
405 }
406 
407 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
408 {
409   Mat            A;
410 
411   PetscFunctionBegin;
412   PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A));
413   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL));
414   PetscFunctionReturn(0);
415 }
416 
417 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
418 {
419   Mat            A;
420 
421   PetscFunctionBegin;
422   PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A));
423   PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new));
424   PetscFunctionReturn(0);
425 }
426 #endif
427 
428 /*@
429    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
430      parallel layout determined by FFTW
431 
432    Collective on Mat
433 
434    Input Parameter:
435 .   A - the matrix
436 
437    Output Parameters:
438 +   x - (optional) input vector of forward FFTW
439 .   y - (optional) output vector of forward FFTW
440 -   z - (optional) output vector of backward FFTW
441 
442   Level: advanced
443 
  Note: The parallel layout of the output of the forward FFTW is always the same as the input
        of the backward FFTW, but the parallel layout of the input vector of the forward
        FFTW might not be the same as the output of the backward FFTW.
        Also note that one needs to provide enough space while doing a parallel real transform:
        extra zeros must be padded at the end of the last dimension. For this reason one needs to
        invoke the routine fftw_mpi_local_size_transposed, changing the
        last dimension from n to n/2+1 when invoking it. The number of zeros to be padded
        depends on whether the last dimension is even or odd: if it is even, two zeros are
        needed; if it is odd, only one.
        Lastly, one needs some scratch space at the end of the data set in each process; alloc_local
        figures out how much space is needed, i.e. it computes the data+scratch space for
        each processor and returns that.
456 
457 .seealso: `MatCreateFFT()`
458 @*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation-specific routine composed on A under
     "MatCreateVecsFFTW_C" (errors if A has no such method, i.e. is not an FFTW matrix) */
  PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  PetscFunctionReturn(0);
}
465 
466 PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
467 {
468   PetscMPIInt    size,rank;
469   MPI_Comm       comm;
470   Mat_FFT        *fft  = (Mat_FFT*)A->data;
471 
472   PetscFunctionBegin;
473   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
474   PetscValidType(A,1);
475   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
476 
477   PetscCallMPI(MPI_Comm_size(comm, &size));
478   PetscCallMPI(MPI_Comm_rank(comm, &rank));
479   if (size == 1) { /* sequential case */
480 #if defined(PETSC_USE_COMPLEX)
481     if (fin)  PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin));
482     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout));
483     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout));
484 #else
485     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin));
486     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout));
487     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout));
488 #endif
489 #if !PetscDefined(HAVE_MPIUNI)
490   } else { /* parallel cases */
491     Mat_FFTW     *fftw = (Mat_FFTW*)fft->data;
492     PetscInt     ndim  = fft->ndim,*dim = fft->dim;
493     ptrdiff_t    alloc_local,local_n0,local_0_start;
494     ptrdiff_t    local_n1;
495     fftw_complex *data_fout;
496     ptrdiff_t    local_1_start;
497 #if defined(PETSC_USE_COMPLEX)
498     fftw_complex *data_fin,*data_bout;
499 #else
500     double       *data_finr,*data_boutr;
501     PetscInt     n1,N1;
502     ptrdiff_t    temp;
503 #endif
504 
505     switch (ndim) {
506     case 1:
507 #if !defined(PETSC_USE_COMPLEX)
508       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
509 #else
510       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
511       if (fin) {
512         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
513         PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin));
514         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
515         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
516         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
517       }
518       if (fout) {
519         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
520         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout));
521         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
522         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
523         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
524       }
525       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
526       if (bout) {
527         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528         PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout));
529         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
530         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
531         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
532       }
533       break;
534 #endif
535     case 2:
536 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
537       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
538       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
539       if (fin) {
540         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
541         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
542         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
543         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
544         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
545       }
546       if (fout) {
547         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
548         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout));
549         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
550         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
551         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
552       }
553       if (bout) {
554         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
555         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
556         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
557         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
558         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
559       }
560 #else
561       /* Get local size */
562       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
563       if (fin) {
564         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
565         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
566         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
567         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
568         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
569       }
570       if (fout) {
571         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
572         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
573         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
574         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
575         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
576       }
577       if (bout) {
578         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
579         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
580         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
581         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
582         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
583       }
584 #endif
585       break;
586     case 3:
587 #if !defined(PETSC_USE_COMPLEX)
588       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
589       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
590       if (fin) {
591         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
592         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
593         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
594         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
595         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
596       }
597       if (fout) {
598         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
599         PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout));
600         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
601         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
602         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
603       }
604       if (bout) {
605         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
606         PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
607         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
608         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
609         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
610       }
611 #else
612       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
613       if (fin) {
614         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
615         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
616         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
617         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
618         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
619       }
620       if (fout) {
621         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
622         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
623         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
624         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
625         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
626       }
627       if (bout) {
628         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
629         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
630         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
631         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
632         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
633       }
634 #endif
635       break;
636     default:
637 #if !defined(PETSC_USE_COMPLEX)
638       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
639       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
640       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
641       N1          = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
642       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
643 
644       if (fin) {
645         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
646         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin));
647         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
648         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
649         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
650       }
651       if (fout) {
652         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
653         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout));
654         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
655         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
656         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
657       }
658       if (bout) {
659         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
660         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout));
661         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
662         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
663         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
664       }
665 #else
666       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
667       if (fin) {
668         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
669         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
670         PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
671         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
672         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
673       }
674       if (fout) {
675         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
676         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
677         PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
678         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
679         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
680       }
681       if (bout) {
682         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
683         PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
684         PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
685         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
686         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
687       }
688 #endif
689       break;
690     }
691     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
692        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
693        memory leaks. We void these pointers here to avoid acident uses.
694     */
695     if (fin)  (*fin)->ops->replacearray = NULL;
696     if (fout) (*fout)->ops->replacearray = NULL;
697     if (bout) (*bout)->ops->replacearray = NULL;
698 #endif
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 /*@
704    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
705 
706    Collective on Mat
707 
708    Input Parameters:
709 +  A - FFTW matrix
710 -  x - the PETSc vector
711 
712    Output Parameters:
713 .  y - the FFTW vector
714 
715   Options Database Keys:
716 . -mat_fftw_plannerflags - set FFTW planner flags
717 
718    Level: intermediate
719 
   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by a scattering operation.
723 
724 .seealso: `VecScatterFFTWToPetsc()`
725 @*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on A (VecScatterPetscToFFTW_FFTW) */
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}
732 
733 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
734 {
735   MPI_Comm       comm;
736   Mat_FFT        *fft  = (Mat_FFT*)A->data;
737   PetscInt       low;
738   PetscMPIInt    rank,size;
739   PetscInt       vsize,vsize1;
740   VecScatter     vecscat;
741   IS             list1;
742 
743   PetscFunctionBegin;
744   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
745   PetscCallMPI(MPI_Comm_size(comm, &size));
746   PetscCallMPI(MPI_Comm_rank(comm, &rank));
747   PetscCall(VecGetOwnershipRange(y,&low,NULL));
748 
749   if (size==1) {
750     PetscCall(VecGetSize(x,&vsize));
751     PetscCall(VecGetSize(y,&vsize1));
752     PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1));
753     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
754     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
755     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
756     PetscCall(VecScatterDestroy(&vecscat));
757     PetscCall(ISDestroy(&list1));
758 #if !PetscDefined(HAVE_MPIUNI)
759   } else {
760     Mat_FFTW   *fftw = (Mat_FFTW*)fft->data;
761     PetscInt   ndim  = fft->ndim,*dim = fft->dim;
762     ptrdiff_t  local_n0,local_0_start;
763     ptrdiff_t  local_n1,local_1_start;
764     IS         list2;
765 #if !defined(PETSC_USE_COMPLEX)
766     PetscInt   i,j,k,partial_dim;
767     PetscInt   *indx1, *indx2, tempindx, tempindx1;
768     PetscInt   NM;
769     ptrdiff_t  temp;
770 #endif
771 
772     switch (ndim) {
773     case 1:
774 #if defined(PETSC_USE_COMPLEX)
775       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
776 
777       PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1));
778       PetscCall(ISCreateStride(comm,local_n0,low,1,&list2));
779       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
780       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
781       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
782       PetscCall(VecScatterDestroy(&vecscat));
783       PetscCall(ISDestroy(&list1));
784       PetscCall(ISDestroy(&list2));
785 #else
786       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
787 #endif
788       break;
789     case 2:
790 #if defined(PETSC_USE_COMPLEX)
791       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
792 
793       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
794       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
795       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
796       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
797       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
798       PetscCall(VecScatterDestroy(&vecscat));
799       PetscCall(ISDestroy(&list1));
800       PetscCall(ISDestroy(&list2));
801 #else
802       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
803 
804       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
805       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
806 
807       if (dim[1]%2==0) {
808         NM = dim[1]+2;
809       } else {
810         NM = dim[1]+1;
811       }
812       for (i=0; i<local_n0; i++) {
813         for (j=0; j<dim[1]; j++) {
814           tempindx  = i*dim[1] + j;
815           tempindx1 = i*NM + j;
816 
817           indx1[tempindx]=local_0_start*dim[1]+tempindx;
818           indx2[tempindx]=low+tempindx1;
819         }
820       }
821 
822       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
823       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
824 
825       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
826       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
827       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
828       PetscCall(VecScatterDestroy(&vecscat));
829       PetscCall(ISDestroy(&list1));
830       PetscCall(ISDestroy(&list2));
831       PetscCall(PetscFree(indx1));
832       PetscCall(PetscFree(indx2));
833 #endif
834       break;
835 
836     case 3:
837 #if defined(PETSC_USE_COMPLEX)
838       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
839 
840       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
841       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
842       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
843       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
844       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
845       PetscCall(VecScatterDestroy(&vecscat));
846       PetscCall(ISDestroy(&list1));
847       PetscCall(ISDestroy(&list2));
848 #else
849       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
850       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
851       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
852 
853       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
854       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
855 
856       if (dim[2]%2==0) NM = dim[2]+2;
857       else             NM = dim[2]+1;
858 
859       for (i=0; i<local_n0; i++) {
860         for (j=0; j<dim[1]; j++) {
861           for (k=0; k<dim[2]; k++) {
862             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
863             tempindx1 = i*dim[1]*NM + j*NM + k;
864 
865             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
866             indx2[tempindx]=low+tempindx1;
867           }
868         }
869       }
870 
871       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
872       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
873       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
874       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
875       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
876       PetscCall(VecScatterDestroy(&vecscat));
877       PetscCall(ISDestroy(&list1));
878       PetscCall(ISDestroy(&list2));
879       PetscCall(PetscFree(indx1));
880       PetscCall(PetscFree(indx2));
881 #endif
882       break;
883 
884     default:
885 #if defined(PETSC_USE_COMPLEX)
886       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
887 
888       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
889       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
890       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
891       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
892       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
893       PetscCall(VecScatterDestroy(&vecscat));
894       PetscCall(ISDestroy(&list1));
895       PetscCall(ISDestroy(&list2));
896 #else
897       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
898       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
899       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
900 
901       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
902 
903       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
904 
905       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
906 
907       partial_dim = fftw->partial_dim;
908 
909       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
910       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
911 
912       if (dim[ndim-1]%2==0) NM = 2;
913       else                  NM = 1;
914 
915       j = low;
916       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
917         indx1[i] = local_0_start*partial_dim + i;
918         indx2[i] = j;
919         if (k%dim[ndim-1]==0) j+=NM;
920         j++;
921       }
922       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
923       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
924       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
925       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
926       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
927       PetscCall(VecScatterDestroy(&vecscat));
928       PetscCall(ISDestroy(&list1));
929       PetscCall(ISDestroy(&list2));
930       PetscCall(PetscFree(indx1));
931       PetscCall(PetscFree(indx2));
932 #endif
933       break;
934     }
935 #endif
936   }
937   PetscFunctionReturn(0);
938 }
939 
940 /*@
941    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
942 
943    Collective on Mat
944 
945     Input Parameters:
946 +   A - FFTW matrix
947 -   x - FFTW vector
948 
949    Output Parameters:
950 .  y - PETSc vector
951 
952    Level: intermediate
953 
   Note: When performing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.
956 
957 .seealso: `VecScatterPetscToFFTW()`
958 @*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on A (VecScatterFFTWToPetsc_FFTW) */
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}
965 
966 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
967 {
968   MPI_Comm       comm;
969   Mat_FFT        *fft  = (Mat_FFT*)A->data;
970   PetscInt       low;
971   PetscMPIInt    rank,size;
972   VecScatter     vecscat;
973   IS             list1;
974 
975   PetscFunctionBegin;
976   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
977   PetscCallMPI(MPI_Comm_size(comm, &size));
978   PetscCallMPI(MPI_Comm_rank(comm, &rank));
979   PetscCall(VecGetOwnershipRange(x,&low,NULL));
980 
981   if (size==1) {
982     PetscCall(ISCreateStride(comm,fft->N,0,1,&list1));
983     PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
984     PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
985     PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
986     PetscCall(VecScatterDestroy(&vecscat));
987     PetscCall(ISDestroy(&list1));
988 
989 #if !PetscDefined(HAVE_MPIUNI)
990   } else {
991     Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
992     PetscInt       ndim  = fft->ndim,*dim = fft->dim;
993     ptrdiff_t      local_n0,local_0_start;
994     ptrdiff_t      local_n1,local_1_start;
995     IS             list2;
996 #if !defined(PETSC_USE_COMPLEX)
997     PetscInt       i,j,k,partial_dim;
998     PetscInt       *indx1, *indx2, tempindx, tempindx1;
999     PetscInt       NM;
1000     ptrdiff_t      temp;
1001 #endif
1002     switch (ndim) {
1003     case 1:
1004 #if defined(PETSC_USE_COMPLEX)
1005       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1006 
1007       PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1));
1008       PetscCall(ISCreateStride(comm,local_n1,low,1,&list2));
1009       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1010       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1011       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1012       PetscCall(VecScatterDestroy(&vecscat));
1013       PetscCall(ISDestroy(&list1));
1014       PetscCall(ISDestroy(&list2));
1015 #else
1016       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1017 #endif
1018       break;
1019     case 2:
1020 #if defined(PETSC_USE_COMPLEX)
1021       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1022 
1023       PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
1024       PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
1025       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1026       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1027       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1028       PetscCall(VecScatterDestroy(&vecscat));
1029       PetscCall(ISDestroy(&list1));
1030       PetscCall(ISDestroy(&list2));
1031 #else
1032       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1033 
1034       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
1035       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));
1036 
1037       if (dim[1]%2==0) NM = dim[1]+2;
1038       else             NM = dim[1]+1;
1039 
1040       for (i=0; i<local_n0; i++) {
1041         for (j=0; j<dim[1]; j++) {
1042           tempindx = i*dim[1] + j;
1043           tempindx1 = i*NM + j;
1044 
1045           indx1[tempindx]=local_0_start*dim[1]+tempindx;
1046           indx2[tempindx]=low+tempindx1;
1047         }
1048       }
1049 
1050       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
1051       PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));
1052 
1053       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1054       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1055       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1056       PetscCall(VecScatterDestroy(&vecscat));
1057       PetscCall(ISDestroy(&list1));
1058       PetscCall(ISDestroy(&list2));
1059       PetscCall(PetscFree(indx1));
1060       PetscCall(PetscFree(indx2));
1061 #endif
1062       break;
1063     case 3:
1064 #if defined(PETSC_USE_COMPLEX)
1065       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1066 
1067       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
1068       PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
1069       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1070       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1071       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1072       PetscCall(VecScatterDestroy(&vecscat));
1073       PetscCall(ISDestroy(&list1));
1074       PetscCall(ISDestroy(&list2));
1075 #else
1076       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1077 
1078       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
1079       PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));
1080 
1081       if (dim[2]%2==0) NM = dim[2]+2;
1082       else             NM = dim[2]+1;
1083 
1084       for (i=0; i<local_n0; i++) {
1085         for (j=0; j<dim[1]; j++) {
1086           for (k=0; k<dim[2]; k++) {
1087             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1088             tempindx1 = i*dim[1]*NM + j*NM + k;
1089 
1090             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1091             indx2[tempindx]=low+tempindx1;
1092           }
1093         }
1094       }
1095 
1096       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
1097       PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
1098 
1099       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1100       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1101       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1102       PetscCall(VecScatterDestroy(&vecscat));
1103       PetscCall(ISDestroy(&list1));
1104       PetscCall(ISDestroy(&list2));
1105       PetscCall(PetscFree(indx1));
1106       PetscCall(PetscFree(indx2));
1107 #endif
1108       break;
1109     default:
1110 #if defined(PETSC_USE_COMPLEX)
1111       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1112 
1113       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
1114       PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
1115       PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
1116       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1117       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1118       PetscCall(VecScatterDestroy(&vecscat));
1119       PetscCall(ISDestroy(&list1));
1120       PetscCall(ISDestroy(&list2));
1121 #else
1122       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1123 
1124       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1125 
1126       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1127 
1128       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1129 
1130       partial_dim = fftw->partial_dim;
1131 
1132       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
1133       PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));
1134 
1135       if (dim[ndim-1]%2==0) NM = 2;
1136       else                  NM = 1;
1137 
1138       j = low;
1139       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1140         indx1[i] = local_0_start*partial_dim + i;
1141         indx2[i] = j;
1142         if (k%dim[ndim-1]==0) j+=NM;
1143         j++;
1144       }
1145       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
1146       PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
1147 
1148       PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
1149       PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1150       PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
1151       PetscCall(VecScatterDestroy(&vecscat));
1152       PetscCall(ISDestroy(&list1));
1153       PetscCall(ISDestroy(&list2));
1154       PetscCall(PetscFree(indx1));
1155       PetscCall(PetscFree(indx2));
1156 #endif
1157       break;
1158     }
1159 #endif
1160   }
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 /*
1165     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1166 
  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags
1169 
1170    Level: intermediate
1171 
1172 */
1173 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1174 {
1175   MPI_Comm       comm;
1176   Mat_FFT        *fft = (Mat_FFT*)A->data;
1177   Mat_FFTW       *fftw;
1178   PetscInt       ndim = fft->ndim,*dim = fft->dim;
1179   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1180   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1181   PetscBool      flg;
1182   PetscInt       p_flag,partial_dim=1,ctr;
1183   PetscMPIInt    size,rank;
1184   ptrdiff_t      *pdim;
1185 #if !defined(PETSC_USE_COMPLEX)
1186   PetscInt       tot_dim;
1187 #endif
1188 
1189   PetscFunctionBegin;
1190   PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
1191   PetscCallMPI(MPI_Comm_size(comm, &size));
1192   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1193 
1194 #if !PetscDefined(HAVE_MPIUNI)
1195   fftw_mpi_init();
1196 #endif
1197   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1198   pdim[0] = dim[0];
1199 #if !defined(PETSC_USE_COMPLEX)
1200   tot_dim = 2*dim[0];
1201 #endif
1202   for (ctr=1; ctr<ndim; ctr++) {
1203     partial_dim *= dim[ctr];
1204     pdim[ctr]    = dim[ctr];
1205 #if !defined(PETSC_USE_COMPLEX)
1206     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1207     else             tot_dim *= dim[ctr];
1208 #endif
1209   }
1210 
1211   if (size == 1) {
1212 #if defined(PETSC_USE_COMPLEX)
1213     PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N));
1214     fft->n = fft->N;
1215 #else
1216     PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim));
1217     fft->n = tot_dim;
1218 #endif
1219 #if !PetscDefined(HAVE_MPIUNI)
1220   } else {
1221     ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
1222 #if !defined(PETSC_USE_COMPLEX)
1223     ptrdiff_t temp;
1224     PetscInt  N1;
1225 #endif
1226 
1227     switch (ndim) {
1228     case 1:
1229 #if !defined(PETSC_USE_COMPLEX)
1230       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1231 #else
1232       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1233       fft->n = (PetscInt)local_n0;
1234       PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N));
1235 #endif
1236       break;
1237     case 2:
1238 #if defined(PETSC_USE_COMPLEX)
1239       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1240       fft->n    = (PetscInt)local_n0*dim[1];
1241       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1242 #else
1243       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1244 
1245       fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1246       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)));
1247 #endif
1248       break;
1249     case 3:
1250 #if defined(PETSC_USE_COMPLEX)
1251       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1252 
1253       fft->n = (PetscInt)local_n0*dim[1]*dim[2];
1254       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1255 #else
1256       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1257 
1258       fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1259       PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)));
1260 #endif
1261       break;
1262     default:
1263 #if defined(PETSC_USE_COMPLEX)
1264       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1265 
1266       fft->n = (PetscInt)local_n0*partial_dim;
1267       PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
1268 #else
1269       temp = pdim[ndim-1];
1270 
1271       pdim[ndim-1] = temp/2 + 1;
1272 
1273       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1274 
1275       fft->n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1276       N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1277 
1278       pdim[ndim-1] = temp;
1279 
1280       PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1));
1281 #endif
1282       break;
1283     }
1284 #endif
1285   }
1286   free(pdim);
1287   PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW));
1288   PetscCall(PetscNewLog(A,&fftw));
1289   fft->data = (void*)fftw;
1290 
1291   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1292   fftw->partial_dim = partial_dim;
1293 
1294   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1295   if (size == 1) {
1296 #if defined(PETSC_USE_64BIT_INDICES)
1297     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1298 #else
1299     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1300 #endif
1301   }
1302 
1303   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1304 
1305   fftw->p_forward  = NULL;
1306   fftw->p_backward = NULL;
1307   fftw->p_flag     = FFTW_ESTIMATE;
1308 
1309   if (size == 1) {
1310     A->ops->mult          = MatMult_SeqFFTW;
1311     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1312 #if !PetscDefined(HAVE_MPIUNI)
1313   } else {
1314     A->ops->mult          = MatMult_MPIFFTW;
1315     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1316 #endif
1317   }
1318   fft->matdestroy = MatDestroy_FFTW;
1319   A->assembled    = PETSC_TRUE;
1320   A->preallocated = PETSC_TRUE;
1321 
1322   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW));
1323   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW));
1324   PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW));
1325 
1326   /* get runtime options */
1327   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
1328   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg));
1329   if (flg) fftw->p_flag = iplans[p_flag];
1330   PetscOptionsEnd();
1331   PetscFunctionReturn(0);
1332 }
1333