xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision df4cd43f92eaa320656440c40edb1046daee8f75)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #if !PetscDefined(HAVE_MPIUNI)
10   #include <fftw3-mpi.h>
11 #else
12   #include <fftw3.h>
13 #endif
14 EXTERN_C_END
15 
typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw; /* number of dimensions and the dimension sizes, in FFTW's preferred ptrdiff_t type */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims; /* size/stride descriptors for FFTW's guru64 planner interface (filled in MatMult_SeqFFTW) */
#else
  fftw_iodim *iodims; /* size/stride descriptors for FFTW's guru planner interface (filled in MatMult_SeqFFTW) */
#endif
  PetscInt     partial_dim;           /* NOTE(review): presumably the product of all but the leading dimension; set outside this chunk -- confirm at the setup site */
  fftw_plan    p_forward, p_backward; /* cached FFTW plans, created lazily on first MatMult()/MatMultTranspose() */
  unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
29 
/* Forward declarations; the implementations appear later in this file */
extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif
39 
/*
   MatMult_SeqFFTW performs forward DFT

   Input parameters:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores result of FDFT

   Note:
   The FFTW plan is created lazily on the first call and cached; later calls reuse it, switching
   to fftw_execute_dft()/fftw_execute_dft_r2c() when the array addresses differ from the ones
   the plan was created with (FFTW requires the new-array execute functions in that case).
*/
PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
  #else
  fftw_iodim *iodims;
  #endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* no cached plan yet: create one for these arrays, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default: /* arbitrary rank: use FFTW's guru interface */
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
  #if defined(PETSC_USE_64BIT_INDICES)
      /* Build row-major strides: the last dimension is contiguous, each earlier stride
         is the product of the sizes of the faster-varying dimensions */
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
  #else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
  #endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
                planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else {                                                         /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
141 
/* MatMultTranspose_SeqFFTW performs serial backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which BDFT will be performed

   Output parameter:
     y - vector that stores result of BDFT

   Note:
   FFTW's backward transform is unnormalized: a forward followed by a backward transform
   scales the data by the product of the dimensions (see the FFTW manual).
*/

PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
  #else
  fftw_iodim *iodims = fftw->iodims;
  #endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* no cached plan yet: create one for these arrays, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default: /* arbitrary rank: use FFTW's guru interface */
#if defined(PETSC_USE_COMPLEX)
  #if defined(PETSC_USE_64BIT_INDICES)
      /* NOTE(review): iodims is filled by MatMult_SeqFFTW(); if MatMultTranspose is called before
         MatMult the entries may be uninitialized -- confirm the setup routine fills them */
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
  #endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                         /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
222 
223 #if !PetscDefined(HAVE_MPIUNI)
/* MatMult_MPIFFTW performs forward DFT in parallel
   Input parameters:
     A - the matrix
     x - the vector on which FDFT will be performed

   Output parameter:
     y - vector that stores result of FDFT

   Note:
   x and y must have the FFTW-compatible parallel layouts produced by MatCreateVecsFFTW().
*/
PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* no cached plan yet: create one for these arrays, then execute it */
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      /* NOTE(review): the r2c branches pass FFTW_ESTIMATE instead of fftw->p_flag -- confirm this is intentional */
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
  #else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
                planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else {                                                         /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
292 
/*
   MatMultTranspose_MPIFFTW performs parallel backward DFT
   Input parameters:
     A - the matrix
     x - the vector on which BDFT will be performed

   Output parameter:
     y - vector that stores result of BDFT

   Note:
   As in the sequential case, the backward transform is unnormalized (see the FFTW manual).
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* no cached plan yet: create one for these arrays, then execute it */
    switch (ndim) {
    case 1:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
  #endif
      break;
    case 2:
  #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      /* NOTE(review): the c2r branches pass FFTW_ESTIMATE instead of fftw->p_flag -- confirm this is intentional */
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    case 3:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    default:
  #if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
  #else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
  #endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                         /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
360 #endif
361 
362 PetscErrorCode MatDestroy_FFTW(Mat A)
363 {
364   Mat_FFT  *fft  = (Mat_FFT *)A->data;
365   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
366 
367   PetscFunctionBegin;
368   fftw_destroy_plan(fftw->p_forward);
369   fftw_destroy_plan(fftw->p_backward);
370   if (fftw->iodims) free(fftw->iodims);
371   PetscCall(PetscFree(fftw->dim_fftw));
372   PetscCall(PetscFree(fft->data));
373   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
374   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
375   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
376 #if !PetscDefined(HAVE_MPIUNI)
377   fftw_mpi_cleanup();
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 #if !PetscDefined(HAVE_MPIUNI)
383   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
/* Destroys a parallel vector whose data array was obtained from fftw_malloc()
   (see MatCreateVecsFFTW_FFTW()): the array must be released with fftw_free()
   rather than PetscFree(); VecDestroy_MPI() then disposes of the Vec itself. */
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &array));
  fftw_free((fftw_complex *)array);
  PetscCall(VecRestoreArray(v, &array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
395 #endif
396 
397 #if !PetscDefined(HAVE_MPIUNI)
398 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
399 {
400   Mat A;
401 
402   PetscFunctionBegin;
403   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
404   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
405   PetscFunctionReturn(PETSC_SUCCESS);
406 }
407 
408 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
409 {
410   Mat A;
411 
412   PetscFunctionBegin;
413   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
414   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
415   PetscFunctionReturn(PETSC_SUCCESS);
416 }
417 
418 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
419 {
420   Mat A;
421 
422   PetscFunctionBegin;
423   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
424   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
425   PetscFunctionReturn(PETSC_SUCCESS);
426 }
427 #endif
428 
/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by `MATFFTW`

   Collective

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

   Options Database Key:
.  -mat_fftw_plannerflags - set FFTW planner flags

  Level: advanced

  Notes:
  The parallel layout of the output of the forward FFTW is always the same as the input
  of the backward FFTW. But the parallel layout of the input vector of the forward
  FFTW might not be the same as the output of the backward FFTW.

  We need to provide enough space while doing a parallel real transform.
  We need to pad extra zeros at the end of the last dimension. For this reason one needs to
  invoke the fftw_mpi_local_size_transposed() routine. Remember one has to change the
  last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
  depends on whether the last dimension is even or odd. If the last dimension is even one needs
  to pad two zeros; if it is odd only one zero is needed.

  Lastly one needs some scratch space at the end of the data set in each process. alloc_local
  figures out how much space is needed, i.e. it figures out the data+scratch space for
  each processor and returns that.

  Developer Note:
  How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

.seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}
475 
476 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
477 {
478   PetscMPIInt size, rank;
479   MPI_Comm    comm;
480   Mat_FFT    *fft = (Mat_FFT *)A->data;
481 
482   PetscFunctionBegin;
483   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
484   PetscValidType(A, 1);
485   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
486 
487   PetscCallMPI(MPI_Comm_size(comm, &size));
488   PetscCallMPI(MPI_Comm_rank(comm, &rank));
489   if (size == 1) { /* sequential case */
490 #if defined(PETSC_USE_COMPLEX)
491     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
492     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
493     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
494 #else
495     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
496     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
497     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
498 #endif
499 #if !PetscDefined(HAVE_MPIUNI)
500   } else { /* parallel cases */
501     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
502     PetscInt      ndim = fft->ndim, *dim = fft->dim;
503     ptrdiff_t     alloc_local, local_n0, local_0_start;
504     ptrdiff_t     local_n1;
505     fftw_complex *data_fout;
506     ptrdiff_t     local_1_start;
507   #if defined(PETSC_USE_COMPLEX)
508     fftw_complex *data_fin, *data_bout;
509   #else
510     double   *data_finr, *data_boutr;
511     PetscInt  n1, N1;
512     ptrdiff_t temp;
513   #endif
514 
515     switch (ndim) {
516     case 1:
517   #if !defined(PETSC_USE_COMPLEX)
518       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
519   #else
520       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
521       if (fin) {
522         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
523         PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
524         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
525         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
526         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
527       }
528       if (fout) {
529         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
530         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
531         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
532         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
533         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
534       }
535       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
536       if (bout) {
537         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
538         PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
539         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
540         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
541         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
542       }
543       break;
544   #endif
545     case 2:
546   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
547       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
548       N1          = 2 * dim[0] * (dim[1] / 2 + 1);
549       n1          = 2 * local_n0 * (dim[1] / 2 + 1);
550       if (fin) {
551         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
552         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
553         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
554         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
555         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
556       }
557       if (fout) {
558         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
559         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
560         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
561         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
562         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
563       }
564       if (bout) {
565         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
566         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
567         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
568         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
569         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
570       }
571   #else
572       /* Get local size */
573       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
574       if (fin) {
575         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
576         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
577         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
578         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
579         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
580       }
581       if (fout) {
582         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
583         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
584         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
585         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
586         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
587       }
588       if (bout) {
589         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
590         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
591         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
592         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
593         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
594       }
595   #endif
596       break;
597     case 3:
598   #if !defined(PETSC_USE_COMPLEX)
599       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
600       N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
601       n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
602       if (fin) {
603         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
604         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
605         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
606         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
607         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
608       }
609       if (fout) {
610         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
611         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
612         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
613         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
614         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
615       }
616       if (bout) {
617         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
618         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
619         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
620         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
621         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
622       }
623   #else
624       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
625       if (fin) {
626         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
627         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
628         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
629         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
630         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
631       }
632       if (fout) {
633         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
634         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
635         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
636         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
637         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
638       }
639       if (bout) {
640         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
641         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
642         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
643         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
644         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
645       }
646   #endif
647       break;
648     default:
649   #if !defined(PETSC_USE_COMPLEX)
650       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
651       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
652       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
653       N1                                    = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
654       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
655 
656       if (fin) {
657         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
658         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
659         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
660         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
661         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
662       }
663       if (fout) {
664         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
665         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
666         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
667         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
668         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
669       }
670       if (bout) {
671         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
672         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
673         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
674         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
675         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
676       }
677   #else
678       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
679       if (fin) {
680         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
681         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
682         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
683         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
684         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
685       }
686       if (fout) {
687         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
688         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
689         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
690         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
691         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
692       }
693       if (bout) {
694         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
695         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
696         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
697         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
698         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
699       }
700   #endif
701       break;
702     }
703     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
704        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
705        memory leaks. We void these pointers here to avoid accident uses.
706     */
707     if (fin) (*fin)->ops->replacearray = NULL;
708     if (fout) (*fout)->ops->replacearray = NULL;
709     if (bout) (*bout)->ops->replacearray = NULL;
710 #endif
711   }
712   PetscFunctionReturn(PETSC_SUCCESS);
713 }
714 
715 /*@
716    VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
717 
718    Collective
719 
720    Input Parameters:
721 +  A - FFTW matrix
722 -  x - the PETSc vector
723 
724    Output Parameter:
725 .  y - the FFTW vector
726 
727    Level: intermediate
728 
729    Note:
730    For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
731    one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
732    zeros. This routine does that job by a scattering operation.
733 
734 .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
735 @*/
736 PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
737 {
738   PetscFunctionBegin;
739   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
740   PetscFunctionReturn(PETSC_SUCCESS);
741 }
742 
743 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
744 {
745   MPI_Comm    comm;
746   Mat_FFT    *fft = (Mat_FFT *)A->data;
747   PetscInt    low;
748   PetscMPIInt rank, size;
749   PetscInt    vsize, vsize1;
750   VecScatter  vecscat;
751   IS          list1;
752 
753   PetscFunctionBegin;
754   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
755   PetscCallMPI(MPI_Comm_size(comm, &size));
756   PetscCallMPI(MPI_Comm_rank(comm, &rank));
757   PetscCall(VecGetOwnershipRange(y, &low, NULL));
758 
759   if (size == 1) {
760     PetscCall(VecGetSize(x, &vsize));
761     PetscCall(VecGetSize(y, &vsize1));
762     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
763     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
764     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
765     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
766     PetscCall(VecScatterDestroy(&vecscat));
767     PetscCall(ISDestroy(&list1));
768 #if !PetscDefined(HAVE_MPIUNI)
769   } else {
770     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
771     PetscInt  ndim = fft->ndim, *dim = fft->dim;
772     ptrdiff_t local_n0, local_0_start;
773     ptrdiff_t local_n1, local_1_start;
774     IS        list2;
775   #if !defined(PETSC_USE_COMPLEX)
776     PetscInt  i, j, k, partial_dim;
777     PetscInt *indx1, *indx2, tempindx, tempindx1;
778     PetscInt  NM;
779     ptrdiff_t temp;
780   #endif
781 
782     switch (ndim) {
783     case 1:
784   #if defined(PETSC_USE_COMPLEX)
785       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
786 
787       PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
788       PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
789       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
790       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
791       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
792       PetscCall(VecScatterDestroy(&vecscat));
793       PetscCall(ISDestroy(&list1));
794       PetscCall(ISDestroy(&list2));
795   #else
796       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
797   #endif
798       break;
799     case 2:
800   #if defined(PETSC_USE_COMPLEX)
801       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
802 
803       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
804       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
805       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
806       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
807       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
808       PetscCall(VecScatterDestroy(&vecscat));
809       PetscCall(ISDestroy(&list1));
810       PetscCall(ISDestroy(&list2));
811   #else
812       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
813 
814       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
815       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
816 
817       if (dim[1] % 2 == 0) {
818         NM = dim[1] + 2;
819       } else {
820         NM = dim[1] + 1;
821       }
822       for (i = 0; i < local_n0; i++) {
823         for (j = 0; j < dim[1]; j++) {
824           tempindx  = i * dim[1] + j;
825           tempindx1 = i * NM + j;
826 
827           indx1[tempindx] = local_0_start * dim[1] + tempindx;
828           indx2[tempindx] = low + tempindx1;
829         }
830       }
831 
832       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
833       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
834 
835       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
836       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
837       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
838       PetscCall(VecScatterDestroy(&vecscat));
839       PetscCall(ISDestroy(&list1));
840       PetscCall(ISDestroy(&list2));
841       PetscCall(PetscFree(indx1));
842       PetscCall(PetscFree(indx2));
843   #endif
844       break;
845 
846     case 3:
847   #if defined(PETSC_USE_COMPLEX)
848       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
849 
850       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
851       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
852       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
853       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
854       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
855       PetscCall(VecScatterDestroy(&vecscat));
856       PetscCall(ISDestroy(&list1));
857       PetscCall(ISDestroy(&list2));
858   #else
859       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
860       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
861       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
862 
863       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
864       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
865 
866       if (dim[2] % 2 == 0) NM = dim[2] + 2;
867       else NM = dim[2] + 1;
868 
869       for (i = 0; i < local_n0; i++) {
870         for (j = 0; j < dim[1]; j++) {
871           for (k = 0; k < dim[2]; k++) {
872             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
873             tempindx1 = i * dim[1] * NM + j * NM + k;
874 
875             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
876             indx2[tempindx] = low + tempindx1;
877           }
878         }
879       }
880 
881       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
882       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
883       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
884       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
885       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
886       PetscCall(VecScatterDestroy(&vecscat));
887       PetscCall(ISDestroy(&list1));
888       PetscCall(ISDestroy(&list2));
889       PetscCall(PetscFree(indx1));
890       PetscCall(PetscFree(indx2));
891   #endif
892       break;
893 
894     default:
895   #if defined(PETSC_USE_COMPLEX)
896       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
897 
898       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
899       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
900       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
901       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
902       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
903       PetscCall(VecScatterDestroy(&vecscat));
904       PetscCall(ISDestroy(&list1));
905       PetscCall(ISDestroy(&list2));
906   #else
907       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
908       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
909       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
910 
911       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
912 
913       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
914 
915       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
916 
917       partial_dim = fftw->partial_dim;
918 
919       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
920       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
921 
922       if (dim[ndim - 1] % 2 == 0) NM = 2;
923       else NM = 1;
924 
925       j = low;
926       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
927         indx1[i] = local_0_start * partial_dim + i;
928         indx2[i] = j;
929         if (k % dim[ndim - 1] == 0) j += NM;
930         j++;
931       }
932       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
933       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
934       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
935       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
936       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
937       PetscCall(VecScatterDestroy(&vecscat));
938       PetscCall(ISDestroy(&list1));
939       PetscCall(ISDestroy(&list2));
940       PetscCall(PetscFree(indx1));
941       PetscCall(PetscFree(indx2));
942   #endif
943       break;
944     }
945 #endif
946   }
947   PetscFunctionReturn(PETSC_SUCCESS);
948 }
949 
950 /*@
951    VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
952 
953    Collective
954 
955    Input Parameters:
956 +  A - `MATFFTW` matrix
957 -  x - FFTW vector
958 
959    Output Parameter:
960 .  y - PETSc vector
961 
962    Level: intermediate
963 
964    Note:
965    When performing a real transform, the FFTW output of a backward DFT contains extra zeros at the end of the last dimension.
966    `VecScatterFFTWToPetsc()` removes those extra zeros.
967 
968 .seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
969 @*/
970 PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
971 {
972   PetscFunctionBegin;
973   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
974   PetscFunctionReturn(PETSC_SUCCESS);
975 }
976 
977 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
978 {
979   MPI_Comm    comm;
980   Mat_FFT    *fft = (Mat_FFT *)A->data;
981   PetscInt    low;
982   PetscMPIInt rank, size;
983   VecScatter  vecscat;
984   IS          list1;
985 
986   PetscFunctionBegin;
987   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
988   PetscCallMPI(MPI_Comm_size(comm, &size));
989   PetscCallMPI(MPI_Comm_rank(comm, &rank));
990   PetscCall(VecGetOwnershipRange(x, &low, NULL));
991 
992   if (size == 1) {
993     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
994     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
995     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
996     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
997     PetscCall(VecScatterDestroy(&vecscat));
998     PetscCall(ISDestroy(&list1));
999 
1000 #if !PetscDefined(HAVE_MPIUNI)
1001   } else {
1002     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
1003     PetscInt  ndim = fft->ndim, *dim = fft->dim;
1004     ptrdiff_t local_n0, local_0_start;
1005     ptrdiff_t local_n1, local_1_start;
1006     IS        list2;
1007   #if !defined(PETSC_USE_COMPLEX)
1008     PetscInt  i, j, k, partial_dim;
1009     PetscInt *indx1, *indx2, tempindx, tempindx1;
1010     PetscInt  NM;
1011     ptrdiff_t temp;
1012   #endif
1013     switch (ndim) {
1014     case 1:
1015   #if defined(PETSC_USE_COMPLEX)
1016       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1017 
1018       PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
1019       PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
1020       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1021       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1022       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1023       PetscCall(VecScatterDestroy(&vecscat));
1024       PetscCall(ISDestroy(&list1));
1025       PetscCall(ISDestroy(&list2));
1026   #else
1027       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
1028   #endif
1029       break;
1030     case 2:
1031   #if defined(PETSC_USE_COMPLEX)
1032       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1033 
1034       PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
1035       PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
1036       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1037       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1038       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1039       PetscCall(VecScatterDestroy(&vecscat));
1040       PetscCall(ISDestroy(&list1));
1041       PetscCall(ISDestroy(&list2));
1042   #else
1043       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1044 
1045       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
1046       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));
1047 
1048       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1049       else NM = dim[1] + 1;
1050 
1051       for (i = 0; i < local_n0; i++) {
1052         for (j = 0; j < dim[1]; j++) {
1053           tempindx  = i * dim[1] + j;
1054           tempindx1 = i * NM + j;
1055 
1056           indx1[tempindx] = local_0_start * dim[1] + tempindx;
1057           indx2[tempindx] = low + tempindx1;
1058         }
1059       }
1060 
1061       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
1062       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));
1063 
1064       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1065       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1066       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1067       PetscCall(VecScatterDestroy(&vecscat));
1068       PetscCall(ISDestroy(&list1));
1069       PetscCall(ISDestroy(&list2));
1070       PetscCall(PetscFree(indx1));
1071       PetscCall(PetscFree(indx2));
1072   #endif
1073       break;
1074     case 3:
1075   #if defined(PETSC_USE_COMPLEX)
1076       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1077 
1078       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
1079       PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
1080       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1081       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1082       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1083       PetscCall(VecScatterDestroy(&vecscat));
1084       PetscCall(ISDestroy(&list1));
1085       PetscCall(ISDestroy(&list2));
1086   #else
1087       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1088 
1089       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
1090       PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));
1091 
1092       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1093       else NM = dim[2] + 1;
1094 
1095       for (i = 0; i < local_n0; i++) {
1096         for (j = 0; j < dim[1]; j++) {
1097           for (k = 0; k < dim[2]; k++) {
1098             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
1099             tempindx1 = i * dim[1] * NM + j * NM + k;
1100 
1101             indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
1102             indx2[tempindx] = low + tempindx1;
1103           }
1104         }
1105       }
1106 
1107       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
1108       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
1109 
1110       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1111       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1112       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1113       PetscCall(VecScatterDestroy(&vecscat));
1114       PetscCall(ISDestroy(&list1));
1115       PetscCall(ISDestroy(&list2));
1116       PetscCall(PetscFree(indx1));
1117       PetscCall(PetscFree(indx2));
1118   #endif
1119       break;
1120     default:
1121   #if defined(PETSC_USE_COMPLEX)
1122       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
1123 
1124       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
1125       PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
1126       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
1127       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1128       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1129       PetscCall(VecScatterDestroy(&vecscat));
1130       PetscCall(ISDestroy(&list1));
1131       PetscCall(ISDestroy(&list2));
1132   #else
1133       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
1134 
1135       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
1136 
1137       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1138 
1139       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1140 
1141       partial_dim = fftw->partial_dim;
1142 
1143       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
1144       PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));
1145 
1146       if (dim[ndim - 1] % 2 == 0) NM = 2;
1147       else NM = 1;
1148 
1149       j = low;
1150       for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
1151         indx1[i] = local_0_start * partial_dim + i;
1152         indx2[i] = j;
1153         if (k % dim[ndim - 1] == 0) j += NM;
1154         j++;
1155       }
1156       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
1157       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
1158 
1159       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
1160       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1161       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
1162       PetscCall(VecScatterDestroy(&vecscat));
1163       PetscCall(ISDestroy(&list1));
1164       PetscCall(ISDestroy(&list2));
1165       PetscCall(PetscFree(indx1));
1166       PetscCall(PetscFree(indx2));
1167   #endif
1168       break;
1169     }
1170 #endif
1171   }
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1176 {
1177   MPI_Comm    comm;
1178   Mat_FFT    *fft = (Mat_FFT *)A->data;
1179   Mat_FFTW   *fftw;
1180   PetscInt    ndim = fft->ndim, *dim = fft->dim;
1181   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
1182   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1183   PetscBool   flg;
1184   PetscInt    p_flag, partial_dim = 1, ctr;
1185   PetscMPIInt size, rank;
1186   ptrdiff_t  *pdim;
1187 #if !defined(PETSC_USE_COMPLEX)
1188   PetscInt tot_dim;
1189 #endif
1190 
1191   PetscFunctionBegin;
1192   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1193   PetscCallMPI(MPI_Comm_size(comm, &size));
1194   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1195 
1196 #if !PetscDefined(HAVE_MPIUNI)
1197   fftw_mpi_init();
1198 #endif
1199   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
1200   pdim[0] = dim[0];
1201 #if !defined(PETSC_USE_COMPLEX)
1202   tot_dim = 2 * dim[0];
1203 #endif
1204   for (ctr = 1; ctr < ndim; ctr++) {
1205     partial_dim *= dim[ctr];
1206     pdim[ctr] = dim[ctr];
1207 #if !defined(PETSC_USE_COMPLEX)
1208     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1209     else tot_dim *= dim[ctr];
1210 #endif
1211   }
1212 
1213   if (size == 1) {
1214 #if defined(PETSC_USE_COMPLEX)
1215     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
1216     fft->n = fft->N;
1217 #else
1218     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
1219     fft->n       = tot_dim;
1220 #endif
1221 #if !PetscDefined(HAVE_MPIUNI)
1222   } else {
1223     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
1224   #if !defined(PETSC_USE_COMPLEX)
1225     ptrdiff_t temp;
1226     PetscInt  N1;
1227   #endif
1228 
1229     switch (ndim) {
1230     case 1:
1231   #if !defined(PETSC_USE_COMPLEX)
1232       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1233   #else
1234       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1235       fft->n = (PetscInt)local_n0;
1236       PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
1237   #endif
1238       break;
1239     case 2:
1240   #if defined(PETSC_USE_COMPLEX)
1241       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
1242       fft->n = (PetscInt)local_n0 * dim[1];
1243       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1244   #else
1245       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1246 
1247       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
1248       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
1249   #endif
1250       break;
1251     case 3:
1252   #if defined(PETSC_USE_COMPLEX)
1253       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
1254 
1255       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
1256       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1257   #else
1258       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1259 
1260       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
1261       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
1262   #endif
1263       break;
1264     default:
1265   #if defined(PETSC_USE_COMPLEX)
1266       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
1267 
1268       fft->n = (PetscInt)local_n0 * partial_dim;
1269       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1270   #else
1271       temp = pdim[ndim - 1];
1272 
1273       pdim[ndim - 1] = temp / 2 + 1;
1274 
1275       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
1276 
1277       fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
1278       N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
1279 
1280       pdim[ndim - 1] = temp;
1281 
1282       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1283   #endif
1284       break;
1285     }
1286 #endif
1287   }
1288   free(pdim);
1289   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
1290   PetscCall(PetscNew(&fftw));
1291   fft->data = (void *)fftw;
1292 
1293   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1294   fftw->partial_dim = partial_dim;
1295 
1296   PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
1297   if (size == 1) {
1298 #if defined(PETSC_USE_64BIT_INDICES)
1299     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1300 #else
1301     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1302 #endif
1303   }
1304 
1305   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1306 
1307   fftw->p_forward  = NULL;
1308   fftw->p_backward = NULL;
1309   fftw->p_flag     = FFTW_ESTIMATE;
1310 
1311   if (size == 1) {
1312     A->ops->mult          = MatMult_SeqFFTW;
1313     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1314 #if !PetscDefined(HAVE_MPIUNI)
1315   } else {
1316     A->ops->mult          = MatMult_MPIFFTW;
1317     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1318 #endif
1319   }
1320   fft->matdestroy = MatDestroy_FFTW;
1321   A->assembled    = PETSC_TRUE;
1322   A->preallocated = PETSC_TRUE;
1323 
1324   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
1325   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
1326   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1327 
1328   /* get runtime options */
1329   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
1330   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
1331   if (flg) fftw->p_flag = iplans[p_flag];
1332   PetscOptionsEnd();
1333   PetscFunctionReturn(PETSC_SUCCESS);
1334 }
1335