xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision ba61784b0d5279c10b9d3cdd07d6ab8635a1fe44)
1b2b77a04SHong Zhang /*
2b2b77a04SHong Zhang     Provides an interface to the FFTW package.
3c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
4b2b77a04SHong Zhang */
5b2b77a04SHong Zhang 
6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7b2b77a04SHong Zhang EXTERN_C_BEGIN
80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9c6db04a5SJed Brown   #include <fftw3-mpi.h>
100cf2e031SBarry Smith #else
110cf2e031SBarry Smith   #include <fftw3.h>
120cf2e031SBarry Smith #endif
13b2b77a04SHong Zhang EXTERN_C_END
14b2b77a04SHong Zhang 
15b2b77a04SHong Zhang typedef struct {
16*2884b08dSBarry Smith   PetscInt   ndim_fftw;
17*2884b08dSBarry Smith   ptrdiff_t *dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang   fftw_iodim *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua   PetscInt     partial_dim;
24b2b77a04SHong Zhang   fftw_plan    p_forward, p_backward;
25b2b77a04SHong Zhang   unsigned     p_flag;                                      /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26e8bdd179SBarry Smith   PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
27b2b77a04SHong Zhang } Mat_FFTW;
28b2b77a04SHong Zhang 
290cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
30b2b77a04SHong Zhang 
MatMult_SeqFFTW(Mat A,Vec x,Vec y)31523895eeSPierre Jolivet static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
32d71ae5a4SJacob Faibussowitsch {
33b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
34b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
35f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
36f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
37e8bdd179SBarry Smith   Vec                xx;
381acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
39a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
40a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
41a1b6d50cSHong Zhang   #else
428c1d5ab3SHong Zhang   fftw_iodim *iodims;
43a1b6d50cSHong Zhang   #endif
441acd80e4SHong Zhang   PetscInt i;
451acd80e4SHong Zhang #endif
461acd80e4SHong Zhang   PetscInt ndim = fft->ndim, *dim = fft->dim;
47b2b77a04SHong Zhang 
48b2b77a04SHong Zhang   PetscFunctionBegin;
49e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
50e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
51e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
52e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
53e8bdd179SBarry Smith   } else {
549566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
55e8bdd179SBarry Smith   }
569566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
576aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
58b2b77a04SHong Zhang     switch (ndim) {
59b2b77a04SHong Zhang     case 1:
6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
61b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6258a912c5SAmlan Barua #else
6358a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
6458a912c5SAmlan Barua #endif
65b2b77a04SHong Zhang       break;
66b2b77a04SHong Zhang     case 2:
6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
68b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6958a912c5SAmlan Barua #else
7058a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7158a912c5SAmlan Barua #endif
72b2b77a04SHong Zhang       break;
73b2b77a04SHong Zhang     case 3:
7458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
75b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7658a912c5SAmlan Barua #else
7758a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7858a912c5SAmlan Barua #endif
79b2b77a04SHong Zhang       break;
80b2b77a04SHong Zhang     default:
8158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
82a1b6d50cSHong Zhang       iodims = fftw->iodims;
83a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
848c1d5ab3SHong Zhang       if (ndim) {
85a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
868c1d5ab3SHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
878c1d5ab3SHong Zhang         for (i = ndim - 2; i >= 0; --i) {
88a1b6d50cSHong Zhang           iodims[i].n  = (ptrdiff_t)dim[i];
898c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
908c1d5ab3SHong Zhang         }
918c1d5ab3SHong Zhang       }
92a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
93a1b6d50cSHong Zhang   #else
94a1b6d50cSHong Zhang       if (ndim) {
95a1b6d50cSHong Zhang         iodims[ndim - 1].n  = (int)dim[ndim - 1];
96a1b6d50cSHong Zhang         iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
97a1b6d50cSHong Zhang         for (i = ndim - 2; i >= 0; --i) {
98a1b6d50cSHong Zhang           iodims[i].n  = (int)dim[i];
99a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
100a1b6d50cSHong Zhang         }
101a1b6d50cSHong Zhang       }
102a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103a1b6d50cSHong Zhang   #endif
104a1b6d50cSHong Zhang 
10558a912c5SAmlan Barua #else
106a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
10758a912c5SAmlan Barua #endif
108b2b77a04SHong Zhang       break;
109b2b77a04SHong Zhang     }
110e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
111e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
112e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
113e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
114e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
115e8bdd179SBarry Smith     } else {
116f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
117b2b77a04SHong Zhang       fftw->foutarray = y_array;
118e8bdd179SBarry Smith     }
119e8bdd179SBarry Smith   }
120e8bdd179SBarry Smith 
121b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1227e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1247e4bc134SDominic Meiser #else
1257e4bc134SDominic Meiser     fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1267e4bc134SDominic Meiser #endif
127b2b77a04SHong Zhang   } else {
128b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
129b2b77a04SHong Zhang   }
1309566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
1319566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
1323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
133b2b77a04SHong Zhang }
134b2b77a04SHong Zhang 
MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)135523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
136d71ae5a4SJacob Faibussowitsch {
137b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
138b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
139f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
140f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
141b2b77a04SHong Zhang   PetscInt           ndim = fft->ndim, *dim = fft->dim;
142e8bdd179SBarry Smith   Vec                xx;
1431acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
144a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
145a1b6d50cSHong Zhang   fftw_iodim64 *iodims = fftw->iodims;
146a1b6d50cSHong Zhang   #else
147a1b6d50cSHong Zhang   fftw_iodim *iodims = fftw->iodims;
148a1b6d50cSHong Zhang   #endif
1491acd80e4SHong Zhang #endif
150b2b77a04SHong Zhang 
151b2b77a04SHong Zhang   PetscFunctionBegin;
152e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
153e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
154e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
155e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
156e8bdd179SBarry Smith   } else {
1579566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
158e8bdd179SBarry Smith   }
1599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
1606aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
161b2b77a04SHong Zhang     switch (ndim) {
162b2b77a04SHong Zhang     case 1:
16358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
164b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16558a912c5SAmlan Barua #else
166e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
16758a912c5SAmlan Barua #endif
168b2b77a04SHong Zhang       break;
169b2b77a04SHong Zhang     case 2:
17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17258a912c5SAmlan Barua #else
173e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17458a912c5SAmlan Barua #endif
175b2b77a04SHong Zhang       break;
176b2b77a04SHong Zhang     case 3:
17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17958a912c5SAmlan Barua #else
180e81bb599SAmlan Barua       fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18158a912c5SAmlan Barua #endif
182b2b77a04SHong Zhang       break;
183b2b77a04SHong Zhang     default:
18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
185a1b6d50cSHong Zhang   #if defined(PETSC_USE_64BIT_INDICES)
186a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
187a1b6d50cSHong Zhang   #else
1888c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
189a1b6d50cSHong Zhang   #endif
19058a912c5SAmlan Barua #else
191a31b9140SHong Zhang       fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19258a912c5SAmlan Barua #endif
193b2b77a04SHong Zhang       break;
194b2b77a04SHong Zhang     }
195e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
196e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
197e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
198e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
199e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
200e8bdd179SBarry Smith     } else {
201f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
202b2b77a04SHong Zhang       fftw->boutarray = y_array;
203e8bdd179SBarry Smith     }
204e8bdd179SBarry Smith   }
205b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2067e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
207b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2087e4bc134SDominic Meiser #else
2097e4bc134SDominic Meiser     fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2107e4bc134SDominic Meiser #endif
211b2b77a04SHong Zhang   } else {
2122f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
213b2b77a04SHong Zhang   }
2149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
217b2b77a04SHong Zhang }
218b2b77a04SHong Zhang 
2190cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
MatMult_MPIFFTW(Mat A,Vec x,Vec y)220523895eeSPierre Jolivet static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
221d71ae5a4SJacob Faibussowitsch {
222b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
223b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
224f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
225f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
226c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
227ce94432eSBarry Smith   MPI_Comm           comm;
228e8bdd179SBarry Smith   Vec                xx;
229b2b77a04SHong Zhang 
230b2b77a04SHong Zhang   PetscFunctionBegin;
2319566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
232e8bdd179SBarry Smith   if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
233e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
234e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
235e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
236e8bdd179SBarry Smith   } else {
2379566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
238e8bdd179SBarry Smith   }
2399566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
2406aad120cSJose E. Roman   if (!fftw->p_forward) { /* create a plan, then execute it */
241b2b77a04SHong Zhang     switch (ndim) {
242b2b77a04SHong Zhang     case 1:
2433c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
245ae0a50aaSHong Zhang   #else
2464f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2473c3a9c75SAmlan Barua   #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     case 2:
25097504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2523c3a9c75SAmlan Barua   #else
2533c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua   #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     case 3:
2573c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
258b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2593c3a9c75SAmlan Barua   #else
26051d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2613c3a9c75SAmlan Barua   #endif
262b2b77a04SHong Zhang       break;
263b2b77a04SHong Zhang     default:
2643c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
265c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2663c3a9c75SAmlan Barua   #else
2673c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2683c3a9c75SAmlan Barua   #endif
269b2b77a04SHong Zhang       break;
270b2b77a04SHong Zhang     }
271e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
272e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
273e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
274e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
275e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
276e8bdd179SBarry Smith     } else {
277f4fca6d4SMatthew G. Knepley       fftw->finarray  = (PetscScalar *)x_array;
278b2b77a04SHong Zhang       fftw->foutarray = y_array;
279e8bdd179SBarry Smith     }
280e8bdd179SBarry Smith   }
281b2b77a04SHong Zhang   if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
282b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
283b2b77a04SHong Zhang   } else {
284b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
285b2b77a04SHong Zhang   }
2869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
2879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
2883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
289b2b77a04SHong Zhang }
290b2b77a04SHong Zhang 
MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)291523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
292d71ae5a4SJacob Faibussowitsch {
293b2b77a04SHong Zhang   Mat_FFT           *fft  = (Mat_FFT *)A->data;
294b2b77a04SHong Zhang   Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
295f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
296f4fca6d4SMatthew G. Knepley   PetscScalar       *y_array;
297c92418ccSAmlan Barua   PetscInt           ndim = fft->ndim, *dim = fft->dim;
298ce94432eSBarry Smith   MPI_Comm           comm;
299e8bdd179SBarry Smith   Vec                xx;
300b2b77a04SHong Zhang 
301b2b77a04SHong Zhang   PetscFunctionBegin;
3029566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
303e8bdd179SBarry Smith   if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
304e8bdd179SBarry Smith     /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
305e8bdd179SBarry Smith     PetscCall(VecDuplicate(x, &xx));
306e8bdd179SBarry Smith     PetscCall(VecGetArrayRead(xx, &x_array));
307e8bdd179SBarry Smith   } else {
3089566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &x_array));
309e8bdd179SBarry Smith   }
3109566063dSJacob Faibussowitsch   PetscCall(VecGetArray(y, &y_array));
3116aad120cSJose E. Roman   if (!fftw->p_backward) { /* create a plan, then execute it */
312b2b77a04SHong Zhang     switch (ndim) {
313b2b77a04SHong Zhang     case 1:
3143c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
315b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316ae0a50aaSHong Zhang   #else
3174f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3183c3a9c75SAmlan Barua   #endif
319b2b77a04SHong Zhang       break;
320b2b77a04SHong Zhang     case 2:
32197504071SAmlan Barua   #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3233c3a9c75SAmlan Barua   #else
3243c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3253c3a9c75SAmlan Barua   #endif
326b2b77a04SHong Zhang       break;
327b2b77a04SHong Zhang     case 3:
3283c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
329b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3303c3a9c75SAmlan Barua   #else
3313c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3323c3a9c75SAmlan Barua   #endif
333b2b77a04SHong Zhang       break;
334b2b77a04SHong Zhang     default:
3353c3a9c75SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
336c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3373c3a9c75SAmlan Barua   #else
3383c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3393c3a9c75SAmlan Barua   #endif
340b2b77a04SHong Zhang       break;
341b2b77a04SHong Zhang     }
342e8bdd179SBarry Smith     if (fftw->p_flag != FFTW_ESTIMATE) {
343e8bdd179SBarry Smith       /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
344e8bdd179SBarry Smith       PetscCall(VecRestoreArrayRead(xx, &x_array));
345e8bdd179SBarry Smith       PetscCall(VecDestroy(&xx));
346e8bdd179SBarry Smith       PetscCall(VecGetArrayRead(x, &x_array));
347e8bdd179SBarry Smith     } else {
348f4fca6d4SMatthew G. Knepley       fftw->binarray  = (PetscScalar *)x_array;
349b2b77a04SHong Zhang       fftw->boutarray = y_array;
350e8bdd179SBarry Smith     }
351e8bdd179SBarry Smith   }
352b2b77a04SHong Zhang   if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353b2b77a04SHong Zhang     fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
354b2b77a04SHong Zhang   } else {
3552f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
356b2b77a04SHong Zhang   }
3579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(y, &y_array));
3589566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(x, &x_array));
3593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
360b2b77a04SHong Zhang }
3610cf2e031SBarry Smith #endif
362b2b77a04SHong Zhang 
MatDestroy_FFTW(Mat A)363523895eeSPierre Jolivet static PetscErrorCode MatDestroy_FFTW(Mat A)
364d71ae5a4SJacob Faibussowitsch {
365b2b77a04SHong Zhang   Mat_FFT  *fft  = (Mat_FFT *)A->data;
366b2b77a04SHong Zhang   Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367b2b77a04SHong Zhang 
368b2b77a04SHong Zhang   PetscFunctionBegin;
369b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
370b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
371ad540459SPierre Jolivet   if (fftw->iodims) free(fftw->iodims);
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree(fftw->dim_fftw));
3739566063dSJacob Faibussowitsch   PetscCall(PetscFree(fft->data));
3742e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3752e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3762e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3786ccf0b3eSHong Zhang   fftw_mpi_cleanup();
3790cf2e031SBarry Smith #endif
3803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
381b2b77a04SHong Zhang }
382b2b77a04SHong Zhang 
3830cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
384c6db04a5SJed Brown   #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I  "petscvec.h"   I*/
VecDestroy_MPIFFTW(Vec v)385523895eeSPierre Jolivet static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
386d71ae5a4SJacob Faibussowitsch {
387b2b77a04SHong Zhang   PetscScalar *array;
388b2b77a04SHong Zhang 
389b2b77a04SHong Zhang   PetscFunctionBegin;
3909566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &array));
3912f613bf5SBarry Smith   fftw_free((fftw_complex *)array);
3929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &array));
3939566063dSJacob Faibussowitsch   PetscCall(VecDestroy_MPI(v));
3943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
395b2b77a04SHong Zhang }
3960cf2e031SBarry Smith #endif
397b2b77a04SHong Zhang 
3980cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
VecDuplicate_FFTW_fin(Vec fin,Vec * fin_new)399d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
400d71ae5a4SJacob Faibussowitsch {
4015b113e21Ss-sajid-ali   Mat A;
4025b113e21Ss-sajid-ali 
4035b113e21Ss-sajid-ali   PetscFunctionBegin;
4049566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
4059566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
4063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4075b113e21Ss-sajid-ali }
4085b113e21Ss-sajid-ali 
VecDuplicate_FFTW_fout(Vec fout,Vec * fout_new)409d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
410d71ae5a4SJacob Faibussowitsch {
4115b113e21Ss-sajid-ali   Mat A;
4125b113e21Ss-sajid-ali 
4135b113e21Ss-sajid-ali   PetscFunctionBegin;
4149566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4159566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4163ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4175b113e21Ss-sajid-ali }
4185b113e21Ss-sajid-ali 
VecDuplicate_FFTW_bout(Vec bout,Vec * bout_new)419d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
420d71ae5a4SJacob Faibussowitsch {
4215b113e21Ss-sajid-ali   Mat A;
4225b113e21Ss-sajid-ali 
4235b113e21Ss-sajid-ali   PetscFunctionBegin;
4249566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4259566063dSJacob Faibussowitsch   PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4263ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4275b113e21Ss-sajid-ali }
4280cf2e031SBarry Smith #endif
4295b113e21Ss-sajid-ali 
4304be45526SHong Zhang /*@
4312a7a6963SBarry Smith   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
43211a5261eSBarry Smith   parallel layout determined by `MATFFTW`
4334f7415efSAmlan Barua 
434c3339decSBarry Smith   Collective
4354f7415efSAmlan Barua 
4364f7415efSAmlan Barua   Input Parameter:
4373564f093SHong Zhang . A - the matrix
4384f7415efSAmlan Barua 
439d8d19677SJose E. Roman   Output Parameters:
440cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW
4416b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW
442cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW
4434f7415efSAmlan Barua 
4442ef1f0ffSBarry Smith   Options Database Key:
4452ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags
4462ef1f0ffSBarry Smith 
4474f7415efSAmlan Barua   Level: advanced
4483564f093SHong Zhang 
44911a5261eSBarry Smith   Notes:
45011a5261eSBarry Smith   The parallel layout of output of forward FFTW is always same as the input
45197504071SAmlan Barua   of the backward FFTW. But parallel layout of the input vector of forward
45297504071SAmlan Barua   FFTW might not be same as the output of backward FFTW.
45311a5261eSBarry Smith 
45411a5261eSBarry Smith   We need to provide enough space while doing parallel real transform.
45597504071SAmlan Barua   We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45697504071SAmlan Barua   invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
457e0ec536eSAmlan Barua   last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
458e0ec536eSAmlan Barua   depends on if the last dimension is even or odd. If the last dimension is even need to pad two
459e0ec536eSAmlan Barua   zeros if it is odd only one zero is needed.
46011a5261eSBarry Smith 
461e0ec536eSAmlan Barua   Lastly one needs some scratch space at the end of data set in each process. alloc_local
462e0ec536eSAmlan Barua   figures out how much space is needed, i.e. it figures out the data+scratch space for
463e0ec536eSAmlan Barua   each processor and returns that.
4644f7415efSAmlan Barua 
465fe59aa6dSJacob Faibussowitsch   Developer Notes:
46611a5261eSBarry Smith   How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
46711a5261eSBarry Smith 
4681cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4694be45526SHong Zhang @*/
MatCreateVecsFFTW(Mat A,Vec * x,Vec * y,Vec * z)470d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
471d71ae5a4SJacob Faibussowitsch {
4724be45526SHong Zhang   PetscFunctionBegin;
473cac4c232SBarry Smith   PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4754be45526SHong Zhang }
4764be45526SHong Zhang 
MatCreateVecsFFTW_FFTW(Mat A,Vec * fin,Vec * fout,Vec * bout)477d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
478d71ae5a4SJacob Faibussowitsch {
4794f7415efSAmlan Barua   PetscMPIInt size, rank;
480ce94432eSBarry Smith   MPI_Comm    comm;
4814f7415efSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
4824f7415efSAmlan Barua 
4834f7415efSAmlan Barua   PetscFunctionBegin;
4844f7415efSAmlan Barua   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4854f7415efSAmlan Barua   PetscValidType(A, 1);
4869566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4874f7415efSAmlan Barua 
4889566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
4899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4904f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4914f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4929566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4939566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4949566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4954f7415efSAmlan Barua #else
4969566063dSJacob Faibussowitsch     if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4979566063dSJacob Faibussowitsch     if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4989566063dSJacob Faibussowitsch     if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4994f7415efSAmlan Barua #endif
5000cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50197504071SAmlan Barua   } else { /* parallel cases */
5020cf2e031SBarry Smith     Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
5030cf2e031SBarry Smith     PetscInt      ndim = fft->ndim, *dim = fft->dim;
5044f7415efSAmlan Barua     ptrdiff_t     alloc_local, local_n0, local_0_start;
5059496c9aeSAmlan Barua     ptrdiff_t     local_n1;
5069496c9aeSAmlan Barua     fftw_complex *data_fout;
5079496c9aeSAmlan Barua     ptrdiff_t     local_1_start;
508*2884b08dSBarry Smith     PetscInt      n1, N1;
5099496c9aeSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
5109496c9aeSAmlan Barua     fftw_complex *data_fin, *data_bout;
5119496c9aeSAmlan Barua   #else
5124f7415efSAmlan Barua     double   *data_finr, *data_boutr;
5139496c9aeSAmlan Barua     ptrdiff_t temp;
5149496c9aeSAmlan Barua   #endif
5159496c9aeSAmlan Barua 
5164f7415efSAmlan Barua     switch (ndim) {
5174f7415efSAmlan Barua     case 1:
51857625b84SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
5194f8276c3SHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
52057625b84SAmlan Barua   #else
52157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52257625b84SAmlan Barua       if (fin) {
52357625b84SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524*2884b08dSBarry Smith         PetscCall(PetscIntCast(local_n0, &n1));
525*2884b08dSBarry Smith         PetscCall(PetscIntCast(fft->N, &N1));
526*2884b08dSBarry Smith         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
5279566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5285b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
52957625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
53057625b84SAmlan Barua       }
53157625b84SAmlan Barua       if (fout) {
53257625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
533*2884b08dSBarry Smith         PetscCall(PetscIntCast(local_n1, &n1));
534*2884b08dSBarry Smith         PetscCall(PetscIntCast(fft->N, &N1));
535*2884b08dSBarry Smith         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
5369566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5375b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
53857625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
53957625b84SAmlan Barua       }
54057625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
54157625b84SAmlan Barua       if (bout) {
54257625b84SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
543*2884b08dSBarry Smith         PetscCall(PetscIntCast(local_n1, &n1));
544*2884b08dSBarry Smith         PetscCall(PetscIntCast(fft->N, &N1));
545*2884b08dSBarry Smith         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
5469566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5475b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54857625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
54957625b84SAmlan Barua       }
55057625b84SAmlan Barua       break;
55157625b84SAmlan Barua   #endif
5524f7415efSAmlan Barua     case 2:
55397504071SAmlan Barua   #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5544f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
555*2884b08dSBarry Smith       PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
556*2884b08dSBarry Smith       PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
5574f7415efSAmlan Barua       if (fin) {
5584f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5599566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5609566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5615b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5624f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5634f7415efSAmlan Barua       }
56457625b84SAmlan Barua       if (fout) {
56557625b84SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5669566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5679566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5685b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
56957625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
57057625b84SAmlan Barua       }
5715b113e21Ss-sajid-ali       if (bout) {
5725b113e21Ss-sajid-ali         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5739566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5749566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5755b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5765b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5775b113e21Ss-sajid-ali       }
5784f7415efSAmlan Barua   #else
5794f7415efSAmlan Barua       /* Get local size */
5804f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5814f7415efSAmlan Barua       if (fin) {
5824f7415efSAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5839566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5849566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5855b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5864f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5874f7415efSAmlan Barua       }
5884f7415efSAmlan Barua       if (fout) {
5894f7415efSAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5909566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5919566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5925b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5934f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua       }
5954f7415efSAmlan Barua       if (bout) {
5964f7415efSAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5979566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5989566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5995b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6004f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6014f7415efSAmlan Barua       }
6024f7415efSAmlan Barua   #endif
6034f7415efSAmlan Barua       break;
6044f7415efSAmlan Barua     case 3:
6054f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6064f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
607*2884b08dSBarry Smith       PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
608*2884b08dSBarry Smith       PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
6094f7415efSAmlan Barua       if (fin) {
6104f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6119566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
6129566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6135b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6144f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6154f7415efSAmlan Barua       }
6165b113e21Ss-sajid-ali       if (fout) {
6175b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6189566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
6199566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6205b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6215b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6225b113e21Ss-sajid-ali       }
6234f7415efSAmlan Barua       if (bout) {
6244f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6259566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6269566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6275b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6284f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6294f7415efSAmlan Barua       }
6304f7415efSAmlan Barua   #else
6310c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6320c9b18e4SAmlan Barua       if (fin) {
6330c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6349566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6359566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6365b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6370c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6380c9b18e4SAmlan Barua       }
6390c9b18e4SAmlan Barua       if (fout) {
6400c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6419566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6429566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6435b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6440c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6450c9b18e4SAmlan Barua       }
6460c9b18e4SAmlan Barua       if (bout) {
6470c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6489566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6499566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6505b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6510c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6520c9b18e4SAmlan Barua       }
6534f7415efSAmlan Barua   #endif
6544f7415efSAmlan Barua       break;
6554f7415efSAmlan Barua     default:
6564f7415efSAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
6574f7415efSAmlan Barua       temp                                  = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6584f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6594f7415efSAmlan Barua       alloc_local                           = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
660f4f49eeaSPierre Jolivet       N1                                    = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6614f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6624f7415efSAmlan Barua 
6634f7415efSAmlan Barua       if (fin) {
6644f7415efSAmlan Barua         data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6659566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6669566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6675b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6684f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6694f7415efSAmlan Barua       }
6705b113e21Ss-sajid-ali       if (fout) {
6715b113e21Ss-sajid-ali         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6729566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6739566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6745b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6755b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6765b113e21Ss-sajid-ali       }
6774f7415efSAmlan Barua       if (bout) {
6784f7415efSAmlan Barua         data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6799566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6809566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6815b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6829496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6834f7415efSAmlan Barua       }
6844f7415efSAmlan Barua   #else
6850c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6860c9b18e4SAmlan Barua       if (fin) {
6870c9b18e4SAmlan Barua         data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6889566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6899566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6905b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6910c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6920c9b18e4SAmlan Barua       }
6930c9b18e4SAmlan Barua       if (fout) {
6940c9b18e4SAmlan Barua         data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6959566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6969566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6975b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6980c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6990c9b18e4SAmlan Barua       }
7000c9b18e4SAmlan Barua       if (bout) {
7010c9b18e4SAmlan Barua         data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
7029566063dSJacob Faibussowitsch         PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
7039566063dSJacob Faibussowitsch         PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
7045b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
7050c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
7060c9b18e4SAmlan Barua       }
7074f7415efSAmlan Barua   #endif
7084f7415efSAmlan Barua       break;
7094f7415efSAmlan Barua     }
710f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
711f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
712da81f932SPierre Jolivet        memory leaks. We void these pointers here to avoid accident uses.
713f9d91177SJunchao Zhang     */
714f9d91177SJunchao Zhang     if (fin) (*fin)->ops->replacearray = NULL;
715f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
716f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
7170cf2e031SBarry Smith #endif
7184f7415efSAmlan Barua   }
7193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7204f7415efSAmlan Barua }
7214f7415efSAmlan Barua 
7223564f093SHong Zhang /*@
72311a5261eSBarry Smith   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
72454efbe56SHong Zhang 
725c3339decSBarry Smith   Collective
7263564f093SHong Zhang 
7273564f093SHong Zhang   Input Parameters:
7283564f093SHong Zhang + A - FFTW matrix
7293564f093SHong Zhang - x - the PETSc vector
7303564f093SHong Zhang 
7312fe279fdSBarry Smith   Output Parameter:
7323564f093SHong Zhang . y - the FFTW vector
7333564f093SHong Zhang 
734b2b77a04SHong Zhang   Level: intermediate
7353564f093SHong Zhang 
73611a5261eSBarry Smith   Note:
73711a5261eSBarry Smith   For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73897504071SAmlan Barua   one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
73997504071SAmlan Barua   zeros. This routine does that job by scattering operation.
74097504071SAmlan Barua 
7411cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7423564f093SHong Zhang @*/
VecScatterPetscToFFTW(Mat A,Vec x,Vec y)743d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
744d71ae5a4SJacob Faibussowitsch {
7453564f093SHong Zhang   PetscFunctionBegin;
746cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7483564f093SHong Zhang }
7493c3a9c75SAmlan Barua 
VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)75066976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
751d71ae5a4SJacob Faibussowitsch {
752ce94432eSBarry Smith   MPI_Comm    comm;
7533c3a9c75SAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
7549496c9aeSAmlan Barua   PetscInt    low;
7557a21806cSHong Zhang   PetscMPIInt rank, size;
7567a21806cSHong Zhang   PetscInt    vsize, vsize1;
7573c3a9c75SAmlan Barua   VecScatter  vecscat;
7580cf2e031SBarry Smith   IS          list1;
7593c3a9c75SAmlan Barua 
7603564f093SHong Zhang   PetscFunctionBegin;
7619566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
7639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
7649566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(y, &low, NULL));
7653c3a9c75SAmlan Barua 
7663564f093SHong Zhang   if (size == 1) {
7679566063dSJacob Faibussowitsch     PetscCall(VecGetSize(x, &vsize));
7689566063dSJacob Faibussowitsch     PetscCall(VecGetSize(y, &vsize1));
7699566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7709566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7719566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7729566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7739566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
7749566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
7750cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7763564f093SHong Zhang   } else {
7770cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
778*2884b08dSBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim, n1;
7790cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
7800cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
7810cf2e031SBarry Smith     IS        list2;
7820cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
7830cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
7840cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
7850cf2e031SBarry Smith     PetscInt  NM;
7860cf2e031SBarry Smith     ptrdiff_t temp;
787*2884b08dSBarry Smith   #else
788*2884b08dSBarry Smith     PetscInt nstart, nlow;
7890cf2e031SBarry Smith   #endif
7900cf2e031SBarry Smith 
7913c3a9c75SAmlan Barua     switch (ndim) {
7923c3a9c75SAmlan Barua     case 1:
79364657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
7947a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
79526fbe8dcSKarl Rupp 
796*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0, &n1));
797*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start, &nstart));
798*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
799*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
800*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8019566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8029566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8039566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8049566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8059566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8069566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
80764657d84SAmlan Barua   #else
808e5d7f247SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
80964657d84SAmlan Barua   #endif
8103c3a9c75SAmlan Barua       break;
8113c3a9c75SAmlan Barua     case 2:
812bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8137a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
81426fbe8dcSKarl Rupp 
815*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
816*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
817*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
818*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
819*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8209566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8219566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8229566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8239566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8249566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8259566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
826bd59e6c6SAmlan Barua   #else
827c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
82826fbe8dcSKarl Rupp 
82957508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
83057508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
8313c3a9c75SAmlan Barua 
8323564f093SHong Zhang       if (dim[1] % 2 == 0) {
8333c3a9c75SAmlan Barua         NM = dim[1] + 2;
8343564f093SHong Zhang       } else {
8353c3a9c75SAmlan Barua         NM = dim[1] + 1;
8363564f093SHong Zhang       }
8373c3a9c75SAmlan Barua       for (i = 0; i < local_n0; i++) {
8383c3a9c75SAmlan Barua         for (j = 0; j < dim[1]; j++) {
8393c3a9c75SAmlan Barua           tempindx  = i * dim[1] + j;
8403c3a9c75SAmlan Barua           tempindx1 = i * NM + j;
84126fbe8dcSKarl Rupp 
842*2884b08dSBarry Smith           PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
8433c3a9c75SAmlan Barua           indx2[tempindx] = low + tempindx1;
8443c3a9c75SAmlan Barua         }
8453c3a9c75SAmlan Barua       }
8463c3a9c75SAmlan Barua 
847*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
848*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
849*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
8503c3a9c75SAmlan Barua 
8519566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8529566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8539566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8549566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8559566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8569566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
8579566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
8589566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
859bd59e6c6SAmlan Barua   #endif
8609496c9aeSAmlan Barua       break;
8613c3a9c75SAmlan Barua 
8623c3a9c75SAmlan Barua     case 3:
863bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
8647a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
86526fbe8dcSKarl Rupp 
866*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
867*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
868*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
869*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
870*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8719566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8729566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8739566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8749566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
8759566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
8769566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
877bd59e6c6SAmlan Barua   #else
878c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
879f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8807a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
88126fbe8dcSKarl Rupp 
88257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
88357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
88451d1eed7SAmlan Barua 
885db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
886db4deed7SKarl Rupp       else NM = dim[2] + 1;
88751d1eed7SAmlan Barua 
88851d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
88951d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
89051d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
89151d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
89251d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
89326fbe8dcSKarl Rupp 
894*2884b08dSBarry Smith             PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
895*2884b08dSBarry Smith             PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
89651d1eed7SAmlan Barua           }
89751d1eed7SAmlan Barua         }
89851d1eed7SAmlan Barua       }
89951d1eed7SAmlan Barua 
9009566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
9019566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
9029566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9039566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9049566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9059566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9069566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9079566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9089566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9099566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
910bd59e6c6SAmlan Barua   #endif
9119496c9aeSAmlan Barua       break;
9123c3a9c75SAmlan Barua 
9133c3a9c75SAmlan Barua     default:
914bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
9157a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
91626fbe8dcSKarl Rupp 
917*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
918*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
919*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
920*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
921*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
9229566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9239566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9249566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9259566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9269566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9279566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
928bd59e6c6SAmlan Barua   #else
929c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
930f1ade23cSHong Zhang       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
931e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
93226fbe8dcSKarl Rupp 
933e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
93426fbe8dcSKarl Rupp 
9357a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
93626fbe8dcSKarl Rupp 
937e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
938e5d7f247SAmlan Barua 
939e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
940e5d7f247SAmlan Barua 
94157508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
94257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
943e5d7f247SAmlan Barua 
944db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
945db4deed7SKarl Rupp       else NM = 1;
946e5d7f247SAmlan Barua 
9476971673cSAmlan Barua       j = low;
94857508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
9496971673cSAmlan Barua         indx1[i] = local_0_start * partial_dim + i;
9506971673cSAmlan Barua         indx2[i] = j;
95126fbe8dcSKarl Rupp         if (k % dim[ndim - 1] == 0) j += NM;
9526971673cSAmlan Barua         j++;
9536971673cSAmlan Barua       }
9549566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9559566063dSJacob Faibussowitsch       PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9569566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9579566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9589566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9599566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
9609566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
9619566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
9629566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
9639566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
964bd59e6c6SAmlan Barua   #endif
9659496c9aeSAmlan Barua       break;
9663c3a9c75SAmlan Barua     }
9670cf2e031SBarry Smith #endif
968e81bb599SAmlan Barua   }
9693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9703c3a9c75SAmlan Barua }
9713c3a9c75SAmlan Barua 
9723564f093SHong Zhang /*@
97311a5261eSBarry Smith   VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9743564f093SHong Zhang 
975c3339decSBarry Smith   Collective
9763564f093SHong Zhang 
9773564f093SHong Zhang   Input Parameters:
97811a5261eSBarry Smith + A - `MATFFTW` matrix
9793564f093SHong Zhang - x - FFTW vector
9803564f093SHong Zhang 
9812fe279fdSBarry Smith   Output Parameter:
9823564f093SHong Zhang . y - PETSc vector
9833564f093SHong Zhang 
9843564f093SHong Zhang   Level: intermediate
9853564f093SHong Zhang 
98611a5261eSBarry Smith   Note:
98711a5261eSBarry Smith   While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
98811a5261eSBarry Smith   `VecScatterFFTWToPetsc()` removes those extra zeros.
9893564f093SHong Zhang 
9901cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9913564f093SHong Zhang @*/
VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)992d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
993d71ae5a4SJacob Faibussowitsch {
9943c3a9c75SAmlan Barua   PetscFunctionBegin;
995cac4c232SBarry Smith   PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
9973c3a9c75SAmlan Barua }
99854efbe56SHong Zhang 
VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)99966976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
1000d71ae5a4SJacob Faibussowitsch {
1001ce94432eSBarry Smith   MPI_Comm    comm;
10025b3e41ffSAmlan Barua   Mat_FFT    *fft = (Mat_FFT *)A->data;
10039496c9aeSAmlan Barua   PetscInt    low;
10047a21806cSHong Zhang   PetscMPIInt rank, size;
10055b3e41ffSAmlan Barua   VecScatter  vecscat;
10060cf2e031SBarry Smith   IS          list1;
10079496c9aeSAmlan Barua 
10083564f093SHong Zhang   PetscFunctionBegin;
10099566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
10109566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
10119566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
10129566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(x, &low, NULL));
10135b3e41ffSAmlan Barua 
1014e81bb599SAmlan Barua   if (size == 1) {
10159566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
10169566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
10179566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10189566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10199566063dSJacob Faibussowitsch     PetscCall(VecScatterDestroy(&vecscat));
10209566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&list1));
1021e81bb599SAmlan Barua 
10220cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10233564f093SHong Zhang   } else {
10240cf2e031SBarry Smith     Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
10250cf2e031SBarry Smith     PetscInt  ndim = fft->ndim, *dim = fft->dim;
10260cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start;
10270cf2e031SBarry Smith     ptrdiff_t local_n1, local_1_start;
10280cf2e031SBarry Smith     IS        list2;
10290cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
10300cf2e031SBarry Smith     PetscInt  i, j, k, partial_dim;
10310cf2e031SBarry Smith     PetscInt *indx1, *indx2, tempindx, tempindx1;
10320cf2e031SBarry Smith     PetscInt  NM;
10330cf2e031SBarry Smith     ptrdiff_t temp;
1034*2884b08dSBarry Smith   #else
1035*2884b08dSBarry Smith     PetscInt nlow, nstart;
10360cf2e031SBarry Smith   #endif
1037*2884b08dSBarry Smith     PetscInt n1;
1038e81bb599SAmlan Barua     switch (ndim) {
1039e81bb599SAmlan Barua     case 1:
104064657d84SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10417a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
104226fbe8dcSKarl Rupp 
1043*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n1, &n1));
1044*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_1_start, &nstart));
1045*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
1046*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1047*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
10489566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10499566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10509566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10519566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10529566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10539566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
105464657d84SAmlan Barua   #else
10556a506ed8SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
105664657d84SAmlan Barua   #endif
10575b3e41ffSAmlan Barua       break;
10585b3e41ffSAmlan Barua     case 2:
1059bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
10607a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
106126fbe8dcSKarl Rupp 
1062*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1063*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
1064*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
1065*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1066*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
10679566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10689566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10699566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10709566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
10719566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
10729566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1073bd59e6c6SAmlan Barua   #else
10747a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
107526fbe8dcSKarl Rupp 
107657508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
107757508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
10785b3e41ffSAmlan Barua 
1079db4deed7SKarl Rupp       if (dim[1] % 2 == 0) NM = dim[1] + 2;
1080db4deed7SKarl Rupp       else NM = dim[1] + 1;
10815b3e41ffSAmlan Barua 
10825b3e41ffSAmlan Barua       for (i = 0; i < local_n0; i++) {
10835b3e41ffSAmlan Barua         for (j = 0; j < dim[1]; j++) {
10845b3e41ffSAmlan Barua           tempindx  = i * dim[1] + j;
10855b3e41ffSAmlan Barua           tempindx1 = i * NM + j;
108626fbe8dcSKarl Rupp 
1087*2884b08dSBarry Smith           PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
1088*2884b08dSBarry Smith           PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
10895b3e41ffSAmlan Barua         }
10905b3e41ffSAmlan Barua       }
10915b3e41ffSAmlan Barua 
1092*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1093*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1094*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
10955b3e41ffSAmlan Barua 
10969566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10979566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10989566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10999566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11009566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11019566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11029566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11039566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1104bd59e6c6SAmlan Barua   #endif
11059496c9aeSAmlan Barua       break;
11065b3e41ffSAmlan Barua     case 3:
1107bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
11087a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
110926fbe8dcSKarl Rupp 
1110*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1111*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
1112*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
1113*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1114*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
11159566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11169566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11179566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11189566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11199566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11209566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1121bd59e6c6SAmlan Barua   #else
11227a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
112326fbe8dcSKarl Rupp 
112457508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
112557508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
112651d1eed7SAmlan Barua 
1127db4deed7SKarl Rupp       if (dim[2] % 2 == 0) NM = dim[2] + 2;
1128db4deed7SKarl Rupp       else NM = dim[2] + 1;
112951d1eed7SAmlan Barua 
113051d1eed7SAmlan Barua       for (i = 0; i < local_n0; i++) {
113151d1eed7SAmlan Barua         for (j = 0; j < dim[1]; j++) {
113251d1eed7SAmlan Barua           for (k = 0; k < dim[2]; k++) {
113351d1eed7SAmlan Barua             tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
113451d1eed7SAmlan Barua             tempindx1 = i * dim[1] * NM + j * NM + k;
113526fbe8dcSKarl Rupp 
1136*2884b08dSBarry Smith             PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
1137*2884b08dSBarry Smith             PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
113851d1eed7SAmlan Barua           }
113951d1eed7SAmlan Barua         }
114051d1eed7SAmlan Barua       }
114151d1eed7SAmlan Barua 
1142*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1143*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1144*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
114551d1eed7SAmlan Barua 
11469566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11479566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11489566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11499566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11509566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11519566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
11529566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
11539566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1154bd59e6c6SAmlan Barua   #endif
11559496c9aeSAmlan Barua       break;
11565b3e41ffSAmlan Barua     default:
1157bd59e6c6SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
11587a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
115926fbe8dcSKarl Rupp 
1160*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
1161*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
1162*2884b08dSBarry Smith       PetscCall(PetscIntCast(low, &nlow));
1163*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1164*2884b08dSBarry Smith       PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
11659566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11669566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11679566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11689566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
11699566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
11709566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
1171bd59e6c6SAmlan Barua   #else
1172ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
117326fbe8dcSKarl Rupp 
1174ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
117526fbe8dcSKarl Rupp 
1176c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
117726fbe8dcSKarl Rupp 
1178ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1179ba6e06d0SAmlan Barua 
1180ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1181ba6e06d0SAmlan Barua 
118257508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
118357508eceSPierre Jolivet       PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1184ba6e06d0SAmlan Barua 
1185db4deed7SKarl Rupp       if (dim[ndim - 1] % 2 == 0) NM = 2;
1186db4deed7SKarl Rupp       else NM = 1;
1187ba6e06d0SAmlan Barua 
1188ba6e06d0SAmlan Barua       j = low;
118957508eceSPierre Jolivet       for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1190*2884b08dSBarry Smith         PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
1191ba6e06d0SAmlan Barua         indx2[i] = j;
11923564f093SHong Zhang         if (k % dim[ndim - 1] == 0) j += NM;
1193ba6e06d0SAmlan Barua         j++;
1194ba6e06d0SAmlan Barua       }
1195*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
1196*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1197*2884b08dSBarry Smith       PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1198ba6e06d0SAmlan Barua 
11999566063dSJacob Faibussowitsch       PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
12009566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
12019566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
12029566063dSJacob Faibussowitsch       PetscCall(VecScatterDestroy(&vecscat));
12039566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list1));
12049566063dSJacob Faibussowitsch       PetscCall(ISDestroy(&list2));
12059566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx1));
12069566063dSJacob Faibussowitsch       PetscCall(PetscFree(indx2));
1207bd59e6c6SAmlan Barua   #endif
12089496c9aeSAmlan Barua       break;
12095b3e41ffSAmlan Barua     }
12100cf2e031SBarry Smith #endif
1211e81bb599SAmlan Barua   }
12123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
12135b3e41ffSAmlan Barua }
12145b3e41ffSAmlan Barua 
1215350f1385SJose E. Roman /*MC
1216350f1385SJose E. Roman   MATFFTW -  "fftw" - Matrix type that provides FFT via the FFTW external package.
1217350f1385SJose E. Roman 
1218350f1385SJose E. Roman   Level: intermediate
1219350f1385SJose E. Roman 
12201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1221350f1385SJose E. Roman M*/
MatCreate_FFTW(Mat A)1222d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1223d71ae5a4SJacob Faibussowitsch {
1224ce94432eSBarry Smith   MPI_Comm    comm;
1225b2b77a04SHong Zhang   Mat_FFT    *fft = (Mat_FFT *)A->data;
1226b2b77a04SHong Zhang   Mat_FFTW   *fftw;
12270cf2e031SBarry Smith   PetscInt    ndim = fft->ndim, *dim = fft->dim;
12285274ed1bSHong Zhang   const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
12295274ed1bSHong Zhang   unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1230b2b77a04SHong Zhang   PetscBool   flg;
12314f48bc67SAmlan Barua   PetscInt    p_flag, partial_dim = 1, ctr;
123211902ff2SHong Zhang   PetscMPIInt size, rank;
12339496c9aeSAmlan Barua   ptrdiff_t  *pdim;
12349496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12350cf2e031SBarry Smith   PetscInt tot_dim;
12369496c9aeSAmlan Barua #endif
12379496c9aeSAmlan Barua 
1238b2b77a04SHong Zhang   PetscFunctionBegin;
12399566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
12409566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
12419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
1242c92418ccSAmlan Barua 
12430cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12441878d478SAmlan Barua   fftw_mpi_init();
12450cf2e031SBarry Smith #endif
124611902ff2SHong Zhang   pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
124711902ff2SHong Zhang   pdim[0] = dim[0];
12488ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12498ca4c034SAmlan Barua   tot_dim = 2 * dim[0];
12508ca4c034SAmlan Barua #endif
12513564f093SHong Zhang   for (ctr = 1; ctr < ndim; ctr++) {
12526882372aSHong Zhang     partial_dim *= dim[ctr];
125311902ff2SHong Zhang     pdim[ctr] = dim[ctr];
12548ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1255db4deed7SKarl Rupp     if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1256db4deed7SKarl Rupp     else tot_dim *= dim[ctr];
12578ca4c034SAmlan Barua #endif
12586882372aSHong Zhang   }
12593c3a9c75SAmlan Barua 
1260b2b77a04SHong Zhang   if (size == 1) {
1261e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12629566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12630cf2e031SBarry Smith     fft->n = fft->N;
1264e81bb599SAmlan Barua #else
12659566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12660cf2e031SBarry Smith     fft->n = tot_dim;
12670cf2e031SBarry Smith #endif
12680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12690cf2e031SBarry Smith   } else {
12700cf2e031SBarry Smith     ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12710cf2e031SBarry Smith   #if !defined(PETSC_USE_COMPLEX)
12720cf2e031SBarry Smith     ptrdiff_t temp;
12730cf2e031SBarry Smith     PetscInt  N1;
1274*2884b08dSBarry Smith   #else
1275*2884b08dSBarry Smith     PetscInt n1;
1276e81bb599SAmlan Barua   #endif
1277e81bb599SAmlan Barua 
1278b2b77a04SHong Zhang     switch (ndim) {
1279b2b77a04SHong Zhang     case 1:
12803c3a9c75SAmlan Barua   #if !defined(PETSC_USE_COMPLEX)
12813c3a9c75SAmlan Barua       SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1282e5d7f247SAmlan Barua   #else
12837a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0, &fft->n));
1285*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n1, &n1));
1286*2884b08dSBarry Smith       PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1287e5d7f247SAmlan Barua   #endif
1288b2b77a04SHong Zhang       break;
1289b2b77a04SHong Zhang     case 2:
12905b3e41ffSAmlan Barua   #if defined(PETSC_USE_COMPLEX)
12917a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12920cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1];
12939566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12945b3e41ffSAmlan Barua   #else
1295c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
129626fbe8dcSKarl Rupp 
12970cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12989566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12995b3e41ffSAmlan Barua   #endif
1300b2b77a04SHong Zhang       break;
1301b2b77a04SHong Zhang     case 3:
130251d1eed7SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
13037a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
130426fbe8dcSKarl Rupp 
13050cf2e031SBarry Smith       fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
13069566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
130751d1eed7SAmlan Barua   #else
1308c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
130926fbe8dcSKarl Rupp 
13100cf2e031SBarry Smith       fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
13119566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
131251d1eed7SAmlan Barua   #endif
1313b2b77a04SHong Zhang       break;
1314b2b77a04SHong Zhang     default:
1315b3a17365SAmlan Barua   #if defined(PETSC_USE_COMPLEX)
13167a21806cSHong Zhang       fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
131726fbe8dcSKarl Rupp 
1318*2884b08dSBarry Smith       PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
13199566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1320b3a17365SAmlan Barua   #else
1321b3a17365SAmlan Barua       temp = pdim[ndim - 1];
132226fbe8dcSKarl Rupp 
1323b3a17365SAmlan Barua       pdim[ndim - 1] = temp / 2 + 1;
132426fbe8dcSKarl Rupp 
1325c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
132626fbe8dcSKarl Rupp 
1327*2884b08dSBarry Smith       PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
13280cf2e031SBarry Smith       N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
132926fbe8dcSKarl Rupp 
1330b3a17365SAmlan Barua       pdim[ndim - 1] = temp;
133126fbe8dcSKarl Rupp 
13329566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1333b3a17365SAmlan Barua   #endif
1334b2b77a04SHong Zhang       break;
1335b2b77a04SHong Zhang     }
13360cf2e031SBarry Smith #endif
1337b2b77a04SHong Zhang   }
13382277172eSMark Adams   free(pdim);
13399566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
13404dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&fftw));
1341b2b77a04SHong Zhang   fft->data = (void *)fftw;
1342b2b77a04SHong Zhang 
1343*2884b08dSBarry Smith   fftw->ndim_fftw   = ndim; /* This is dimension of fft */
1344e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
134526fbe8dcSKarl Rupp 
1346f4f49eeaSPierre Jolivet   PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
13478c1d5ab3SHong Zhang   if (size == 1) {
1348a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1349a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1350a1b6d50cSHong Zhang #else
13518c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1352a1b6d50cSHong Zhang #endif
13538c1d5ab3SHong Zhang   }
135426fbe8dcSKarl Rupp 
1355b1301b2eSHong Zhang   for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1356c92418ccSAmlan Barua 
1357f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1358f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1359b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1360b2b77a04SHong Zhang 
1361b2b77a04SHong Zhang   if (size == 1) {
1362b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1363b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13640cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1365b2b77a04SHong Zhang   } else {
1366b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1367b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13680cf2e031SBarry Smith #endif
1369b2b77a04SHong Zhang   }
1370b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1371b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13724a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
137326fbe8dcSKarl Rupp 
13749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1377b2b77a04SHong Zhang 
1378b2b77a04SHong Zhang   /* get runtime options */
1379d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13809566063dSJacob Faibussowitsch   PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13815f80ce2aSJacob Faibussowitsch   if (flg) fftw->p_flag = iplans[p_flag];
1382d0609cedSBarry Smith   PetscOptionsEnd();
13833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1384b2b77a04SHong Zhang }
1385