1b2b77a04SHong Zhang /*
2b2b77a04SHong Zhang Provides an interface to the FFTW package.
3c4762a1bSJed Brown Testing examples can be found in ~src/mat/tests
4b2b77a04SHong Zhang */
5b2b77a04SHong Zhang
6c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
7b2b77a04SHong Zhang EXTERN_C_BEGIN
80cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
9c6db04a5SJed Brown #include <fftw3-mpi.h>
100cf2e031SBarry Smith #else
110cf2e031SBarry Smith #include <fftw3.h>
120cf2e031SBarry Smith #endif
13b2b77a04SHong Zhang EXTERN_C_END
14b2b77a04SHong Zhang
15b2b77a04SHong Zhang typedef struct {
16*2884b08dSBarry Smith PetscInt ndim_fftw;
17*2884b08dSBarry Smith ptrdiff_t *dim_fftw;
18a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
19a1b6d50cSHong Zhang fftw_iodim64 *iodims;
20a1b6d50cSHong Zhang #else
218c1d5ab3SHong Zhang fftw_iodim *iodims;
22a1b6d50cSHong Zhang #endif
23e5d7f247SAmlan Barua PetscInt partial_dim;
24b2b77a04SHong Zhang fftw_plan p_forward, p_backward;
25b2b77a04SHong Zhang unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
26e8bdd179SBarry Smith PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
27b2b77a04SHong Zhang } Mat_FFTW;
28b2b77a04SHong Zhang
290cf2e031SBarry Smith extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
30b2b77a04SHong Zhang
MatMult_SeqFFTW(Mat A,Vec x,Vec y)31523895eeSPierre Jolivet static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
32d71ae5a4SJacob Faibussowitsch {
33b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
34b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
35f4fca6d4SMatthew G. Knepley const PetscScalar *x_array;
36f4fca6d4SMatthew G. Knepley PetscScalar *y_array;
37e8bdd179SBarry Smith Vec xx;
381acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
39a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
40a1b6d50cSHong Zhang fftw_iodim64 *iodims;
41a1b6d50cSHong Zhang #else
428c1d5ab3SHong Zhang fftw_iodim *iodims;
43a1b6d50cSHong Zhang #endif
441acd80e4SHong Zhang PetscInt i;
451acd80e4SHong Zhang #endif
461acd80e4SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim;
47b2b77a04SHong Zhang
48b2b77a04SHong Zhang PetscFunctionBegin;
49e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
50e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
51e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx));
52e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array));
53e8bdd179SBarry Smith } else {
549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array));
55e8bdd179SBarry Smith }
569566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
576aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */
58b2b77a04SHong Zhang switch (ndim) {
59b2b77a04SHong Zhang case 1:
6058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
61b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6258a912c5SAmlan Barua #else
6358a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
6458a912c5SAmlan Barua #endif
65b2b77a04SHong Zhang break;
66b2b77a04SHong Zhang case 2:
6758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
68b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
6958a912c5SAmlan Barua #else
7058a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7158a912c5SAmlan Barua #endif
72b2b77a04SHong Zhang break;
73b2b77a04SHong Zhang case 3:
7458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
75b2b77a04SHong Zhang fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
7658a912c5SAmlan Barua #else
7758a912c5SAmlan Barua fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
7858a912c5SAmlan Barua #endif
79b2b77a04SHong Zhang break;
80b2b77a04SHong Zhang default:
8158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
82a1b6d50cSHong Zhang iodims = fftw->iodims;
83a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
848c1d5ab3SHong Zhang if (ndim) {
85a1b6d50cSHong Zhang iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1];
868c1d5ab3SHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
878c1d5ab3SHong Zhang for (i = ndim - 2; i >= 0; --i) {
88a1b6d50cSHong Zhang iodims[i].n = (ptrdiff_t)dim[i];
898c1d5ab3SHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
908c1d5ab3SHong Zhang }
918c1d5ab3SHong Zhang }
92a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
93a1b6d50cSHong Zhang #else
94a1b6d50cSHong Zhang if (ndim) {
95a1b6d50cSHong Zhang iodims[ndim - 1].n = (int)dim[ndim - 1];
96a1b6d50cSHong Zhang iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
97a1b6d50cSHong Zhang for (i = ndim - 2; i >= 0; --i) {
98a1b6d50cSHong Zhang iodims[i].n = (int)dim[i];
99a1b6d50cSHong Zhang iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
100a1b6d50cSHong Zhang }
101a1b6d50cSHong Zhang }
102a1b6d50cSHong Zhang fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
103a1b6d50cSHong Zhang #endif
104a1b6d50cSHong Zhang
10558a912c5SAmlan Barua #else
106a31b9140SHong Zhang fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
10758a912c5SAmlan Barua #endif
108b2b77a04SHong Zhang break;
109b2b77a04SHong Zhang }
110e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) {
111e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
112e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array));
113e8bdd179SBarry Smith PetscCall(VecDestroy(&xx));
114e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array));
115e8bdd179SBarry Smith } else {
116f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array;
117b2b77a04SHong Zhang fftw->foutarray = y_array;
118e8bdd179SBarry Smith }
119e8bdd179SBarry Smith }
120e8bdd179SBarry Smith
121b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1227e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
1247e4bc134SDominic Meiser #else
1257e4bc134SDominic Meiser fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
1267e4bc134SDominic Meiser #endif
127b2b77a04SHong Zhang } else {
128b2b77a04SHong Zhang fftw_execute(fftw->p_forward);
129b2b77a04SHong Zhang }
1309566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
1319566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array));
1323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
133b2b77a04SHong Zhang }
134b2b77a04SHong Zhang
MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)135523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
136d71ae5a4SJacob Faibussowitsch {
137b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
138b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
139f4fca6d4SMatthew G. Knepley const PetscScalar *x_array;
140f4fca6d4SMatthew G. Knepley PetscScalar *y_array;
141b2b77a04SHong Zhang PetscInt ndim = fft->ndim, *dim = fft->dim;
142e8bdd179SBarry Smith Vec xx;
1431acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
144a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
145a1b6d50cSHong Zhang fftw_iodim64 *iodims = fftw->iodims;
146a1b6d50cSHong Zhang #else
147a1b6d50cSHong Zhang fftw_iodim *iodims = fftw->iodims;
148a1b6d50cSHong Zhang #endif
1491acd80e4SHong Zhang #endif
150b2b77a04SHong Zhang
151b2b77a04SHong Zhang PetscFunctionBegin;
152e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
153e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
154e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx));
155e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array));
156e8bdd179SBarry Smith } else {
1579566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array));
158e8bdd179SBarry Smith }
1599566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
1606aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */
161b2b77a04SHong Zhang switch (ndim) {
162b2b77a04SHong Zhang case 1:
16358a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
164b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
16558a912c5SAmlan Barua #else
166e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
16758a912c5SAmlan Barua #endif
168b2b77a04SHong Zhang break;
169b2b77a04SHong Zhang case 2:
17058a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
171b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17258a912c5SAmlan Barua #else
173e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
17458a912c5SAmlan Barua #endif
175b2b77a04SHong Zhang break;
176b2b77a04SHong Zhang case 3:
17758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
178b2b77a04SHong Zhang fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
17958a912c5SAmlan Barua #else
180e81bb599SAmlan Barua fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
18158a912c5SAmlan Barua #endif
182b2b77a04SHong Zhang break;
183b2b77a04SHong Zhang default:
18458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
185a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
186a1b6d50cSHong Zhang fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
187a1b6d50cSHong Zhang #else
1888c1d5ab3SHong Zhang fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
189a1b6d50cSHong Zhang #endif
19058a912c5SAmlan Barua #else
191a31b9140SHong Zhang fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
19258a912c5SAmlan Barua #endif
193b2b77a04SHong Zhang break;
194b2b77a04SHong Zhang }
195e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) {
196e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
197e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array));
198e8bdd179SBarry Smith PetscCall(VecDestroy(&xx));
199e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array));
200e8bdd179SBarry Smith } else {
201f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array;
202b2b77a04SHong Zhang fftw->boutarray = y_array;
203e8bdd179SBarry Smith }
204e8bdd179SBarry Smith }
205b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2067e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
207b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
2087e4bc134SDominic Meiser #else
2097e4bc134SDominic Meiser fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
2107e4bc134SDominic Meiser #endif
211b2b77a04SHong Zhang } else {
2122f613bf5SBarry Smith fftw_execute(fftw->p_backward);
213b2b77a04SHong Zhang }
2149566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
2159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array));
2163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
217b2b77a04SHong Zhang }
218b2b77a04SHong Zhang
2190cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
MatMult_MPIFFTW(Mat A,Vec x,Vec y)220523895eeSPierre Jolivet static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
221d71ae5a4SJacob Faibussowitsch {
222b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
223b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
224f4fca6d4SMatthew G. Knepley const PetscScalar *x_array;
225f4fca6d4SMatthew G. Knepley PetscScalar *y_array;
226c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim;
227ce94432eSBarry Smith MPI_Comm comm;
228e8bdd179SBarry Smith Vec xx;
229b2b77a04SHong Zhang
230b2b77a04SHong Zhang PetscFunctionBegin;
2319566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
232e8bdd179SBarry Smith if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
233e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
234e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx));
235e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array));
236e8bdd179SBarry Smith } else {
2379566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array));
238e8bdd179SBarry Smith }
2399566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
2406aad120cSJose E. Roman if (!fftw->p_forward) { /* create a plan, then execute it */
241b2b77a04SHong Zhang switch (ndim) {
242b2b77a04SHong Zhang case 1:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
245ae0a50aaSHong Zhang #else
2464f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang break;
249b2b77a04SHong Zhang case 2:
25097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2523c3a9c75SAmlan Barua #else
2533c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua #endif
255b2b77a04SHong Zhang break;
256b2b77a04SHong Zhang case 3:
2573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
258b2b77a04SHong Zhang fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2593c3a9c75SAmlan Barua #else
26051d1eed7SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2613c3a9c75SAmlan Barua #endif
262b2b77a04SHong Zhang break;
263b2b77a04SHong Zhang default:
2643c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
265c92418ccSAmlan Barua fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
2663c3a9c75SAmlan Barua #else
2673c3a9c75SAmlan Barua fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
2683c3a9c75SAmlan Barua #endif
269b2b77a04SHong Zhang break;
270b2b77a04SHong Zhang }
271e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) {
272e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
273e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array));
274e8bdd179SBarry Smith PetscCall(VecDestroy(&xx));
275e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array));
276e8bdd179SBarry Smith } else {
277f4fca6d4SMatthew G. Knepley fftw->finarray = (PetscScalar *)x_array;
278b2b77a04SHong Zhang fftw->foutarray = y_array;
279e8bdd179SBarry Smith }
280e8bdd179SBarry Smith }
281b2b77a04SHong Zhang if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
282b2b77a04SHong Zhang fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
283b2b77a04SHong Zhang } else {
284b2b77a04SHong Zhang fftw_execute(fftw->p_forward);
285b2b77a04SHong Zhang }
2869566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
2879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array));
2883ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
289b2b77a04SHong Zhang }
290b2b77a04SHong Zhang
MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)291523895eeSPierre Jolivet static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
292d71ae5a4SJacob Faibussowitsch {
293b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
294b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
295f4fca6d4SMatthew G. Knepley const PetscScalar *x_array;
296f4fca6d4SMatthew G. Knepley PetscScalar *y_array;
297c92418ccSAmlan Barua PetscInt ndim = fft->ndim, *dim = fft->dim;
298ce94432eSBarry Smith MPI_Comm comm;
299e8bdd179SBarry Smith Vec xx;
300b2b77a04SHong Zhang
301b2b77a04SHong Zhang PetscFunctionBegin;
3029566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
303e8bdd179SBarry Smith if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
304e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
305e8bdd179SBarry Smith PetscCall(VecDuplicate(x, &xx));
306e8bdd179SBarry Smith PetscCall(VecGetArrayRead(xx, &x_array));
307e8bdd179SBarry Smith } else {
3089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(x, &x_array));
309e8bdd179SBarry Smith }
3109566063dSJacob Faibussowitsch PetscCall(VecGetArray(y, &y_array));
3116aad120cSJose E. Roman if (!fftw->p_backward) { /* create a plan, then execute it */
312b2b77a04SHong Zhang switch (ndim) {
313b2b77a04SHong Zhang case 1:
3143c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
315b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
316ae0a50aaSHong Zhang #else
3174f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
3183c3a9c75SAmlan Barua #endif
319b2b77a04SHong Zhang break;
320b2b77a04SHong Zhang case 2:
32197504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
322b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3233c3a9c75SAmlan Barua #else
3243c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3253c3a9c75SAmlan Barua #endif
326b2b77a04SHong Zhang break;
327b2b77a04SHong Zhang case 3:
3283c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
329b2b77a04SHong Zhang fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3303c3a9c75SAmlan Barua #else
3313c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3323c3a9c75SAmlan Barua #endif
333b2b77a04SHong Zhang break;
334b2b77a04SHong Zhang default:
3353c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
336c92418ccSAmlan Barua fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
3373c3a9c75SAmlan Barua #else
3383c3a9c75SAmlan Barua fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
3393c3a9c75SAmlan Barua #endif
340b2b77a04SHong Zhang break;
341b2b77a04SHong Zhang }
342e8bdd179SBarry Smith if (fftw->p_flag != FFTW_ESTIMATE) {
343e8bdd179SBarry Smith /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */
344e8bdd179SBarry Smith PetscCall(VecRestoreArrayRead(xx, &x_array));
345e8bdd179SBarry Smith PetscCall(VecDestroy(&xx));
346e8bdd179SBarry Smith PetscCall(VecGetArrayRead(x, &x_array));
347e8bdd179SBarry Smith } else {
348f4fca6d4SMatthew G. Knepley fftw->binarray = (PetscScalar *)x_array;
349b2b77a04SHong Zhang fftw->boutarray = y_array;
350e8bdd179SBarry Smith }
351e8bdd179SBarry Smith }
352b2b77a04SHong Zhang if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
353b2b77a04SHong Zhang fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
354b2b77a04SHong Zhang } else {
3552f613bf5SBarry Smith fftw_execute(fftw->p_backward);
356b2b77a04SHong Zhang }
3579566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(y, &y_array));
3589566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(x, &x_array));
3593ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
360b2b77a04SHong Zhang }
3610cf2e031SBarry Smith #endif
362b2b77a04SHong Zhang
MatDestroy_FFTW(Mat A)363523895eeSPierre Jolivet static PetscErrorCode MatDestroy_FFTW(Mat A)
364d71ae5a4SJacob Faibussowitsch {
365b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
366b2b77a04SHong Zhang Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
367b2b77a04SHong Zhang
368b2b77a04SHong Zhang PetscFunctionBegin;
369b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_forward);
370b2b77a04SHong Zhang fftw_destroy_plan(fftw->p_backward);
371ad540459SPierre Jolivet if (fftw->iodims) free(fftw->iodims);
3729566063dSJacob Faibussowitsch PetscCall(PetscFree(fftw->dim_fftw));
3739566063dSJacob Faibussowitsch PetscCall(PetscFree(fft->data));
3742e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
3752e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
3762e956fe4SStefano Zampini PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
3770cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
3786ccf0b3eSHong Zhang fftw_mpi_cleanup();
3790cf2e031SBarry Smith #endif
3803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
381b2b77a04SHong Zhang }
382b2b77a04SHong Zhang
3830cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
384c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
VecDestroy_MPIFFTW(Vec v)385523895eeSPierre Jolivet static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
386d71ae5a4SJacob Faibussowitsch {
387b2b77a04SHong Zhang PetscScalar *array;
388b2b77a04SHong Zhang
389b2b77a04SHong Zhang PetscFunctionBegin;
3909566063dSJacob Faibussowitsch PetscCall(VecGetArray(v, &array));
3912f613bf5SBarry Smith fftw_free((fftw_complex *)array);
3929566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(v, &array));
3939566063dSJacob Faibussowitsch PetscCall(VecDestroy_MPI(v));
3943ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
395b2b77a04SHong Zhang }
3960cf2e031SBarry Smith #endif
397b2b77a04SHong Zhang
3980cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
VecDuplicate_FFTW_fin(Vec fin,Vec * fin_new)399d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
400d71ae5a4SJacob Faibussowitsch {
4015b113e21Ss-sajid-ali Mat A;
4025b113e21Ss-sajid-ali
4035b113e21Ss-sajid-ali PetscFunctionBegin;
4049566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
4059566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
4063ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4075b113e21Ss-sajid-ali }
4085b113e21Ss-sajid-ali
VecDuplicate_FFTW_fout(Vec fout,Vec * fout_new)409d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
410d71ae5a4SJacob Faibussowitsch {
4115b113e21Ss-sajid-ali Mat A;
4125b113e21Ss-sajid-ali
4135b113e21Ss-sajid-ali PetscFunctionBegin;
4149566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
4159566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
4163ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4175b113e21Ss-sajid-ali }
4185b113e21Ss-sajid-ali
VecDuplicate_FFTW_bout(Vec bout,Vec * bout_new)419d71ae5a4SJacob Faibussowitsch static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
420d71ae5a4SJacob Faibussowitsch {
4215b113e21Ss-sajid-ali Mat A;
4225b113e21Ss-sajid-ali
4235b113e21Ss-sajid-ali PetscFunctionBegin;
4249566063dSJacob Faibussowitsch PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
4259566063dSJacob Faibussowitsch PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
4263ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4275b113e21Ss-sajid-ali }
4280cf2e031SBarry Smith #endif
4295b113e21Ss-sajid-ali
4304be45526SHong Zhang /*@
4312a7a6963SBarry Smith MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
43211a5261eSBarry Smith parallel layout determined by `MATFFTW`
4334f7415efSAmlan Barua
434c3339decSBarry Smith Collective
4354f7415efSAmlan Barua
4364f7415efSAmlan Barua Input Parameter:
4373564f093SHong Zhang . A - the matrix
4384f7415efSAmlan Barua
439d8d19677SJose E. Roman Output Parameters:
440cc55f3b1SHong Zhang + x - (optional) input vector of forward FFTW
4416b867d5aSJose E. Roman . y - (optional) output vector of forward FFTW
442cc55f3b1SHong Zhang - z - (optional) output vector of backward FFTW
4434f7415efSAmlan Barua
4442ef1f0ffSBarry Smith Options Database Key:
4452ef1f0ffSBarry Smith . -mat_fftw_plannerflags - set FFTW planner flags
4462ef1f0ffSBarry Smith
4474f7415efSAmlan Barua Level: advanced
4483564f093SHong Zhang
44911a5261eSBarry Smith Notes:
45011a5261eSBarry Smith The parallel layout of output of forward FFTW is always same as the input
45197504071SAmlan Barua of the backward FFTW. But parallel layout of the input vector of forward
45297504071SAmlan Barua FFTW might not be same as the output of backward FFTW.
45311a5261eSBarry Smith
45411a5261eSBarry Smith We need to provide enough space while doing parallel real transform.
45597504071SAmlan Barua We need to pad extra zeros at the end of last dimension. For this reason the one needs to
45697504071SAmlan Barua invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
457e0ec536eSAmlan Barua last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
458e0ec536eSAmlan Barua depends on if the last dimension is even or odd. If the last dimension is even need to pad two
459e0ec536eSAmlan Barua zeros if it is odd only one zero is needed.
46011a5261eSBarry Smith
461e0ec536eSAmlan Barua Lastly one needs some scratch space at the end of data set in each process. alloc_local
462e0ec536eSAmlan Barua figures out how much space is needed, i.e. it figures out the data+scratch space for
463e0ec536eSAmlan Barua each processor and returns that.
4644f7415efSAmlan Barua
465fe59aa6dSJacob Faibussowitsch Developer Notes:
46611a5261eSBarry Smith How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?
46711a5261eSBarry Smith
4681cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
4694be45526SHong Zhang @*/
MatCreateVecsFFTW(Mat A,Vec * x,Vec * y,Vec * z)470d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
471d71ae5a4SJacob Faibussowitsch {
4724be45526SHong Zhang PetscFunctionBegin;
473cac4c232SBarry Smith PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
4743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
4754be45526SHong Zhang }
4764be45526SHong Zhang
MatCreateVecsFFTW_FFTW(Mat A,Vec * fin,Vec * fout,Vec * bout)477d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
478d71ae5a4SJacob Faibussowitsch {
4794f7415efSAmlan Barua PetscMPIInt size, rank;
480ce94432eSBarry Smith MPI_Comm comm;
4814f7415efSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data;
4824f7415efSAmlan Barua
4834f7415efSAmlan Barua PetscFunctionBegin;
4844f7415efSAmlan Barua PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4854f7415efSAmlan Barua PetscValidType(A, 1);
4869566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
4874f7415efSAmlan Barua
4889566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
4899566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
4904f7415efSAmlan Barua if (size == 1) { /* sequential case */
4914f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4929566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
4939566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
4949566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
4954f7415efSAmlan Barua #else
4969566063dSJacob Faibussowitsch if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
4979566063dSJacob Faibussowitsch if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
4989566063dSJacob Faibussowitsch if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
4994f7415efSAmlan Barua #endif
5000cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
50197504071SAmlan Barua } else { /* parallel cases */
5020cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
5030cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim;
5044f7415efSAmlan Barua ptrdiff_t alloc_local, local_n0, local_0_start;
5059496c9aeSAmlan Barua ptrdiff_t local_n1;
5069496c9aeSAmlan Barua fftw_complex *data_fout;
5079496c9aeSAmlan Barua ptrdiff_t local_1_start;
508*2884b08dSBarry Smith PetscInt n1, N1;
5099496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
5109496c9aeSAmlan Barua fftw_complex *data_fin, *data_bout;
5119496c9aeSAmlan Barua #else
5124f7415efSAmlan Barua double *data_finr, *data_boutr;
5139496c9aeSAmlan Barua ptrdiff_t temp;
5149496c9aeSAmlan Barua #endif
5159496c9aeSAmlan Barua
5164f7415efSAmlan Barua switch (ndim) {
5174f7415efSAmlan Barua case 1:
51857625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5194f8276c3SHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
52057625b84SAmlan Barua #else
52157625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
52257625b84SAmlan Barua if (fin) {
52357625b84SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
524*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0, &n1));
525*2884b08dSBarry Smith PetscCall(PetscIntCast(fft->N, &N1));
526*2884b08dSBarry Smith PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin));
5279566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5285b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
52957625b84SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
53057625b84SAmlan Barua }
53157625b84SAmlan Barua if (fout) {
53257625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
533*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n1, &n1));
534*2884b08dSBarry Smith PetscCall(PetscIntCast(fft->N, &N1));
535*2884b08dSBarry Smith PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout));
5369566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5375b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
53857625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW;
53957625b84SAmlan Barua }
54057625b84SAmlan Barua alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
54157625b84SAmlan Barua if (bout) {
54257625b84SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
543*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n1, &n1));
544*2884b08dSBarry Smith PetscCall(PetscIntCast(fft->N, &N1));
545*2884b08dSBarry Smith PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout));
5469566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5475b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
54857625b84SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
54957625b84SAmlan Barua }
55057625b84SAmlan Barua break;
55157625b84SAmlan Barua #endif
5524f7415efSAmlan Barua case 2:
55397504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5544f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
555*2884b08dSBarry Smith PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1));
556*2884b08dSBarry Smith PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1));
5574f7415efSAmlan Barua if (fin) {
5584f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5599566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
5609566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5615b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5624f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5634f7415efSAmlan Barua }
56457625b84SAmlan Barua if (fout) {
56557625b84SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5669566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
5679566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5685b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
56957625b84SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW;
57057625b84SAmlan Barua }
5715b113e21Ss-sajid-ali if (bout) {
5725b113e21Ss-sajid-ali data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
5739566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
5749566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5755b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5765b113e21Ss-sajid-ali (*bout)->ops->destroy = VecDestroy_MPIFFTW;
5775b113e21Ss-sajid-ali }
5784f7415efSAmlan Barua #else
5794f7415efSAmlan Barua /* Get local size */
5804f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
5814f7415efSAmlan Barua if (fin) {
5824f7415efSAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5839566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
5849566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
5855b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5864f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
5874f7415efSAmlan Barua }
5884f7415efSAmlan Barua if (fout) {
5894f7415efSAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5909566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
5919566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
5925b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5934f7415efSAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW;
5944f7415efSAmlan Barua }
5954f7415efSAmlan Barua if (bout) {
5964f7415efSAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
5979566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
5989566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
5995b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6004f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6014f7415efSAmlan Barua }
6024f7415efSAmlan Barua #endif
6034f7415efSAmlan Barua break;
6044f7415efSAmlan Barua case 3:
6054f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6064f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
607*2884b08dSBarry Smith PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1));
608*2884b08dSBarry Smith PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1));
6094f7415efSAmlan Barua if (fin) {
6104f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6119566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
6129566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6135b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6144f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6154f7415efSAmlan Barua }
6165b113e21Ss-sajid-ali if (fout) {
6175b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6189566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
6199566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6205b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6215b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6225b113e21Ss-sajid-ali }
6234f7415efSAmlan Barua if (bout) {
6244f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6259566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
6269566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6275b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6284f7415efSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6294f7415efSAmlan Barua }
6304f7415efSAmlan Barua #else
6310c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
6320c9b18e4SAmlan Barua if (fin) {
6330c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6349566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6359566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6365b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6370c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6380c9b18e4SAmlan Barua }
6390c9b18e4SAmlan Barua if (fout) {
6400c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6419566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6429566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6435b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6440c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6450c9b18e4SAmlan Barua }
6460c9b18e4SAmlan Barua if (bout) {
6470c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6489566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
6499566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6505b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6510c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6520c9b18e4SAmlan Barua }
6534f7415efSAmlan Barua #endif
6544f7415efSAmlan Barua break;
6554f7415efSAmlan Barua default:
6564f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6574f7415efSAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
6584f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
6594f7415efSAmlan Barua alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
660f4f49eeaSPierre Jolivet N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
6614f7415efSAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
6624f7415efSAmlan Barua
6634f7415efSAmlan Barua if (fin) {
6644f7415efSAmlan Barua data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6659566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
6669566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6675b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6684f7415efSAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6694f7415efSAmlan Barua }
6705b113e21Ss-sajid-ali if (fout) {
6715b113e21Ss-sajid-ali data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6729566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
6739566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6745b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6755b113e21Ss-sajid-ali (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6765b113e21Ss-sajid-ali }
6774f7415efSAmlan Barua if (bout) {
6784f7415efSAmlan Barua data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
6799566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
6809566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
6815b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6829496c9aeSAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
6834f7415efSAmlan Barua }
6844f7415efSAmlan Barua #else
6850c9b18e4SAmlan Barua alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
6860c9b18e4SAmlan Barua if (fin) {
6870c9b18e4SAmlan Barua data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6889566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
6899566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
6905b113e21Ss-sajid-ali (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6910c9b18e4SAmlan Barua (*fin)->ops->destroy = VecDestroy_MPIFFTW;
6920c9b18e4SAmlan Barua }
6930c9b18e4SAmlan Barua if (fout) {
6940c9b18e4SAmlan Barua data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
6959566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
6969566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
6975b113e21Ss-sajid-ali (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6980c9b18e4SAmlan Barua (*fout)->ops->destroy = VecDestroy_MPIFFTW;
6990c9b18e4SAmlan Barua }
7000c9b18e4SAmlan Barua if (bout) {
7010c9b18e4SAmlan Barua data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
7029566063dSJacob Faibussowitsch PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
7039566063dSJacob Faibussowitsch PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
7045b113e21Ss-sajid-ali (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
7050c9b18e4SAmlan Barua (*bout)->ops->destroy = VecDestroy_MPIFFTW;
7060c9b18e4SAmlan Barua }
7074f7415efSAmlan Barua #endif
7084f7415efSAmlan Barua break;
7094f7415efSAmlan Barua }
710f9d91177SJunchao Zhang /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
711f9d91177SJunchao Zhang v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
712da81f932SPierre Jolivet memory leaks. We void these pointers here to avoid accident uses.
713f9d91177SJunchao Zhang */
714f9d91177SJunchao Zhang if (fin) (*fin)->ops->replacearray = NULL;
715f9d91177SJunchao Zhang if (fout) (*fout)->ops->replacearray = NULL;
716f9d91177SJunchao Zhang if (bout) (*bout)->ops->replacearray = NULL;
7170cf2e031SBarry Smith #endif
7184f7415efSAmlan Barua }
7193ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
7204f7415efSAmlan Barua }
7214f7415efSAmlan Barua
7223564f093SHong Zhang /*@
72311a5261eSBarry Smith VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.
72454efbe56SHong Zhang
725c3339decSBarry Smith Collective
7263564f093SHong Zhang
7273564f093SHong Zhang Input Parameters:
7283564f093SHong Zhang + A - FFTW matrix
7293564f093SHong Zhang - x - the PETSc vector
7303564f093SHong Zhang
7312fe279fdSBarry Smith Output Parameter:
7323564f093SHong Zhang . y - the FFTW vector
7333564f093SHong Zhang
734b2b77a04SHong Zhang Level: intermediate
7353564f093SHong Zhang
73611a5261eSBarry Smith Note:
73711a5261eSBarry Smith For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
73897504071SAmlan Barua one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
73997504071SAmlan Barua zeros. This routine does that job by scattering operation.
74097504071SAmlan Barua
7411cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
7423564f093SHong Zhang @*/
VecScatterPetscToFFTW(Mat A,Vec x,Vec y)743d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
744d71ae5a4SJacob Faibussowitsch {
7453564f093SHong Zhang PetscFunctionBegin;
746cac4c232SBarry Smith PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
7473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
7483564f093SHong Zhang }
7493c3a9c75SAmlan Barua
VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)75066976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
751d71ae5a4SJacob Faibussowitsch {
752ce94432eSBarry Smith MPI_Comm comm;
7533c3a9c75SAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data;
7549496c9aeSAmlan Barua PetscInt low;
7557a21806cSHong Zhang PetscMPIInt rank, size;
7567a21806cSHong Zhang PetscInt vsize, vsize1;
7573c3a9c75SAmlan Barua VecScatter vecscat;
7580cf2e031SBarry Smith IS list1;
7593c3a9c75SAmlan Barua
7603564f093SHong Zhang PetscFunctionBegin;
7619566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
7629566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
7639566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
7649566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(y, &low, NULL));
7653c3a9c75SAmlan Barua
7663564f093SHong Zhang if (size == 1) {
7679566063dSJacob Faibussowitsch PetscCall(VecGetSize(x, &vsize));
7689566063dSJacob Faibussowitsch PetscCall(VecGetSize(y, &vsize1));
7699566063dSJacob Faibussowitsch PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
7709566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
7719566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7729566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
7739566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
7749566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
7750cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
7763564f093SHong Zhang } else {
7770cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
778*2884b08dSBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim, n1;
7790cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start;
7800cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start;
7810cf2e031SBarry Smith IS list2;
7820cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
7830cf2e031SBarry Smith PetscInt i, j, k, partial_dim;
7840cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1;
7850cf2e031SBarry Smith PetscInt NM;
7860cf2e031SBarry Smith ptrdiff_t temp;
787*2884b08dSBarry Smith #else
788*2884b08dSBarry Smith PetscInt nstart, nlow;
7890cf2e031SBarry Smith #endif
7900cf2e031SBarry Smith
7913c3a9c75SAmlan Barua switch (ndim) {
7923c3a9c75SAmlan Barua case 1:
79364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7947a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
79526fbe8dcSKarl Rupp
796*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0, &n1));
797*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start, &nstart));
798*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
799*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
800*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8019566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8029566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8049566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
8059566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
8069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
80764657d84SAmlan Barua #else
808e5d7f247SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
80964657d84SAmlan Barua #endif
8103c3a9c75SAmlan Barua break;
8113c3a9c75SAmlan Barua case 2:
812bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8137a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
81426fbe8dcSKarl Rupp
815*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
816*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
817*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
818*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
819*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8209566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8219566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8229566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8239566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
8249566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
8259566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
826bd59e6c6SAmlan Barua #else
827c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
82826fbe8dcSKarl Rupp
82957508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
83057508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
8313c3a9c75SAmlan Barua
8323564f093SHong Zhang if (dim[1] % 2 == 0) {
8333c3a9c75SAmlan Barua NM = dim[1] + 2;
8343564f093SHong Zhang } else {
8353c3a9c75SAmlan Barua NM = dim[1] + 1;
8363564f093SHong Zhang }
8373c3a9c75SAmlan Barua for (i = 0; i < local_n0; i++) {
8383c3a9c75SAmlan Barua for (j = 0; j < dim[1]; j++) {
8393c3a9c75SAmlan Barua tempindx = i * dim[1] + j;
8403c3a9c75SAmlan Barua tempindx1 = i * NM + j;
84126fbe8dcSKarl Rupp
842*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
8433c3a9c75SAmlan Barua indx2[tempindx] = low + tempindx1;
8443c3a9c75SAmlan Barua }
8453c3a9c75SAmlan Barua }
8463c3a9c75SAmlan Barua
847*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
848*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
849*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
8503c3a9c75SAmlan Barua
8519566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8529566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8539566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8549566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
8559566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
8569566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
8579566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
8589566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
859bd59e6c6SAmlan Barua #endif
8609496c9aeSAmlan Barua break;
8613c3a9c75SAmlan Barua
8623c3a9c75SAmlan Barua case 3:
863bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8647a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
86526fbe8dcSKarl Rupp
866*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
867*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
868*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
869*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
870*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
8719566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
8729566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8739566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
8749566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
8759566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
8769566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
877bd59e6c6SAmlan Barua #else
878c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
879f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
8807a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
88126fbe8dcSKarl Rupp
88257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
88357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
88451d1eed7SAmlan Barua
885db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2;
886db4deed7SKarl Rupp else NM = dim[2] + 1;
88751d1eed7SAmlan Barua
88851d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) {
88951d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) {
89051d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) {
89151d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
89251d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k;
89326fbe8dcSKarl Rupp
894*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
895*2884b08dSBarry Smith PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
89651d1eed7SAmlan Barua }
89751d1eed7SAmlan Barua }
89851d1eed7SAmlan Barua }
89951d1eed7SAmlan Barua
9009566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
9019566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
9029566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9049566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9059566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
9069566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
9079566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
9089566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
9099566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
910bd59e6c6SAmlan Barua #endif
9119496c9aeSAmlan Barua break;
9123c3a9c75SAmlan Barua
9133c3a9c75SAmlan Barua default:
914bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
9157a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
91626fbe8dcSKarl Rupp
917*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
918*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
919*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
920*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
921*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
9229566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9239566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9249566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9259566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
9269566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
9279566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
928bd59e6c6SAmlan Barua #else
929c4762a1bSJed Brown /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
930f1ade23cSHong Zhang SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
931e5d7f247SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
93226fbe8dcSKarl Rupp
933e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
93426fbe8dcSKarl Rupp
9357a21806cSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
93626fbe8dcSKarl Rupp
937e5d7f247SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
938e5d7f247SAmlan Barua
939e5d7f247SAmlan Barua partial_dim = fftw->partial_dim;
940e5d7f247SAmlan Barua
94157508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
94257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
943e5d7f247SAmlan Barua
944db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2;
945db4deed7SKarl Rupp else NM = 1;
946e5d7f247SAmlan Barua
9476971673cSAmlan Barua j = low;
94857508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
9496971673cSAmlan Barua indx1[i] = local_0_start * partial_dim + i;
9506971673cSAmlan Barua indx2[i] = j;
95126fbe8dcSKarl Rupp if (k % dim[ndim - 1] == 0) j += NM;
9526971673cSAmlan Barua j++;
9536971673cSAmlan Barua }
9549566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
9559566063dSJacob Faibussowitsch PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
9569566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
9579566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9589566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
9599566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
9609566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
9619566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
9629566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
9639566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
964bd59e6c6SAmlan Barua #endif
9659496c9aeSAmlan Barua break;
9663c3a9c75SAmlan Barua }
9670cf2e031SBarry Smith #endif
968e81bb599SAmlan Barua }
9693ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9703c3a9c75SAmlan Barua }
9713c3a9c75SAmlan Barua
9723564f093SHong Zhang /*@
97311a5261eSBarry Smith VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.
9743564f093SHong Zhang
975c3339decSBarry Smith Collective
9763564f093SHong Zhang
9773564f093SHong Zhang Input Parameters:
97811a5261eSBarry Smith + A - `MATFFTW` matrix
9793564f093SHong Zhang - x - FFTW vector
9803564f093SHong Zhang
9812fe279fdSBarry Smith Output Parameter:
9823564f093SHong Zhang . y - PETSc vector
9833564f093SHong Zhang
9843564f093SHong Zhang Level: intermediate
9853564f093SHong Zhang
98611a5261eSBarry Smith Note:
98711a5261eSBarry Smith While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
98811a5261eSBarry Smith `VecScatterFFTWToPetsc()` removes those extra zeros.
9893564f093SHong Zhang
9901cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
9913564f093SHong Zhang @*/
VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)992d71ae5a4SJacob Faibussowitsch PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
993d71ae5a4SJacob Faibussowitsch {
9943c3a9c75SAmlan Barua PetscFunctionBegin;
995cac4c232SBarry Smith PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
9963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
9973c3a9c75SAmlan Barua }
99854efbe56SHong Zhang
VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)99966976f2fSJacob Faibussowitsch static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
1000d71ae5a4SJacob Faibussowitsch {
1001ce94432eSBarry Smith MPI_Comm comm;
10025b3e41ffSAmlan Barua Mat_FFT *fft = (Mat_FFT *)A->data;
10039496c9aeSAmlan Barua PetscInt low;
10047a21806cSHong Zhang PetscMPIInt rank, size;
10055b3e41ffSAmlan Barua VecScatter vecscat;
10060cf2e031SBarry Smith IS list1;
10079496c9aeSAmlan Barua
10083564f093SHong Zhang PetscFunctionBegin;
10099566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
10109566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
10119566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
10129566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, NULL));
10135b3e41ffSAmlan Barua
1014e81bb599SAmlan Barua if (size == 1) {
10159566063dSJacob Faibussowitsch PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
10169566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
10179566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10189566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10199566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
10209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
1021e81bb599SAmlan Barua
10220cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
10233564f093SHong Zhang } else {
10240cf2e031SBarry Smith Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
10250cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim;
10260cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start;
10270cf2e031SBarry Smith ptrdiff_t local_n1, local_1_start;
10280cf2e031SBarry Smith IS list2;
10290cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
10300cf2e031SBarry Smith PetscInt i, j, k, partial_dim;
10310cf2e031SBarry Smith PetscInt *indx1, *indx2, tempindx, tempindx1;
10320cf2e031SBarry Smith PetscInt NM;
10330cf2e031SBarry Smith ptrdiff_t temp;
1034*2884b08dSBarry Smith #else
1035*2884b08dSBarry Smith PetscInt nlow, nstart;
10360cf2e031SBarry Smith #endif
1037*2884b08dSBarry Smith PetscInt n1;
1038e81bb599SAmlan Barua switch (ndim) {
1039e81bb599SAmlan Barua case 1:
104064657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10417a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
104226fbe8dcSKarl Rupp
1043*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n1, &n1));
1044*2884b08dSBarry Smith PetscCall(PetscIntCast(local_1_start, &nstart));
1045*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
1046*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1047*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
10489566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
10499566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10509566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10519566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
10529566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
10539566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
105464657d84SAmlan Barua #else
10556a506ed8SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
105664657d84SAmlan Barua #endif
10575b3e41ffSAmlan Barua break;
10585b3e41ffSAmlan Barua case 2:
1059bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10607a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
106126fbe8dcSKarl Rupp
1062*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1063*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1], &nstart));
1064*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
1065*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1066*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
10679566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10689566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10699566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10709566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
10719566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
10729566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
1073bd59e6c6SAmlan Barua #else
10747a21806cSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
107526fbe8dcSKarl Rupp
107657508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
107757508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));
10785b3e41ffSAmlan Barua
1079db4deed7SKarl Rupp if (dim[1] % 2 == 0) NM = dim[1] + 2;
1080db4deed7SKarl Rupp else NM = dim[1] + 1;
10815b3e41ffSAmlan Barua
10825b3e41ffSAmlan Barua for (i = 0; i < local_n0; i++) {
10835b3e41ffSAmlan Barua for (j = 0; j < dim[1]; j++) {
10845b3e41ffSAmlan Barua tempindx = i * dim[1] + j;
10855b3e41ffSAmlan Barua tempindx1 = i * NM + j;
108626fbe8dcSKarl Rupp
1087*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx]));
1088*2884b08dSBarry Smith PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
10895b3e41ffSAmlan Barua }
10905b3e41ffSAmlan Barua }
10915b3e41ffSAmlan Barua
1092*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1], &n1));
1093*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1094*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
10955b3e41ffSAmlan Barua
10969566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
10979566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10989566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
10999566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
11009566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
11019566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
11029566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
11039566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
1104bd59e6c6SAmlan Barua #endif
11059496c9aeSAmlan Barua break;
11065b3e41ffSAmlan Barua case 3:
1107bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11087a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
110926fbe8dcSKarl Rupp
1110*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1111*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart));
1112*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
1113*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1114*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
11159566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11169566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11179566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11189566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
11199566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
11209566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
1121bd59e6c6SAmlan Barua #else
11227a21806cSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
112326fbe8dcSKarl Rupp
112457508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
112557508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));
112651d1eed7SAmlan Barua
1127db4deed7SKarl Rupp if (dim[2] % 2 == 0) NM = dim[2] + 2;
1128db4deed7SKarl Rupp else NM = dim[2] + 1;
112951d1eed7SAmlan Barua
113051d1eed7SAmlan Barua for (i = 0; i < local_n0; i++) {
113151d1eed7SAmlan Barua for (j = 0; j < dim[1]; j++) {
113251d1eed7SAmlan Barua for (k = 0; k < dim[2]; k++) {
113351d1eed7SAmlan Barua tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
113451d1eed7SAmlan Barua tempindx1 = i * dim[1] * NM + j * NM + k;
113526fbe8dcSKarl Rupp
1136*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx]));
1137*2884b08dSBarry Smith PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx]));
113851d1eed7SAmlan Barua }
113951d1eed7SAmlan Barua }
114051d1eed7SAmlan Barua }
114151d1eed7SAmlan Barua
1142*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1));
1143*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1144*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
114551d1eed7SAmlan Barua
11469566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
11479566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11489566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11499566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
11509566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
11519566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
11529566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
11539566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
1154bd59e6c6SAmlan Barua #endif
11559496c9aeSAmlan Barua break;
11565b3e41ffSAmlan Barua default:
1157bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11587a21806cSHong Zhang fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
115926fbe8dcSKarl Rupp
1160*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1));
1161*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart));
1162*2884b08dSBarry Smith PetscCall(PetscIntCast(low, &nlow));
1163*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1));
1164*2884b08dSBarry Smith PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2));
11659566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
11669566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11679566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
11689566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
11699566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
11709566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
1171bd59e6c6SAmlan Barua #else
1172ba6e06d0SAmlan Barua temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
117326fbe8dcSKarl Rupp
1174ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
117526fbe8dcSKarl Rupp
1176c3eba89aSHong Zhang fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
117726fbe8dcSKarl Rupp
1178ba6e06d0SAmlan Barua (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;
1179ba6e06d0SAmlan Barua
1180ba6e06d0SAmlan Barua partial_dim = fftw->partial_dim;
1181ba6e06d0SAmlan Barua
118257508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
118357508eceSPierre Jolivet PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));
1184ba6e06d0SAmlan Barua
1185db4deed7SKarl Rupp if (dim[ndim - 1] % 2 == 0) NM = 2;
1186db4deed7SKarl Rupp else NM = 1;
1187ba6e06d0SAmlan Barua
1188ba6e06d0SAmlan Barua j = low;
118957508eceSPierre Jolivet for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
1190*2884b08dSBarry Smith PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i]));
1191ba6e06d0SAmlan Barua indx2[i] = j;
11923564f093SHong Zhang if (k % dim[ndim - 1] == 0) j += NM;
1193ba6e06d0SAmlan Barua j++;
1194ba6e06d0SAmlan Barua }
1195*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * partial_dim, &n1));
1196*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1));
1197*2884b08dSBarry Smith PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2));
1198ba6e06d0SAmlan Barua
11999566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
12009566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
12019566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
12029566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vecscat));
12039566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list1));
12049566063dSJacob Faibussowitsch PetscCall(ISDestroy(&list2));
12059566063dSJacob Faibussowitsch PetscCall(PetscFree(indx1));
12069566063dSJacob Faibussowitsch PetscCall(PetscFree(indx2));
1207bd59e6c6SAmlan Barua #endif
12089496c9aeSAmlan Barua break;
12095b3e41ffSAmlan Barua }
12100cf2e031SBarry Smith #endif
1211e81bb599SAmlan Barua }
12123ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
12135b3e41ffSAmlan Barua }
12145b3e41ffSAmlan Barua
1215350f1385SJose E. Roman /*MC
1216350f1385SJose E. Roman MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.
1217350f1385SJose E. Roman
1218350f1385SJose E. Roman Level: intermediate
1219350f1385SJose E. Roman
12201cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
1221350f1385SJose E. Roman M*/
MatCreate_FFTW(Mat A)1222d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1223d71ae5a4SJacob Faibussowitsch {
1224ce94432eSBarry Smith MPI_Comm comm;
1225b2b77a04SHong Zhang Mat_FFT *fft = (Mat_FFT *)A->data;
1226b2b77a04SHong Zhang Mat_FFTW *fftw;
12270cf2e031SBarry Smith PetscInt ndim = fft->ndim, *dim = fft->dim;
12285274ed1bSHong Zhang const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
12295274ed1bSHong Zhang unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
1230b2b77a04SHong Zhang PetscBool flg;
12314f48bc67SAmlan Barua PetscInt p_flag, partial_dim = 1, ctr;
123211902ff2SHong Zhang PetscMPIInt size, rank;
12339496c9aeSAmlan Barua ptrdiff_t *pdim;
12349496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12350cf2e031SBarry Smith PetscInt tot_dim;
12369496c9aeSAmlan Barua #endif
12379496c9aeSAmlan Barua
1238b2b77a04SHong Zhang PetscFunctionBegin;
12399566063dSJacob Faibussowitsch PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
12409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(comm, &size));
12419566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(comm, &rank));
1242c92418ccSAmlan Barua
12430cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12441878d478SAmlan Barua fftw_mpi_init();
12450cf2e031SBarry Smith #endif
124611902ff2SHong Zhang pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
124711902ff2SHong Zhang pdim[0] = dim[0];
12488ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12498ca4c034SAmlan Barua tot_dim = 2 * dim[0];
12508ca4c034SAmlan Barua #endif
12513564f093SHong Zhang for (ctr = 1; ctr < ndim; ctr++) {
12526882372aSHong Zhang partial_dim *= dim[ctr];
125311902ff2SHong Zhang pdim[ctr] = dim[ctr];
12548ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1255db4deed7SKarl Rupp if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
1256db4deed7SKarl Rupp else tot_dim *= dim[ctr];
12578ca4c034SAmlan Barua #endif
12586882372aSHong Zhang }
12593c3a9c75SAmlan Barua
1260b2b77a04SHong Zhang if (size == 1) {
1261e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12629566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
12630cf2e031SBarry Smith fft->n = fft->N;
1264e81bb599SAmlan Barua #else
12659566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
12660cf2e031SBarry Smith fft->n = tot_dim;
12670cf2e031SBarry Smith #endif
12680cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
12690cf2e031SBarry Smith } else {
12700cf2e031SBarry Smith ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
12710cf2e031SBarry Smith #if !defined(PETSC_USE_COMPLEX)
12720cf2e031SBarry Smith ptrdiff_t temp;
12730cf2e031SBarry Smith PetscInt N1;
1274*2884b08dSBarry Smith #else
1275*2884b08dSBarry Smith PetscInt n1;
1276e81bb599SAmlan Barua #endif
1277e81bb599SAmlan Barua
1278b2b77a04SHong Zhang switch (ndim) {
1279b2b77a04SHong Zhang case 1:
12803c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12813c3a9c75SAmlan Barua SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
1282e5d7f247SAmlan Barua #else
12837a21806cSHong Zhang fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
1284*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0, &fft->n));
1285*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n1, &n1));
1286*2884b08dSBarry Smith PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N));
1287e5d7f247SAmlan Barua #endif
1288b2b77a04SHong Zhang break;
1289b2b77a04SHong Zhang case 2:
12905b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12917a21806cSHong Zhang fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
12920cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1];
12939566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
12945b3e41ffSAmlan Barua #else
1295c3eba89aSHong Zhang fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
129626fbe8dcSKarl Rupp
12970cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
12989566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
12995b3e41ffSAmlan Barua #endif
1300b2b77a04SHong Zhang break;
1301b2b77a04SHong Zhang case 3:
130251d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
13037a21806cSHong Zhang fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
130426fbe8dcSKarl Rupp
13050cf2e031SBarry Smith fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
13069566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
130751d1eed7SAmlan Barua #else
1308c3eba89aSHong Zhang fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
130926fbe8dcSKarl Rupp
13100cf2e031SBarry Smith fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
13119566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
131251d1eed7SAmlan Barua #endif
1313b2b77a04SHong Zhang break;
1314b2b77a04SHong Zhang default:
1315b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
13167a21806cSHong Zhang fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);
131726fbe8dcSKarl Rupp
1318*2884b08dSBarry Smith PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n));
13199566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
1320b3a17365SAmlan Barua #else
1321b3a17365SAmlan Barua temp = pdim[ndim - 1];
132226fbe8dcSKarl Rupp
1323b3a17365SAmlan Barua pdim[ndim - 1] = temp / 2 + 1;
132426fbe8dcSKarl Rupp
1325c3eba89aSHong Zhang fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
132626fbe8dcSKarl Rupp
1327*2884b08dSBarry Smith PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n));
13280cf2e031SBarry Smith N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);
132926fbe8dcSKarl Rupp
1330b3a17365SAmlan Barua pdim[ndim - 1] = temp;
133126fbe8dcSKarl Rupp
13329566063dSJacob Faibussowitsch PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
1333b3a17365SAmlan Barua #endif
1334b2b77a04SHong Zhang break;
1335b2b77a04SHong Zhang }
13360cf2e031SBarry Smith #endif
1337b2b77a04SHong Zhang }
13382277172eSMark Adams free(pdim);
13399566063dSJacob Faibussowitsch PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
13404dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&fftw));
1341b2b77a04SHong Zhang fft->data = (void *)fftw;
1342b2b77a04SHong Zhang
1343*2884b08dSBarry Smith fftw->ndim_fftw = ndim; /* This is dimension of fft */
1344e5d7f247SAmlan Barua fftw->partial_dim = partial_dim;
134526fbe8dcSKarl Rupp
1346f4f49eeaSPierre Jolivet PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
13478c1d5ab3SHong Zhang if (size == 1) {
1348a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1349a1b6d50cSHong Zhang fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
1350a1b6d50cSHong Zhang #else
13518c1d5ab3SHong Zhang fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
1352a1b6d50cSHong Zhang #endif
13538c1d5ab3SHong Zhang }
135426fbe8dcSKarl Rupp
1355b1301b2eSHong Zhang for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];
1356c92418ccSAmlan Barua
1357f4259b30SLisandro Dalcin fftw->p_forward = NULL;
1358f4259b30SLisandro Dalcin fftw->p_backward = NULL;
1359b2b77a04SHong Zhang fftw->p_flag = FFTW_ESTIMATE;
1360b2b77a04SHong Zhang
1361b2b77a04SHong Zhang if (size == 1) {
1362b2b77a04SHong Zhang A->ops->mult = MatMult_SeqFFTW;
1363b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_SeqFFTW;
13640cf2e031SBarry Smith #if !PetscDefined(HAVE_MPIUNI)
1365b2b77a04SHong Zhang } else {
1366b2b77a04SHong Zhang A->ops->mult = MatMult_MPIFFTW;
1367b2b77a04SHong Zhang A->ops->multtranspose = MatMultTranspose_MPIFFTW;
13680cf2e031SBarry Smith #endif
1369b2b77a04SHong Zhang }
1370b2b77a04SHong Zhang fft->matdestroy = MatDestroy_FFTW;
1371b2b77a04SHong Zhang A->assembled = PETSC_TRUE;
13724a52bad8SHong Zhang A->preallocated = PETSC_TRUE;
137326fbe8dcSKarl Rupp
13749566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
13759566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
13769566063dSJacob Faibussowitsch PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));
1377b2b77a04SHong Zhang
1378b2b77a04SHong Zhang /* get runtime options */
1379d0609cedSBarry Smith PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
13809566063dSJacob Faibussowitsch PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
13815f80ce2aSJacob Faibussowitsch if (flg) fftw->p_flag = iplans[p_flag];
1382d0609cedSBarry Smith PetscOptionsEnd();
13833ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS);
1384b2b77a04SHong Zhang }
1385