xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 2f613bf53f46f9356e00a2ca2bd69453be72fc31) !
1b2b77a04SHong Zhang 
2b2b77a04SHong Zhang /*
3b2b77a04SHong Zhang     Provides an interface to the FFTW package.
4c4762a1bSJed Brown     Testing examples can be found in ~src/mat/tests
5b2b77a04SHong Zhang */
6b2b77a04SHong Zhang 
7c6db04a5SJed Brown #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8b2b77a04SHong Zhang EXTERN_C_BEGIN
9c6db04a5SJed Brown #include <fftw3-mpi.h>
10b2b77a04SHong Zhang EXTERN_C_END
11b2b77a04SHong Zhang 
12b2b77a04SHong Zhang typedef struct {
13b9ae062cSHong Zhang   ptrdiff_t    ndim_fftw,*dim_fftw;
14a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
15a1b6d50cSHong Zhang   fftw_iodim64 *iodims;
16a1b6d50cSHong Zhang #else
178c1d5ab3SHong Zhang   fftw_iodim   *iodims;
18a1b6d50cSHong Zhang #endif
19e5d7f247SAmlan Barua   PetscInt     partial_dim;
20b2b77a04SHong Zhang   fftw_plan    p_forward,p_backward;
21b2b77a04SHong Zhang   unsigned     p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
22b2b77a04SHong Zhang   PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
23b2b77a04SHong Zhang                                                             executed for the arrays with which the plan was created */
24b2b77a04SHong Zhang } Mat_FFTW;
25b2b77a04SHong Zhang 
26b2b77a04SHong Zhang extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
27b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
28b2b77a04SHong Zhang extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
29b2b77a04SHong Zhang extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
30b2b77a04SHong Zhang extern PetscErrorCode MatDestroy_FFTW(Mat);
31b2b77a04SHong Zhang extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
325b113e21Ss-sajid-ali extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
33b2b77a04SHong Zhang 
3497504071SAmlan Barua /* MatMult_SeqFFTW performs forward DFT in parallel
3597504071SAmlan Barua    Input parameter:
363564f093SHong Zhang      A - the matrix
3797504071SAmlan Barua      x - the vector on which FDFT will be performed
3897504071SAmlan Barua 
3997504071SAmlan Barua    Output parameter:
4097504071SAmlan Barua      y - vector that stores result of FDFT
4197504071SAmlan Barua */
42b2b77a04SHong Zhang PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
43b2b77a04SHong Zhang {
44b2b77a04SHong Zhang   PetscErrorCode ierr;
45b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
46b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
47f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
48f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
491acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
50a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
51a1b6d50cSHong Zhang   fftw_iodim64   *iodims;
52a1b6d50cSHong Zhang #else
538c1d5ab3SHong Zhang   fftw_iodim     *iodims;
54a1b6d50cSHong Zhang #endif
551acd80e4SHong Zhang   PetscInt       i;
561acd80e4SHong Zhang #endif
571acd80e4SHong Zhang   PetscInt       ndim = fft->ndim,*dim = fft->dim;
58b2b77a04SHong Zhang 
59b2b77a04SHong Zhang   PetscFunctionBegin;
60f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
61b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
62b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
63b2b77a04SHong Zhang     switch (ndim) {
64b2b77a04SHong Zhang     case 1:
6558a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
66b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
6758a912c5SAmlan Barua #else
6858a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
6958a912c5SAmlan Barua #endif
70b2b77a04SHong Zhang       break;
71b2b77a04SHong Zhang     case 2:
7258a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
73b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
7458a912c5SAmlan Barua #else
7558a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
7658a912c5SAmlan Barua #endif
77b2b77a04SHong Zhang       break;
78b2b77a04SHong Zhang     case 3:
7958a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
80b2b77a04SHong Zhang       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
8158a912c5SAmlan Barua #else
8258a912c5SAmlan Barua       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
8358a912c5SAmlan Barua #endif
84b2b77a04SHong Zhang       break;
85b2b77a04SHong Zhang     default:
8658a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
87a1b6d50cSHong Zhang       iodims = fftw->iodims;
88a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
898c1d5ab3SHong Zhang       if (ndim) {
90a1b6d50cSHong Zhang         iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1];
918c1d5ab3SHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
928c1d5ab3SHong Zhang         for (i=ndim-2; i>=0; --i) {
93a1b6d50cSHong Zhang           iodims[i].n = (ptrdiff_t)dim[i];
948c1d5ab3SHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
958c1d5ab3SHong Zhang         }
968c1d5ab3SHong Zhang       }
97a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
98a1b6d50cSHong Zhang #else
99a1b6d50cSHong Zhang       if (ndim) {
100a1b6d50cSHong Zhang         iodims[ndim-1].n = (int)dim[ndim-1];
101a1b6d50cSHong Zhang         iodims[ndim-1].is = iodims[ndim-1].os = 1;
102a1b6d50cSHong Zhang         for (i=ndim-2; i>=0; --i) {
103a1b6d50cSHong Zhang           iodims[i].n = (int)dim[i];
104a1b6d50cSHong Zhang           iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
105a1b6d50cSHong Zhang         }
106a1b6d50cSHong Zhang       }
107a1b6d50cSHong Zhang       fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
108a1b6d50cSHong Zhang #endif
109a1b6d50cSHong Zhang 
11058a912c5SAmlan Barua #else
111a31b9140SHong Zhang       fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
11258a912c5SAmlan Barua #endif
113b2b77a04SHong Zhang       break;
114b2b77a04SHong Zhang     }
115f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
116b2b77a04SHong Zhang     fftw->foutarray = y_array;
117b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
118b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
119b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
120b2b77a04SHong Zhang   } else { /* use existing plan */
121b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
1227e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
123b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
1247e4bc134SDominic Meiser #else
1257e4bc134SDominic Meiser       fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
1267e4bc134SDominic Meiser #endif
127b2b77a04SHong Zhang     } else {
128b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
129b2b77a04SHong Zhang     }
130b2b77a04SHong Zhang   }
131b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
132f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
133b2b77a04SHong Zhang   PetscFunctionReturn(0);
134b2b77a04SHong Zhang }
135b2b77a04SHong Zhang 
13697504071SAmlan Barua /* MatMultTranspose_SeqFFTW performs serial backward DFT
13797504071SAmlan Barua    Input parameter:
1383564f093SHong Zhang      A - the matrix
13997504071SAmlan Barua      x - the vector on which BDFT will be performed
14097504071SAmlan Barua 
14197504071SAmlan Barua    Output parameter:
14297504071SAmlan Barua      y - vector that stores result of BDFT
14397504071SAmlan Barua */
14497504071SAmlan Barua 
145b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
146b2b77a04SHong Zhang {
147b2b77a04SHong Zhang   PetscErrorCode ierr;
148b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
149b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
150f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
151f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
152b2b77a04SHong Zhang   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1531acd80e4SHong Zhang #if defined(PETSC_USE_COMPLEX)
154a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
155a1b6d50cSHong Zhang   fftw_iodim64   *iodims=fftw->iodims;
156a1b6d50cSHong Zhang #else
157a1b6d50cSHong Zhang   fftw_iodim     *iodims=fftw->iodims;
158a1b6d50cSHong Zhang #endif
1591acd80e4SHong Zhang #endif
160b2b77a04SHong Zhang 
161b2b77a04SHong Zhang   PetscFunctionBegin;
162f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
163b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
165b2b77a04SHong Zhang     switch (ndim) {
166b2b77a04SHong Zhang     case 1:
16758a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
168b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
16958a912c5SAmlan Barua #else
170e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17158a912c5SAmlan Barua #endif
172b2b77a04SHong Zhang       break;
173b2b77a04SHong Zhang     case 2:
17458a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
175b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
17658a912c5SAmlan Barua #else
177e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
17858a912c5SAmlan Barua #endif
179b2b77a04SHong Zhang       break;
180b2b77a04SHong Zhang     case 3:
18158a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
182b2b77a04SHong Zhang       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
18358a912c5SAmlan Barua #else
184e81bb599SAmlan Barua       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
18558a912c5SAmlan Barua #endif
186b2b77a04SHong Zhang       break;
187b2b77a04SHong Zhang     default:
18858a912c5SAmlan Barua #if defined(PETSC_USE_COMPLEX)
189a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
190a1b6d50cSHong Zhang       fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
191a1b6d50cSHong Zhang #else
1928c1d5ab3SHong Zhang       fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
193a1b6d50cSHong Zhang #endif
19458a912c5SAmlan Barua #else
195a31b9140SHong Zhang       fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
19658a912c5SAmlan Barua #endif
197b2b77a04SHong Zhang       break;
198b2b77a04SHong Zhang     }
199f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
200b2b77a04SHong Zhang     fftw->boutarray = y_array;
201*2f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
202b2b77a04SHong Zhang   } else { /* use existing plan */
203b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
2047e4bc134SDominic Meiser #if defined(PETSC_USE_COMPLEX)
205b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
2067e4bc134SDominic Meiser #else
2077e4bc134SDominic Meiser       fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
2087e4bc134SDominic Meiser #endif
209b2b77a04SHong Zhang     } else {
210*2f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
211b2b77a04SHong Zhang     }
212b2b77a04SHong Zhang   }
213b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
214f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
215b2b77a04SHong Zhang   PetscFunctionReturn(0);
216b2b77a04SHong Zhang }
217b2b77a04SHong Zhang 
21897504071SAmlan Barua /* MatMult_MPIFFTW performs forward DFT in parallel
21997504071SAmlan Barua    Input parameter:
2203564f093SHong Zhang      A - the matrix
22197504071SAmlan Barua      x - the vector on which FDFT will be performed
22297504071SAmlan Barua 
22397504071SAmlan Barua    Output parameter:
22497504071SAmlan Barua    y   - vector that stores result of FDFT
22597504071SAmlan Barua */
226b2b77a04SHong Zhang PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
227b2b77a04SHong Zhang {
228b2b77a04SHong Zhang   PetscErrorCode ierr;
229b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
230b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
231f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
232f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
233c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
234ce94432eSBarry Smith   MPI_Comm       comm;
235b2b77a04SHong Zhang 
236b2b77a04SHong Zhang   PetscFunctionBegin;
237ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
238f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
239b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
240b2b77a04SHong Zhang   if (!fftw->p_forward) { /* create a plan, then excute it */
241b2b77a04SHong Zhang     switch (ndim) {
242b2b77a04SHong Zhang     case 1:
2433c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
244b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
245ae0a50aaSHong Zhang #else
2464f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
2473c3a9c75SAmlan Barua #endif
248b2b77a04SHong Zhang       break;
249b2b77a04SHong Zhang     case 2:
25097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
251b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2523c3a9c75SAmlan Barua #else
2533c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2543c3a9c75SAmlan Barua #endif
255b2b77a04SHong Zhang       break;
256b2b77a04SHong Zhang     case 3:
2573c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
258b2b77a04SHong Zhang       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2593c3a9c75SAmlan Barua #else
26051d1eed7SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2613c3a9c75SAmlan Barua #endif
262b2b77a04SHong Zhang       break;
263b2b77a04SHong Zhang     default:
2643c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
265c92418ccSAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
2663c3a9c75SAmlan Barua #else
2673c3a9c75SAmlan Barua       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
2683c3a9c75SAmlan Barua #endif
269b2b77a04SHong Zhang       break;
270b2b77a04SHong Zhang     }
271f4fca6d4SMatthew G. Knepley     fftw->finarray  = (PetscScalar *) x_array;
272b2b77a04SHong Zhang     fftw->foutarray = y_array;
273b2b77a04SHong Zhang     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
274b2b77a04SHong Zhang                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
275b2b77a04SHong Zhang     fftw_execute(fftw->p_forward);
276b2b77a04SHong Zhang   } else { /* use existing plan */
277b2b77a04SHong Zhang     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
278b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
279b2b77a04SHong Zhang     } else {
280b2b77a04SHong Zhang       fftw_execute(fftw->p_forward);
281b2b77a04SHong Zhang     }
282b2b77a04SHong Zhang   }
283b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
284f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
285b2b77a04SHong Zhang   PetscFunctionReturn(0);
286b2b77a04SHong Zhang }
287b2b77a04SHong Zhang 
28897504071SAmlan Barua /* MatMultTranspose_MPIFFTW performs parallel backward DFT
28997504071SAmlan Barua    Input parameter:
2903564f093SHong Zhang      A - the matrix
29197504071SAmlan Barua      x - the vector on which BDFT will be performed
29297504071SAmlan Barua 
29397504071SAmlan Barua    Output parameter:
29497504071SAmlan Barua      y - vector that stores result of BDFT
29597504071SAmlan Barua */
296b2b77a04SHong Zhang PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
297b2b77a04SHong Zhang {
298b2b77a04SHong Zhang   PetscErrorCode ierr;
299b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
300b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
301f4fca6d4SMatthew G. Knepley   const PetscScalar *x_array;
302f4fca6d4SMatthew G. Knepley   PetscScalar    *y_array;
303c92418ccSAmlan Barua   PetscInt       ndim=fft->ndim,*dim=fft->dim;
304ce94432eSBarry Smith   MPI_Comm       comm;
305b2b77a04SHong Zhang 
306b2b77a04SHong Zhang   PetscFunctionBegin;
307ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
308f4fca6d4SMatthew G. Knepley   ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr);
309b2b77a04SHong Zhang   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
310b2b77a04SHong Zhang   if (!fftw->p_backward) { /* create a plan, then excute it */
311b2b77a04SHong Zhang     switch (ndim) {
312b2b77a04SHong Zhang     case 1:
3133c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
314b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
315ae0a50aaSHong Zhang #else
3164f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
3173c3a9c75SAmlan Barua #endif
318b2b77a04SHong Zhang       break;
319b2b77a04SHong Zhang     case 2:
32097504071SAmlan Barua #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
321b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3223c3a9c75SAmlan Barua #else
3233c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3243c3a9c75SAmlan Barua #endif
325b2b77a04SHong Zhang       break;
326b2b77a04SHong Zhang     case 3:
3273c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
328b2b77a04SHong Zhang       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3293c3a9c75SAmlan Barua #else
3303c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3313c3a9c75SAmlan Barua #endif
332b2b77a04SHong Zhang       break;
333b2b77a04SHong Zhang     default:
3343c3a9c75SAmlan Barua #if defined(PETSC_USE_COMPLEX)
335c92418ccSAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
3363c3a9c75SAmlan Barua #else
3373c3a9c75SAmlan Barua       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
3383c3a9c75SAmlan Barua #endif
339b2b77a04SHong Zhang       break;
340b2b77a04SHong Zhang     }
341f4fca6d4SMatthew G. Knepley     fftw->binarray  = (PetscScalar *) x_array;
342b2b77a04SHong Zhang     fftw->boutarray = y_array;
343*2f613bf5SBarry Smith     fftw_execute(fftw->p_backward);
344b2b77a04SHong Zhang   } else { /* use existing plan */
345b2b77a04SHong Zhang     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
346b2b77a04SHong Zhang       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
347b2b77a04SHong Zhang     } else {
348*2f613bf5SBarry Smith       fftw_execute(fftw->p_backward);
349b2b77a04SHong Zhang     }
350b2b77a04SHong Zhang   }
351b2b77a04SHong Zhang   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
352f4fca6d4SMatthew G. Knepley   ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr);
353b2b77a04SHong Zhang   PetscFunctionReturn(0);
354b2b77a04SHong Zhang }
355b2b77a04SHong Zhang 
356b2b77a04SHong Zhang PetscErrorCode MatDestroy_FFTW(Mat A)
357b2b77a04SHong Zhang {
358b2b77a04SHong Zhang   Mat_FFT        *fft  = (Mat_FFT*)A->data;
359b2b77a04SHong Zhang   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
360b2b77a04SHong Zhang   PetscErrorCode ierr;
361b2b77a04SHong Zhang 
362b2b77a04SHong Zhang   PetscFunctionBegin;
363b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_forward);
364b2b77a04SHong Zhang   fftw_destroy_plan(fftw->p_backward);
3658c1d5ab3SHong Zhang   if (fftw->iodims) {
3668c1d5ab3SHong Zhang     free(fftw->iodims);
3678c1d5ab3SHong Zhang   }
368bb5bf6f6SHong Zhang   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
369bf0cc555SLisandro Dalcin   ierr = PetscFree(fft->data);CHKERRQ(ierr);
3706ccf0b3eSHong Zhang   fftw_mpi_cleanup();
371b2b77a04SHong Zhang   PetscFunctionReturn(0);
372b2b77a04SHong Zhang }
373b2b77a04SHong Zhang 
374c6db04a5SJed Brown #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
375b2b77a04SHong Zhang PetscErrorCode VecDestroy_MPIFFTW(Vec v)
376b2b77a04SHong Zhang {
377b2b77a04SHong Zhang   PetscErrorCode ierr;
378b2b77a04SHong Zhang   PetscScalar    *array;
379b2b77a04SHong Zhang 
380b2b77a04SHong Zhang   PetscFunctionBegin;
381b2b77a04SHong Zhang   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
382*2f613bf5SBarry Smith   fftw_free((fftw_complex*)array);
383b2b77a04SHong Zhang   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
384b2b77a04SHong Zhang   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
385b2b77a04SHong Zhang   PetscFunctionReturn(0);
386b2b77a04SHong Zhang }
387b2b77a04SHong Zhang 
3885b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
3895b113e21Ss-sajid-ali {
3905b113e21Ss-sajid-ali   PetscErrorCode ierr;
3915b113e21Ss-sajid-ali   Mat            A;
3925b113e21Ss-sajid-ali 
3935b113e21Ss-sajid-ali   PetscFunctionBegin;
3945b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
3955b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr);
3965b113e21Ss-sajid-ali   PetscFunctionReturn(0);
3975b113e21Ss-sajid-ali }
3985b113e21Ss-sajid-ali 
3995b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
4005b113e21Ss-sajid-ali {
4015b113e21Ss-sajid-ali   PetscErrorCode ierr;
4025b113e21Ss-sajid-ali   Mat            A;
4035b113e21Ss-sajid-ali 
4045b113e21Ss-sajid-ali   PetscFunctionBegin;
4055b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
4065b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr);
4075b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4085b113e21Ss-sajid-ali }
4095b113e21Ss-sajid-ali 
4105b113e21Ss-sajid-ali static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
4115b113e21Ss-sajid-ali {
4125b113e21Ss-sajid-ali   PetscErrorCode ierr;
4135b113e21Ss-sajid-ali   Mat            A;
4145b113e21Ss-sajid-ali 
4155b113e21Ss-sajid-ali   PetscFunctionBegin;
4165b113e21Ss-sajid-ali   ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr);
4175b113e21Ss-sajid-ali   ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr);
4185b113e21Ss-sajid-ali   PetscFunctionReturn(0);
4195b113e21Ss-sajid-ali }
4205b113e21Ss-sajid-ali 
4214be45526SHong Zhang /*@
4222a7a6963SBarry Smith    MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
4234f7415efSAmlan Barua      parallel layout determined by FFTW
4244f7415efSAmlan Barua 
4254f7415efSAmlan Barua    Collective on Mat
4264f7415efSAmlan Barua 
4274f7415efSAmlan Barua    Input Parameter:
4283564f093SHong Zhang .   A - the matrix
4294f7415efSAmlan Barua 
430d8d19677SJose E. Roman    Output Parameters:
431cc55f3b1SHong Zhang +   x - (optional) input vector of forward FFTW
4326b867d5aSJose E. Roman .   y - (optional) output vector of forward FFTW
433cc55f3b1SHong Zhang -   z - (optional) output vector of backward FFTW
4344f7415efSAmlan Barua 
4354f7415efSAmlan Barua   Level: advanced
4363564f093SHong Zhang 
43797504071SAmlan Barua   Note: The parallel layout of output of forward FFTW is always same as the input
43897504071SAmlan Barua         of the backward FFTW. But parallel layout of the input vector of forward
43997504071SAmlan Barua         FFTW might not be same as the output of backward FFTW.
44097504071SAmlan Barua         Also note that we need to provide enough space while doing parallel real transform.
44197504071SAmlan Barua         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
44297504071SAmlan Barua         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
443e0ec536eSAmlan Barua         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
444e0ec536eSAmlan Barua         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
445e0ec536eSAmlan Barua         zeros if it is odd only one zero is needed.
446e0ec536eSAmlan Barua         Lastly one needs some scratch space at the end of data set in each process. alloc_local
447e0ec536eSAmlan Barua         figures out how much space is needed, i.e. it figures out the data+scratch space for
448e0ec536eSAmlan Barua         each processor and returns that.
4494f7415efSAmlan Barua 
45075f45d78SBarry Smith .seealso: MatCreateFFT()
4514be45526SHong Zhang @*/
4522a7a6963SBarry Smith PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
4534be45526SHong Zhang {
4544be45526SHong Zhang   PetscErrorCode ierr;
455b4c3921fSHong Zhang 
4564be45526SHong Zhang   PetscFunctionBegin;
457163d334eSBarry Smith   ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
4584be45526SHong Zhang   PetscFunctionReturn(0);
4594be45526SHong Zhang }
4604be45526SHong Zhang 
4612a7a6963SBarry Smith PetscErrorCode  MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
4624f7415efSAmlan Barua {
4634f7415efSAmlan Barua   PetscErrorCode ierr;
4644f7415efSAmlan Barua   PetscMPIInt    size,rank;
465ce94432eSBarry Smith   MPI_Comm       comm;
4664f7415efSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
4674f7415efSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
4689496c9aeSAmlan Barua   PetscInt       N     = fft->N;
4694f7415efSAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;
4704f7415efSAmlan Barua 
4714f7415efSAmlan Barua   PetscFunctionBegin;
4724f7415efSAmlan Barua   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
4734f7415efSAmlan Barua   PetscValidType(A,1);
474ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4754f7415efSAmlan Barua 
476ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
477ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
4784f7415efSAmlan Barua   if (size == 1) { /* sequential case */
4794f7415efSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4804f7415efSAmlan Barua     if (fin)  {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
4814f7415efSAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
4824f7415efSAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
4834f7415efSAmlan Barua #else
4848ca4c034SAmlan Barua     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
4858ca4c034SAmlan Barua     if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
4868ca4c034SAmlan Barua     if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
4874f7415efSAmlan Barua #endif
48897504071SAmlan Barua   } else { /* parallel cases */
4894f7415efSAmlan Barua     ptrdiff_t    alloc_local,local_n0,local_0_start;
4909496c9aeSAmlan Barua     ptrdiff_t    local_n1;
4919496c9aeSAmlan Barua     fftw_complex *data_fout;
4929496c9aeSAmlan Barua     ptrdiff_t    local_1_start;
4939496c9aeSAmlan Barua #if defined(PETSC_USE_COMPLEX)
4949496c9aeSAmlan Barua     fftw_complex *data_fin,*data_bout;
4959496c9aeSAmlan Barua #else
4964f7415efSAmlan Barua     double    *data_finr,*data_boutr;
4976ccf0b3eSHong Zhang     PetscInt  n1,N1;
4989496c9aeSAmlan Barua     ptrdiff_t temp;
4999496c9aeSAmlan Barua #endif
5009496c9aeSAmlan Barua 
5014f7415efSAmlan Barua     switch (ndim) {
5024f7415efSAmlan Barua     case 1:
50357625b84SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5044f8276c3SHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
50557625b84SAmlan Barua #else
50657625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
50757625b84SAmlan Barua       if (fin) {
50857625b84SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
509778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5105b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5115b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
51257625b84SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
51357625b84SAmlan Barua       }
51457625b84SAmlan Barua       if (fout) {
51557625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
516778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5175b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5185b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
51957625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
52057625b84SAmlan Barua       }
52157625b84SAmlan Barua       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
52257625b84SAmlan Barua       if (bout) {
52357625b84SAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
524778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5255b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5265b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
52757625b84SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
52857625b84SAmlan Barua       }
52957625b84SAmlan Barua       break;
53057625b84SAmlan Barua #endif
5314f7415efSAmlan Barua     case 2:
53297504071SAmlan Barua #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
5334f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5344f7415efSAmlan Barua       N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
5354f7415efSAmlan Barua       if (fin) {
5364f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
537778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5385b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5395b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5404f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5414f7415efSAmlan Barua       }
54257625b84SAmlan Barua       if (fout) {
54357625b84SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
544778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5455b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5465b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
54757625b84SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
54857625b84SAmlan Barua       }
5495b113e21Ss-sajid-ali       if (bout) {
5505b113e21Ss-sajid-ali         data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
5515b113e21Ss-sajid-ali         ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
5525b113e21Ss-sajid-ali         ierr       = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5535b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5545b113e21Ss-sajid-ali         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5555b113e21Ss-sajid-ali       }
5564f7415efSAmlan Barua #else
5574f7415efSAmlan Barua       /* Get local size */
5584f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
5594f7415efSAmlan Barua       if (fin) {
5604f7415efSAmlan Barua         data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
561778a2246SBarry Smith         ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
5625b113e21Ss-sajid-ali         ierr     = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5635b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5644f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5654f7415efSAmlan Barua       }
5664f7415efSAmlan Barua       if (fout) {
5674f7415efSAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
568778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5695b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5705b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5714f7415efSAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5724f7415efSAmlan Barua       }
5734f7415efSAmlan Barua       if (bout) {
5744f7415efSAmlan Barua         data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
575778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
5765b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5775b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
5784f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
5794f7415efSAmlan Barua       }
5804f7415efSAmlan Barua #endif
5814f7415efSAmlan Barua       break;
5824f7415efSAmlan Barua     case 3:
5834f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
5844f7415efSAmlan Barua       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
5854f7415efSAmlan Barua       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
5864f7415efSAmlan Barua       if (fin) {
5874f7415efSAmlan Barua         data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
588778a2246SBarry Smith         ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
5895b113e21Ss-sajid-ali         ierr      = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5905b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
5914f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
5924f7415efSAmlan Barua       }
5935b113e21Ss-sajid-ali       if (fout) {
5945b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
5955b113e21Ss-sajid-ali         ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
5965b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
5975b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
5985b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
5995b113e21Ss-sajid-ali       }
6004f7415efSAmlan Barua       if (bout) {
6014f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
602778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
6035b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6045b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6054f7415efSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6064f7415efSAmlan Barua       }
6074f7415efSAmlan Barua #else
6080c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
6090c9b18e4SAmlan Barua       if (fin) {
6100c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
611778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6125b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6135b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6140c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6150c9b18e4SAmlan Barua       }
6160c9b18e4SAmlan Barua       if (fout) {
6170c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
618778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6195b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6205b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6210c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6220c9b18e4SAmlan Barua       }
6230c9b18e4SAmlan Barua       if (bout) {
6240c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
625778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
6265b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6275b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6280c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6290c9b18e4SAmlan Barua       }
6304f7415efSAmlan Barua #endif
6314f7415efSAmlan Barua       break;
6324f7415efSAmlan Barua     default:
6334f7415efSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
6344f7415efSAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
63526fbe8dcSKarl Rupp 
6364f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
63726fbe8dcSKarl Rupp 
6384f7415efSAmlan Barua       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
6394f7415efSAmlan Barua       N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
64026fbe8dcSKarl Rupp 
6414f7415efSAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
6424f7415efSAmlan Barua 
6434f7415efSAmlan Barua       if (fin) {
6444f7415efSAmlan Barua         data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
645778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
6465b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6475b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6484f7415efSAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6494f7415efSAmlan Barua       }
6505b113e21Ss-sajid-ali       if (fout) {
6515b113e21Ss-sajid-ali         data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
6525b113e21Ss-sajid-ali         ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6535b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6545b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6555b113e21Ss-sajid-ali         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6565b113e21Ss-sajid-ali       }
6574f7415efSAmlan Barua       if (bout) {
6584f7415efSAmlan Barua         data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
659778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
6605b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6615b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6629496c9aeSAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6634f7415efSAmlan Barua       }
6644f7415efSAmlan Barua #else
6650c9b18e4SAmlan Barua       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
6660c9b18e4SAmlan Barua       if (fin) {
6670c9b18e4SAmlan Barua         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
668778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
6695b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6705b113e21Ss-sajid-ali         (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
6710c9b18e4SAmlan Barua         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
6720c9b18e4SAmlan Barua       }
6730c9b18e4SAmlan Barua       if (fout) {
6740c9b18e4SAmlan Barua         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
675778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
6765b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6775b113e21Ss-sajid-ali         (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
6780c9b18e4SAmlan Barua         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
6790c9b18e4SAmlan Barua       }
6800c9b18e4SAmlan Barua       if (bout) {
6810c9b18e4SAmlan Barua         data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
682778a2246SBarry Smith         ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
6835b113e21Ss-sajid-ali         ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr);
6845b113e21Ss-sajid-ali         (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
6850c9b18e4SAmlan Barua         (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
6860c9b18e4SAmlan Barua       }
6874f7415efSAmlan Barua #endif
6884f7415efSAmlan Barua       break;
6894f7415efSAmlan Barua     }
690f9d91177SJunchao Zhang     /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
691f9d91177SJunchao Zhang        v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
692f9d91177SJunchao Zhang        memory leaks. We void these pointers here to avoid acident uses.
693f9d91177SJunchao Zhang      */
694f9d91177SJunchao Zhang     if (fin)  (*fin)->ops->replacearray = NULL;
695f9d91177SJunchao Zhang     if (fout) (*fout)->ops->replacearray = NULL;
696f9d91177SJunchao Zhang     if (bout) (*bout)->ops->replacearray = NULL;
6974f7415efSAmlan Barua   }
6984f7415efSAmlan Barua   PetscFunctionReturn(0);
6994f7415efSAmlan Barua }
7004f7415efSAmlan Barua 
7013564f093SHong Zhang /*@
7023564f093SHong Zhang    VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.
70354efbe56SHong Zhang 
7043564f093SHong Zhang    Collective on Mat
7053564f093SHong Zhang 
7063564f093SHong Zhang    Input Parameters:
7073564f093SHong Zhang +  A - FFTW matrix
7083564f093SHong Zhang -  x - the PETSc vector
7093564f093SHong Zhang 
7103564f093SHong Zhang    Output Parameters:
7113564f093SHong Zhang .  y - the FFTW vector
7123564f093SHong Zhang 
713b2b77a04SHong Zhang   Options Database Keys:
7143564f093SHong Zhang . -mat_fftw_plannerflags - set FFTW planner flags
715b2b77a04SHong Zhang 
716b2b77a04SHong Zhang    Level: intermediate
7173564f093SHong Zhang 
71897504071SAmlan Barua    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
71997504071SAmlan Barua          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
72097504071SAmlan Barua          zeros. This routine does that job by scattering operation.
72197504071SAmlan Barua 
7223564f093SHong Zhang .seealso: VecScatterFFTWToPetsc()
7233564f093SHong Zhang @*/
7243564f093SHong Zhang PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
7253564f093SHong Zhang {
7263564f093SHong Zhang   PetscErrorCode ierr;
727b2b77a04SHong Zhang 
7283564f093SHong Zhang   PetscFunctionBegin;
729163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
7303564f093SHong Zhang   PetscFunctionReturn(0);
7313564f093SHong Zhang }
7323c3a9c75SAmlan Barua 
73374a26884SAmlan Barua PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
7343c3a9c75SAmlan Barua {
7353c3a9c75SAmlan Barua   PetscErrorCode ierr;
736ce94432eSBarry Smith   MPI_Comm       comm;
7373c3a9c75SAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
7383c3a9c75SAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
7399496c9aeSAmlan Barua   PetscInt       N     =fft->N;
740b5d11533SAmlan Barua   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
7419496c9aeSAmlan Barua   PetscInt       low;
7427a21806cSHong Zhang   PetscMPIInt    rank,size;
7437a21806cSHong Zhang   PetscInt       vsize,vsize1;
7447a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
7459496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
7463c3a9c75SAmlan Barua   VecScatter     vecscat;
7473c3a9c75SAmlan Barua   IS             list1,list2;
7489496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
7499496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
7509496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
751a31b9140SHong Zhang   PetscInt       NM;
7529496c9aeSAmlan Barua   ptrdiff_t      temp;
7539496c9aeSAmlan Barua #endif
7543c3a9c75SAmlan Barua 
7553564f093SHong Zhang   PetscFunctionBegin;
756ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
757ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
758ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
7591e1ea65dSPierre Jolivet   ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr);
7603c3a9c75SAmlan Barua 
7613564f093SHong Zhang   if (size==1) {
7628ca4c034SAmlan Barua     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
7638ca4c034SAmlan Barua     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
7649496c9aeSAmlan Barua     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
7659448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
7666971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7676971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
7686971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
769b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
7703564f093SHong Zhang   } else {
7713c3a9c75SAmlan Barua     switch (ndim) {
7723c3a9c75SAmlan Barua     case 1:
77364657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7747a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
77526fbe8dcSKarl Rupp 
7761e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);CHKERRQ(ierr);
7771e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0,low,1,&list2);CHKERRQ(ierr);
7789448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
77964657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
78064657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
78164657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
78264657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
78364657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
78464657d84SAmlan Barua #else
785e5d7f247SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
78664657d84SAmlan Barua #endif
7873c3a9c75SAmlan Barua       break;
7883c3a9c75SAmlan Barua     case 2:
789bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
7907a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
79126fbe8dcSKarl Rupp 
7921e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
7931e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
7949448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
795bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
796bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
798bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
799bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
800bd59e6c6SAmlan Barua #else
801c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
80226fbe8dcSKarl Rupp 
803854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
804854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
8053c3a9c75SAmlan Barua 
8063564f093SHong Zhang       if (dim[1]%2==0) {
8073c3a9c75SAmlan Barua         NM = dim[1]+2;
8083564f093SHong Zhang       } else {
8093c3a9c75SAmlan Barua         NM = dim[1]+1;
8103564f093SHong Zhang       }
8113c3a9c75SAmlan Barua       for (i=0; i<local_n0; i++) {
8123c3a9c75SAmlan Barua         for (j=0; j<dim[1]; j++) {
8133c3a9c75SAmlan Barua           tempindx  = i*dim[1] + j;
8143c3a9c75SAmlan Barua           tempindx1 = i*NM + j;
81526fbe8dcSKarl Rupp 
8165b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
8173c3a9c75SAmlan Barua           indx2[tempindx]=low+tempindx1;
8183c3a9c75SAmlan Barua         }
8193c3a9c75SAmlan Barua       }
8203c3a9c75SAmlan Barua 
8213c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
8223c3a9c75SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8233c3a9c75SAmlan Barua 
8249448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
825f76f76beSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826f76f76beSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827f76f76beSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
828b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
829b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
830b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
831b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
832bd59e6c6SAmlan Barua #endif
8339496c9aeSAmlan Barua       break;
8343c3a9c75SAmlan Barua 
8353c3a9c75SAmlan Barua     case 3:
836bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8377a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
83826fbe8dcSKarl Rupp 
8391e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
8401e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
8419448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
842bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
845bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
846bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
847bd59e6c6SAmlan Barua #else
848c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
849f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
8507a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
85126fbe8dcSKarl Rupp 
852854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
853854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
85451d1eed7SAmlan Barua 
855db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
856db4deed7SKarl Rupp       else             NM = dim[2]+1;
85751d1eed7SAmlan Barua 
85851d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
85951d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
86051d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
86151d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
86251d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
86326fbe8dcSKarl Rupp 
86451d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
86551d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
86651d1eed7SAmlan Barua           }
86751d1eed7SAmlan Barua         }
86851d1eed7SAmlan Barua       }
86951d1eed7SAmlan Barua 
87051d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
87151d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
8729448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
87351d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87451d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
87551d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
876b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
877b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
878b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
879b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
880bd59e6c6SAmlan Barua #endif
8819496c9aeSAmlan Barua       break;
8823c3a9c75SAmlan Barua 
8833c3a9c75SAmlan Barua     default:
884bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
8857a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
88626fbe8dcSKarl Rupp 
8871e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
8881e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
8899448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
890bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
893bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
894bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
895bd59e6c6SAmlan Barua #else
896c4762a1bSJed Brown       /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
897f1ade23cSHong Zhang       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
898e5d7f247SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
89926fbe8dcSKarl Rupp 
900e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
90126fbe8dcSKarl Rupp 
9027a21806cSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
90326fbe8dcSKarl Rupp 
904e5d7f247SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
905e5d7f247SAmlan Barua 
906e5d7f247SAmlan Barua       partial_dim = fftw->partial_dim;
907e5d7f247SAmlan Barua 
908854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
909854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
910e5d7f247SAmlan Barua 
911db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
912db4deed7SKarl Rupp       else                  NM = 1;
913e5d7f247SAmlan Barua 
9146971673cSAmlan Barua       j = low;
9153564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
9166971673cSAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
9176971673cSAmlan Barua         indx2[i] = j;
91826fbe8dcSKarl Rupp         if (k%dim[ndim-1]==0) j+=NM;
9196971673cSAmlan Barua         j++;
9206971673cSAmlan Barua       }
9216971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
9226971673cSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
9239448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
9246971673cSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9256971673cSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9266971673cSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
927b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
928b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
929b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
930b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
931bd59e6c6SAmlan Barua #endif
9329496c9aeSAmlan Barua       break;
9333c3a9c75SAmlan Barua     }
934e81bb599SAmlan Barua   }
9353564f093SHong Zhang   PetscFunctionReturn(0);
9363c3a9c75SAmlan Barua }
9373c3a9c75SAmlan Barua 
9383564f093SHong Zhang /*@
9393564f093SHong Zhang    VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.
9403564f093SHong Zhang 
9413564f093SHong Zhang    Collective on Mat
9423564f093SHong Zhang 
9433564f093SHong Zhang     Input Parameters:
9443564f093SHong Zhang +   A - FFTW matrix
9453564f093SHong Zhang -   x - FFTW vector
9463564f093SHong Zhang 
9473564f093SHong Zhang    Output Parameters:
9483564f093SHong Zhang .  y - PETSc vector
9493564f093SHong Zhang 
9503564f093SHong Zhang    Level: intermediate
9513564f093SHong Zhang 
9523564f093SHong Zhang    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
9533564f093SHong Zhang          VecScatterFFTWToPetsc removes those extra zeros.
9543564f093SHong Zhang 
9553564f093SHong Zhang .seealso: VecScatterPetscToFFTW()
9563564f093SHong Zhang @*/
95774a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
9583c3a9c75SAmlan Barua {
9593c3a9c75SAmlan Barua   PetscErrorCode ierr;
9605fd66863SKarl Rupp 
9613c3a9c75SAmlan Barua   PetscFunctionBegin;
962163d334eSBarry Smith   ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
9633c3a9c75SAmlan Barua   PetscFunctionReturn(0);
9643c3a9c75SAmlan Barua }
96554efbe56SHong Zhang 
96674a26884SAmlan Barua PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
9675b3e41ffSAmlan Barua {
9685b3e41ffSAmlan Barua   PetscErrorCode ierr;
969ce94432eSBarry Smith   MPI_Comm       comm;
9705b3e41ffSAmlan Barua   Mat_FFT        *fft  = (Mat_FFT*)A->data;
9715b3e41ffSAmlan Barua   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
9729496c9aeSAmlan Barua   PetscInt       N     = fft->N;
973b3655f67SAmlan Barua   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
9749496c9aeSAmlan Barua   PetscInt       low;
9757a21806cSHong Zhang   PetscMPIInt    rank,size;
9767a21806cSHong Zhang   ptrdiff_t      local_n0,local_0_start;
9779496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
9785b3e41ffSAmlan Barua   VecScatter     vecscat;
9795b3e41ffSAmlan Barua   IS             list1,list2;
9809496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
9819496c9aeSAmlan Barua   PetscInt       i,j,k,partial_dim;
9829496c9aeSAmlan Barua   PetscInt       *indx1, *indx2, tempindx, tempindx1;
983a31b9140SHong Zhang   PetscInt       NM;
9849496c9aeSAmlan Barua   ptrdiff_t      temp;
9859496c9aeSAmlan Barua #endif
9869496c9aeSAmlan Barua 
9873564f093SHong Zhang   PetscFunctionBegin;
988ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
989ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
990ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
9910298fd71SBarry Smith   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
9925b3e41ffSAmlan Barua 
993e81bb599SAmlan Barua   if (size==1) {
994b3655f67SAmlan Barua     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
9959448b7f1SJunchao Zhang     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
9966971673cSAmlan Barua     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9976971673cSAmlan Barua     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
9986971673cSAmlan Barua     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
999b223cc97SAmlan Barua     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1000e81bb599SAmlan Barua 
10013564f093SHong Zhang   } else {
1002e81bb599SAmlan Barua     switch (ndim) {
1003e81bb599SAmlan Barua     case 1:
100464657d84SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10057a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
100626fbe8dcSKarl Rupp 
10071e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);CHKERRQ(ierr);
10081e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n1,low,1,&list2);CHKERRQ(ierr);
10099448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
101064657d84SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101164657d84SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
101264657d84SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
101364657d84SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
101464657d84SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
101564657d84SAmlan Barua #else
10166a506ed8SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
101764657d84SAmlan Barua #endif
10185b3e41ffSAmlan Barua       break;
10195b3e41ffSAmlan Barua     case 2:
1020bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10217a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
102226fbe8dcSKarl Rupp 
10231e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
10241e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
10259448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1026bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1029bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1030bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1031bd59e6c6SAmlan Barua #else
10327a21806cSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
103326fbe8dcSKarl Rupp 
1034854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1035854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
10365b3e41ffSAmlan Barua 
1037db4deed7SKarl Rupp       if (dim[1]%2==0) NM = dim[1]+2;
1038db4deed7SKarl Rupp       else             NM = dim[1]+1;
10395b3e41ffSAmlan Barua 
10405b3e41ffSAmlan Barua       for (i=0; i<local_n0; i++) {
10415b3e41ffSAmlan Barua         for (j=0; j<dim[1]; j++) {
10425b3e41ffSAmlan Barua           tempindx = i*dim[1] + j;
10435b3e41ffSAmlan Barua           tempindx1 = i*NM + j;
104426fbe8dcSKarl Rupp 
10455b3e41ffSAmlan Barua           indx1[tempindx]=local_0_start*dim[1]+tempindx;
10465b3e41ffSAmlan Barua           indx2[tempindx]=low+tempindx1;
10475b3e41ffSAmlan Barua         }
10485b3e41ffSAmlan Barua       }
10495b3e41ffSAmlan Barua 
10505b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
10515b3e41ffSAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
10525b3e41ffSAmlan Barua 
10539448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
10545b3e41ffSAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10555b3e41ffSAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
10565b3e41ffSAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1057b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1058b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1059b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1060b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1061bd59e6c6SAmlan Barua #endif
10629496c9aeSAmlan Barua       break;
10635b3e41ffSAmlan Barua     case 3:
1064bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
10657a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
106626fbe8dcSKarl Rupp 
10671e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
10681e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
10699448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1070bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1071bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1072bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1073bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1074bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1075bd59e6c6SAmlan Barua #else
10767a21806cSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
107726fbe8dcSKarl Rupp 
1078854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1079854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
108051d1eed7SAmlan Barua 
1081db4deed7SKarl Rupp       if (dim[2]%2==0) NM = dim[2]+2;
1082db4deed7SKarl Rupp       else             NM = dim[2]+1;
108351d1eed7SAmlan Barua 
108451d1eed7SAmlan Barua       for (i=0; i<local_n0; i++) {
108551d1eed7SAmlan Barua         for (j=0; j<dim[1]; j++) {
108651d1eed7SAmlan Barua           for (k=0; k<dim[2]; k++) {
108751d1eed7SAmlan Barua             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
108851d1eed7SAmlan Barua             tempindx1 = i*dim[1]*NM + j*NM + k;
108926fbe8dcSKarl Rupp 
109051d1eed7SAmlan Barua             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
109151d1eed7SAmlan Barua             indx2[tempindx]=low+tempindx1;
109251d1eed7SAmlan Barua           }
109351d1eed7SAmlan Barua         }
109451d1eed7SAmlan Barua       }
109551d1eed7SAmlan Barua 
109651d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
109751d1eed7SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
109851d1eed7SAmlan Barua 
10999448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
110051d1eed7SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
110151d1eed7SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
110251d1eed7SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1103b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1104b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1105b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1106b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1107bd59e6c6SAmlan Barua #endif
11089496c9aeSAmlan Barua       break;
11095b3e41ffSAmlan Barua     default:
1110bd59e6c6SAmlan Barua #if defined(PETSC_USE_COMPLEX)
11117a21806cSHong Zhang       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
111226fbe8dcSKarl Rupp 
11131e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
11141e1ea65dSPierre Jolivet       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
11159448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1116bd59e6c6SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1117bd59e6c6SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118bd59e6c6SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1119bd59e6c6SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1120bd59e6c6SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1121bd59e6c6SAmlan Barua #else
1122ba6e06d0SAmlan Barua       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
112326fbe8dcSKarl Rupp 
1124ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
112526fbe8dcSKarl Rupp 
1126c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
112726fbe8dcSKarl Rupp 
1128ba6e06d0SAmlan Barua       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1129ba6e06d0SAmlan Barua 
1130ba6e06d0SAmlan Barua       partial_dim = fftw->partial_dim;
1131ba6e06d0SAmlan Barua 
1132854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1133854ce69bSBarry Smith       ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1134ba6e06d0SAmlan Barua 
1135db4deed7SKarl Rupp       if (dim[ndim-1]%2==0) NM = 2;
1136db4deed7SKarl Rupp       else                  NM = 1;
1137ba6e06d0SAmlan Barua 
1138ba6e06d0SAmlan Barua       j = low;
11393564f093SHong Zhang       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1140ba6e06d0SAmlan Barua         indx1[i] = local_0_start*partial_dim + i;
1141ba6e06d0SAmlan Barua         indx2[i] = j;
11423564f093SHong Zhang         if (k%dim[ndim-1]==0) j+=NM;
1143ba6e06d0SAmlan Barua         j++;
1144ba6e06d0SAmlan Barua       }
1145ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1146ba6e06d0SAmlan Barua       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1147ba6e06d0SAmlan Barua 
11489448b7f1SJunchao Zhang       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1149ba6e06d0SAmlan Barua       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150ba6e06d0SAmlan Barua       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151ba6e06d0SAmlan Barua       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1152b223cc97SAmlan Barua       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1153b223cc97SAmlan Barua       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1154b223cc97SAmlan Barua       ierr = PetscFree(indx1);CHKERRQ(ierr);
1155b223cc97SAmlan Barua       ierr = PetscFree(indx2);CHKERRQ(ierr);
1156bd59e6c6SAmlan Barua #endif
11579496c9aeSAmlan Barua       break;
11585b3e41ffSAmlan Barua     }
1159e81bb599SAmlan Barua   }
11603564f093SHong Zhang   PetscFunctionReturn(0);
11615b3e41ffSAmlan Barua }
11625b3e41ffSAmlan Barua 
11633c3a9c75SAmlan Barua /*
11643564f093SHong Zhang     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
11653564f093SHong Zhang 
11663c3a9c75SAmlan Barua   Options Database Keys:
11673c3a9c75SAmlan Barua + -mat_fftw_plannerflags - set FFTW planner flags
11683c3a9c75SAmlan Barua 
11693c3a9c75SAmlan Barua    Level: intermediate
11703c3a9c75SAmlan Barua 
11713c3a9c75SAmlan Barua */
11728cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1173b2b77a04SHong Zhang {
1174b2b77a04SHong Zhang   PetscErrorCode ierr;
1175ce94432eSBarry Smith   MPI_Comm       comm;
1176b2b77a04SHong Zhang   Mat_FFT        *fft=(Mat_FFT*)A->data;
1177b2b77a04SHong Zhang   Mat_FFTW       *fftw;
1178b2b77a04SHong Zhang   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
11795274ed1bSHong Zhang   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
11805274ed1bSHong Zhang   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1181b2b77a04SHong Zhang   PetscBool      flg;
11824f48bc67SAmlan Barua   PetscInt       p_flag,partial_dim=1,ctr;
118311902ff2SHong Zhang   PetscMPIInt    size,rank;
11849496c9aeSAmlan Barua   ptrdiff_t      *pdim;
11859496c9aeSAmlan Barua   ptrdiff_t      local_n1,local_1_start;
11869496c9aeSAmlan Barua #if !defined(PETSC_USE_COMPLEX)
11879496c9aeSAmlan Barua   ptrdiff_t      temp;
11888ca4c034SAmlan Barua   PetscInt       N1,tot_dim;
11894f48bc67SAmlan Barua #else
11904f48bc67SAmlan Barua   PetscInt       n1;
11919496c9aeSAmlan Barua #endif
11929496c9aeSAmlan Barua 
1193b2b77a04SHong Zhang   PetscFunctionBegin;
1194ce94432eSBarry Smith   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1195ffc4695bSBarry Smith   ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
1196ffc4695bSBarry Smith   ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
1197c92418ccSAmlan Barua 
11981878d478SAmlan Barua   fftw_mpi_init();
119911902ff2SHong Zhang   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
120011902ff2SHong Zhang   pdim[0] = dim[0];
12018ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12028ca4c034SAmlan Barua   tot_dim = 2*dim[0];
12038ca4c034SAmlan Barua #endif
12043564f093SHong Zhang   for (ctr=1; ctr<ndim; ctr++) {
12056882372aSHong Zhang     partial_dim *= dim[ctr];
120611902ff2SHong Zhang     pdim[ctr]    = dim[ctr];
12078ca4c034SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
1208db4deed7SKarl Rupp     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1209db4deed7SKarl Rupp     else             tot_dim *= dim[ctr];
12108ca4c034SAmlan Barua #endif
12116882372aSHong Zhang   }
12123c3a9c75SAmlan Barua 
1213b2b77a04SHong Zhang   if (size == 1) {
1214e81bb599SAmlan Barua #if defined(PETSC_USE_COMPLEX)
1215b2b77a04SHong Zhang     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1216b2b77a04SHong Zhang     n    = N;
1217e81bb599SAmlan Barua #else
1218e81bb599SAmlan Barua     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1219e81bb599SAmlan Barua     n    = tot_dim;
1220e81bb599SAmlan Barua #endif
1221e81bb599SAmlan Barua 
1222b2b77a04SHong Zhang   } else {
12237a21806cSHong Zhang     ptrdiff_t local_n0,local_0_start;
1224b2b77a04SHong Zhang     switch (ndim) {
1225b2b77a04SHong Zhang     case 1:
12263c3a9c75SAmlan Barua #if !defined(PETSC_USE_COMPLEX)
12273c3a9c75SAmlan Barua       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1228e5d7f247SAmlan Barua #else
12297a21806cSHong Zhang       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
123026fbe8dcSKarl Rupp 
12316882372aSHong Zhang       n    = (PetscInt)local_n0;
12329496c9aeSAmlan Barua       n1   = (PetscInt)local_n1;
12339496c9aeSAmlan Barua       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1234e5d7f247SAmlan Barua #endif
1235b2b77a04SHong Zhang       break;
1236b2b77a04SHong Zhang     case 2:
12375b3e41ffSAmlan Barua #if defined(PETSC_USE_COMPLEX)
12387a21806cSHong Zhang       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1239b2b77a04SHong Zhang       /*
1240b2b77a04SHong Zhang        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
12410ec8b6e3SBarry Smith        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1242b2b77a04SHong Zhang        */
1243b2b77a04SHong Zhang       n    = (PetscInt)local_n0*dim[1];
1244b2b77a04SHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
12455b3e41ffSAmlan Barua #else
1246c3eba89aSHong Zhang       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
124726fbe8dcSKarl Rupp 
12485b3e41ffSAmlan Barua       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
12491e1ea65dSPierre Jolivet       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));CHKERRQ(ierr);
12505b3e41ffSAmlan Barua #endif
1251b2b77a04SHong Zhang       break;
1252b2b77a04SHong Zhang     case 3:
125351d1eed7SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12547a21806cSHong Zhang       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
125526fbe8dcSKarl Rupp 
12566882372aSHong Zhang       n    = (PetscInt)local_n0*dim[1]*dim[2];
12576882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
125851d1eed7SAmlan Barua #else
1259c3eba89aSHong Zhang       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
126026fbe8dcSKarl Rupp 
126151d1eed7SAmlan Barua       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
12621e1ea65dSPierre Jolivet       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));CHKERRQ(ierr);
126351d1eed7SAmlan Barua #endif
1264b2b77a04SHong Zhang       break;
1265b2b77a04SHong Zhang     default:
1266b3a17365SAmlan Barua #if defined(PETSC_USE_COMPLEX)
12677a21806cSHong Zhang       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
126826fbe8dcSKarl Rupp 
12696882372aSHong Zhang       n    = (PetscInt)local_n0*partial_dim;
12706882372aSHong Zhang       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1271b3a17365SAmlan Barua #else
1272b3a17365SAmlan Barua       temp = pdim[ndim-1];
127326fbe8dcSKarl Rupp 
1274b3a17365SAmlan Barua       pdim[ndim-1] = temp/2 + 1;
127526fbe8dcSKarl Rupp 
1276c3eba89aSHong Zhang       fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
127726fbe8dcSKarl Rupp 
1278b3a17365SAmlan Barua       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1279b3a17365SAmlan Barua       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
128026fbe8dcSKarl Rupp 
1281b3a17365SAmlan Barua       pdim[ndim-1] = temp;
128226fbe8dcSKarl Rupp 
1283c8a8a4f0SAmlan Barua       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1284b3a17365SAmlan Barua #endif
1285b2b77a04SHong Zhang       break;
1286b2b77a04SHong Zhang     }
1287b2b77a04SHong Zhang   }
12882277172eSMark Adams   free(pdim);
1289b2b77a04SHong Zhang   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1290b00a9115SJed Brown   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1291b2b77a04SHong Zhang   fft->data = (void*)fftw;
1292b2b77a04SHong Zhang 
1293b2b77a04SHong Zhang   fft->n            = n;
12940c74a584SJed Brown   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1295e5d7f247SAmlan Barua   fftw->partial_dim = partial_dim;
129626fbe8dcSKarl Rupp 
12975e806cb5SMatthew G. Knepley   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
12988c1d5ab3SHong Zhang   if (size == 1) {
1299a1b6d50cSHong Zhang #if defined(PETSC_USE_64BIT_INDICES)
1300a1b6d50cSHong Zhang     fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
1301a1b6d50cSHong Zhang #else
13028c1d5ab3SHong Zhang     fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
1303a1b6d50cSHong Zhang #endif
13048c1d5ab3SHong Zhang   }
130526fbe8dcSKarl Rupp 
1306b1301b2eSHong Zhang   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1307c92418ccSAmlan Barua 
1308f4259b30SLisandro Dalcin   fftw->p_forward  = NULL;
1309f4259b30SLisandro Dalcin   fftw->p_backward = NULL;
1310b2b77a04SHong Zhang   fftw->p_flag     = FFTW_ESTIMATE;
1311b2b77a04SHong Zhang 
1312b2b77a04SHong Zhang   if (size == 1) {
1313b2b77a04SHong Zhang     A->ops->mult          = MatMult_SeqFFTW;
1314b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1315b2b77a04SHong Zhang   } else {
1316b2b77a04SHong Zhang     A->ops->mult          = MatMult_MPIFFTW;
1317b2b77a04SHong Zhang     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1318b2b77a04SHong Zhang   }
1319b2b77a04SHong Zhang   fft->matdestroy = MatDestroy_FFTW;
1320b2b77a04SHong Zhang   A->assembled    = PETSC_TRUE;
13214a52bad8SHong Zhang   A->preallocated = PETSC_TRUE;
132226fbe8dcSKarl Rupp 
13232a7a6963SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
1324bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1325bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1326b2b77a04SHong Zhang 
1327b2b77a04SHong Zhang   /* get runtime options */
1328ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
13295274ed1bSHong Zhang   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
13305274ed1bSHong Zhang   if (flg) {
13315274ed1bSHong Zhang     fftw->p_flag = iplans[p_flag];
13325274ed1bSHong Zhang   }
13334a52bad8SHong Zhang   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1334b2b77a04SHong Zhang   PetscFunctionReturn(0);
1335b2b77a04SHong Zhang }
13363c3a9c75SAmlan Barua 
1337