xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b223cc977cbead8f3dd4e85365fb7a637b32daab) !
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40 //#if !defined(PETSC_USE_COMPLEX)
41 
42 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
43 //#endif
44   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
45   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
46   if (!fftw->p_forward){ /* create a plan, then excute it */
47     switch (ndim){
48     case 1:
49 #if defined(PETSC_USE_COMPLEX)
50       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51 #else
52       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
53 #endif
54       break;
55     case 2:
56 #if defined(PETSC_USE_COMPLEX)
57       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58 #else
59       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
60 #endif
61       break;
62     case 3:
63 #if defined(PETSC_USE_COMPLEX)
64       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
65 #else
66       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
67 #endif
68       break;
69     default:
70 #if defined(PETSC_USE_COMPLEX)
71       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
72 #else
73       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
74 #endif
75       break;
76     }
77     fftw->finarray  = x_array;
78     fftw->foutarray = y_array;
79     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
80                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
81     fftw_execute(fftw->p_forward);
82   } else { /* use existing plan */
83     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
84       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
85     } else {
86       fftw_execute(fftw->p_forward);
87     }
88   }
89   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
90   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
96 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
97 {
98   PetscErrorCode ierr;
99   Mat_FFT        *fft = (Mat_FFT*)A->data;
100   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
101   PetscScalar    *x_array,*y_array;
102   PetscInt       ndim=fft->ndim,*dim=fft->dim;
103 
104   PetscFunctionBegin;
105 //#if !defined(PETSC_USE_COMPLEX)
106 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
107 //#endif
108   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
109   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
110   if (!fftw->p_backward){ /* create a plan, then excute it */
111     switch (ndim){
112     case 1:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 2:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     case 3:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     default:
134 #if defined(PETSC_USE_COMPLEX)
135       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
136 #else
137       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
138 #endif
139       break;
140     }
141     fftw->binarray  = x_array;
142     fftw->boutarray = y_array;
143     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
144   } else { /* use existing plan */
145     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
146       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
147     } else {
148       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
149     }
150   }
151   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
152   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatMult_MPIFFTW"
158 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
159 {
160   PetscErrorCode ierr;
161   Mat_FFT        *fft  = (Mat_FFT*)A->data;
162   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
163   PetscScalar    *x_array,*y_array;
164   PetscInt       ndim=fft->ndim,*dim=fft->dim;
165   MPI_Comm       comm=((PetscObject)A)->comm;
166 // PetscInt ctr;
167 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
168 //  ndim1=(ptrdiff_t) ndim;
169 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
170 
171 //  for(ctr=0;ctr<ndim;ctr++)
172 //     {
173 //      pdim[ctr] = dim[ctr];
174 //     }
175 
176   PetscFunctionBegin;
177 //#if !defined(PETSC_USE_COMPLEX)
178 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
179 //#endif
180 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
181 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
182 
183   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
184   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
185   if (!fftw->p_forward){ /* create a plan, then excute it */
186     switch (ndim){
187     case 1:
188 #if defined(PETSC_USE_COMPLEX)
189       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
190 #endif
191       break;
192     case 2:
193 #if defined(PETSC_USE_COMPLEX)
194       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
195 #else
196       printf("The code comes here \n");
197       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
198 #endif
199       break;
200     case 3:
201 #if defined(PETSC_USE_COMPLEX)
202       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
203 #else
204       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
205 #endif
206       break;
207     default:
208 #if defined(PETSC_USE_COMPLEX)
209       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
210 #else
211       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
212 #endif
213  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
214       break;
215     }
216     fftw->finarray  = x_array;
217     fftw->foutarray = y_array;
218     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
219                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
220     fftw_execute(fftw->p_forward);
221   } else { /* use existing plan */
222     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
223       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
224     } else {
225       fftw_execute(fftw->p_forward);
226     }
227   }
228   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
229   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
235 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
236 {
237   PetscErrorCode ierr;
238   Mat_FFT        *fft  = (Mat_FFT*)A->data;
239   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
240   PetscScalar    *x_array,*y_array;
241   PetscInt       ndim=fft->ndim,*dim=fft->dim;
242   MPI_Comm       comm=((PetscObject)A)->comm;
243 //  PetscInt       ctr;
244 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
245 //  ndim1=(ptrdiff_t) ndim;
246 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
247 
248 //  for(ctr=0;ctr<ndim;ctr++)
249 //     {
250 //      pdim[ctr] = dim[ctr];
251 //     }
252 
253   PetscFunctionBegin;
254 //#if !defined(PETSC_USE_COMPLEX)
255 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
256 //#endif
257 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
258 // should pdim be a member of Mat_FFTW?
259 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
260 
261   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
262   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
263   if (!fftw->p_backward){ /* create a plan, then excute it */
264     switch (ndim){
265     case 1:
266 #if defined(PETSC_USE_COMPLEX)
267       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
268 #endif
269       break;
270     case 2:
271 #if defined(PETSC_USE_COMPLEX)
272       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
273 #else
274       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
275 #endif
276       break;
277     case 3:
278 #if defined(PETSC_USE_COMPLEX)
279       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
280 #else
281       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
282 #endif
283       break;
284     default:
285 #if defined(PETSC_USE_COMPLEX)
286       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
287 #else
288       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
289 #endif
290 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
291       break;
292     }
293     fftw->binarray  = x_array;
294     fftw->boutarray = y_array;
295     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
296   } else { /* use existing plan */
297     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
298       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
299     } else {
300       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
301     }
302   }
303   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
304   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatDestroy_FFTW"
310 PetscErrorCode MatDestroy_FFTW(Mat A)
311 {
312   Mat_FFT        *fft = (Mat_FFT*)A->data;
313   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317 //#if !defined(PETSC_USE_COMPLEX)
318 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
319 //#endif
320   fftw_destroy_plan(fftw->p_forward);
321   fftw_destroy_plan(fftw->p_backward);
322   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
323   ierr = PetscFree(fft->data);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
328 #undef __FUNCT__
329 #define __FUNCT__ "VecDestroy_MPIFFTW"
330 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
331 {
332   PetscErrorCode  ierr;
333   PetscScalar     *array;
334 
335   PetscFunctionBegin;
336 #if !defined(PETSC_USE_COMPLEX)
337   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
338 #endif
339   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
340   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
341   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
342   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatGetVecs1DC_FFTW"
348 /*
349    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
350      parallel layout determined by FFTW-1D
351 
352 */
353 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
354 {
355   PetscErrorCode ierr;
356   PetscMPIInt    size,rank;
357   MPI_Comm       comm=((PetscObject)A)->comm;
358   Mat_FFT        *fft = (Mat_FFT*)A->data;
359 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
360   PetscInt       N=fft->N;
361   PetscInt       ndim=fft->ndim,*dim=fft->dim;
362   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
363   ptrdiff_t      f_local_n1,f_local_1_end;
364   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
365   ptrdiff_t      b_local_n1,b_local_1_end;
366   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
367 
368   PetscFunctionBegin;
369 #if !defined(PETSC_USE_COMPLEX)
370   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
371 #endif
372   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
373   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
374   if (size == 1){
375     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
376   }
377   else {
378       if (ndim>1){
379         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
380       else {
381           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
382           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
383           if (fin) {
384             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
385             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
386             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
387           }
388           if (fout) {
389             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
390             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
391             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
392           }
393           if (bin) {
394             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
395             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
396             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
397           }
398           if (bout) {
399             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
400             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
401             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
402           }
403   }
404   if (fin){
405     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
406   }
407   if (fout){
408     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
409   }
410   if (bin){
411     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
412   }
413   if (bout){
414     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
415   }
416   PetscFunctionReturn(0);
417 }
418 
419 
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatGetVecs_FFTW"
424 /*
425    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
426      parallel layout determined by FFTW
427 
428    Collective on Mat
429 
430    Input Parameter:
431 .  mat - the matrix
432 
433    Output Parameter:
434 +   fin - (optional) input vector of forward FFTW
435 -   fout - (optional) output vector of forward FFTW
436 
437   Level: advanced
438 
439 .seealso: MatCreateFFTW()
440 */
441 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
442 {
443   PetscErrorCode ierr;
444   PetscMPIInt    size,rank;
445   MPI_Comm       comm=((PetscObject)A)->comm;
446   Mat_FFT        *fft = (Mat_FFT*)A->data;
447   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
448   PetscInt       N=fft->N, N1, n1,vsize;
449   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
450 
451   PetscFunctionBegin;
452 //#if !defined(PETSC_USE_COMPLEX)
453 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
454 //#endif
455   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
456   PetscValidType(A,1);
457 
458   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
459   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
460   if (size == 1){ /* sequential case */
461 #if defined(PETSC_USE_COMPLEX)
462     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
463     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
464 #else
465     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
466     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
467     printf("The code successfully comes at the end of the routine with one processor\n");
468 #endif
469   } else {        /* mpi case */
470     ptrdiff_t      alloc_local,local_n0,local_0_start;
471     ptrdiff_t      local_n1,local_1_end;
472     fftw_complex   *data_fin,*data_fout;
473     double         *data_finr ;
474     ptrdiff_t      local_1_start,temp;
475 //    PetscInt ctr;
476 //    ptrdiff_t      ndim1,*pdim;
477 //    ndim1=(ptrdiff_t) ndim;
478 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
479 
480 //    for(ctr=0;ctr<ndim;ctr++)
481 //        {k
482 //           pdim[ctr] = dim[ctr];
483 //       }
484 
485 
486 
487     switch (ndim){
488     case 1:
489       /* Get local size */
490       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
491 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
492       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
493       if (fin) {
494         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
495         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
496         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
497       }
498       if (fout) {
499         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
500         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
501         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
502       }
503       break;
504     case 2:
505 #if !defined(PETSC_USE_COMPLEX)
506       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
507       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
508       if (fin) {
509         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
510         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
511         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
512         //printf("The code comes here with vector size %d\n",vsize);
513         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
514       }
515       if (fout) {
516         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
518         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
519       }
520       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
521 
522 #else
523       /* Get local size */
524      printf("Hope this does not come here");
525       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
526       if (fin) {
527         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
528         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
529         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
530       }
531       if (fout) {
532         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
533         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
534         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
535       }
536      printf("Hope this does not come here");
537 #endif
538       break;
539     case 3:
540       /* Get local size */
541 #if !defined(PETSC_USE_COMPLEX)
542       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
543       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
544       if (fin) {
545         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
546         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
547         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
548         //printf("The code comes here with vector size %d\n",vsize);
549         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
550       }
551       if (fout) {
552         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
553         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
554         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
555       }
556       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
557 
558 
559 #else
560       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
561 //      printf("The quantity n is %d",n);
562       if (fin) {
563         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
564         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
565         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
566       }
567       if (fout) {
568         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
569         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
570         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
571       }
572 #endif
573       break;
574     default:
575       /* Get local size */
576 #if !defined(PETSC_USE_COMPLEX)
577       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
578       printf("The value of temp is %ld\n",temp);
579       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
580       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
581       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
582       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
583       if (fin) {
584         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
585         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
586         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
587         //printf("The code comes here with vector size %d\n",vsize);
588         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
589       }
590       if (fout) {
591         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
592         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
593         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
594       }
595 
596 #else
597       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
598 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
599 //      printf("The value of alloc local is %d",alloc_local);
600 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
601 //      for(i=0;i<ndim;i++)
602 //         {
603 //          pdim[i]=dim[i];printf("%d",pdim[i]);
604 //         }
605 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
606 //      printf("The quantity n is %d",n);
607       if (fin) {
608         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
609         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
610         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
611       }
612       if (fout) {
613         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
614         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
615         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
616       }
617 #endif
618       break;
619     }
620   }
621 //  if (fin){
622 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
623 //  }
624 //  if (fout){
625 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
626 //  }
627   PetscFunctionReturn(0);
628 }
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "InputTransformFFT"
632 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
633 {
634   PetscErrorCode ierr;
635   PetscFunctionBegin;
636   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
637   PetscFunctionReturn(0);
638 }
639 
640 /*
641       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
642   Input A, x, y
643   A - FFTW matrix
644   x - user data
645   Options Database Keys:
646 + -mat_fftw_plannerflags - set FFTW planner flags
647 
648    Level: intermediate
649 
650 */
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "InputTransformFFT_FTTW"
655 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
656 {
657   PetscErrorCode ierr;
658   MPI_Comm       comm=((PetscObject)A)->comm;
659   Mat_FFT        *fft = (Mat_FFT*)A->data;
660   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
661   PetscInt       N=fft->N, N1, n1 ,NM;
662   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
663   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
664   PetscInt       i,j,k,rank,size,partial_dim;
665   ptrdiff_t      alloc_local,local_n0,local_0_start;
666   ptrdiff_t      local_n1,local_1_start,temp;
667   VecScatter     vecscat;
668   IS             list1,list2;
669 
670   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
671   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
672   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
673   printf("Local ownership starts at %d\n",low);
674 
675   if (size==1)
676     {
677      switch (ndim){
678      case 1:
679           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
680           for (i=0;i<dim[0];i++)
681              {
682               indx1[i] = i;
683              }
684           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
685           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
686           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
687           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
688           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
689           ierr = ISDestroy(&list1);CHKERRQ(ierr);
690           ierr = PetscFree(indx1);CHKERRQ(ierr);
691      break;
692 
693      case 2:
694           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
695           for (i=0;i<dim[0];i++){
696              for (j=0;j<dim[1];j++){
697                 indx1[i*dim[1]+j] = i*dim[1] + j;
698              }
699           }
700           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
701           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
702           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
703           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
704           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
705           ierr = ISDestroy(&list1);CHKERRQ(ierr);
706           ierr = PetscFree(indx1);CHKERRQ(ierr);
707           break;
708      case 3:
709           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
710           for (i=0;i<dim[0];i++){
711              for (j=0;j<dim[1];j++){
712                 for (k=0;k<dim[2];k++){
713                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
714                 }
715              }
716           }
717           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
718           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
719           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
722           ierr = ISDestroy(&list1);CHKERRQ(ierr);
723           ierr = PetscFree(indx1);CHKERRQ(ierr);
724           break;
725      default:
726           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
727           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
728           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
730           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
731           ierr = ISDestroy(&list1);CHKERRQ(ierr);
732           //ierr = ISDestroy(list1);CHKERRQ(ierr);
733           break;
734       }
735     }
736 
737  else{
738 
739  switch (ndim){
740  case 1:
741   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
742   break;
743  case 2:
744       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
745       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
746 
747       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
748       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
749       printf("Val local_0_start = %ld\n",local_0_start);
750 
751       if (dim[1]%2==0)
752         NM = dim[1]+2;
753       else
754         NM = dim[1]+1;
755 
756       for (i=0;i<local_n0;i++){
757          for (j=0;j<dim[1];j++){
758             tempindx = i*dim[1] + j;
759             tempindx1 = i*NM + j;
760             indx1[tempindx]=local_0_start*dim[1]+tempindx;
761             indx2[tempindx]=low+tempindx1;
762            // printf("Val tempindx1 = %d\n",tempindx1);
763   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
764   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
765   //          printf("-------------------------\n",indx2[tempindx],rank);
766         }
767      }
768 
769       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
770       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
771 
772       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
773       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
774       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
775       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
776       ierr = ISDestroy(&list1);CHKERRQ(ierr);
777       ierr = ISDestroy(&list2);CHKERRQ(ierr);
778       ierr = PetscFree(indx1);CHKERRQ(ierr);
779       ierr = PetscFree(indx2);CHKERRQ(ierr);
780       break;
781 
782  case 3:
783       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
784       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
785 
786       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
787       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
788       printf("Val local_0_start = %ld\n",local_0_start);
789 
790       if (dim[2]%2==0)
791         NM = dim[2]+2;
792       else
793         NM = dim[2]+1;
794 
795       for (i=0;i<local_n0;i++){
796          for (j=0;j<dim[1];j++){
797             for (k=0;k<dim[2];k++){
798                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
799                tempindx1 = i*dim[1]*NM + j*NM + k;
800                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
801                indx2[tempindx]=low+tempindx1;
802             }
803            // printf("Val tempindx1 = %d\n",tempindx1);
804            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
805            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
806            // printf("-------------------------\n",indx2[tempindx],rank);
807         }
808      }
809 
810       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
811       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
812 
813       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
814       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
817       ierr = ISDestroy(&list1);CHKERRQ(ierr);
818       ierr = ISDestroy(&list2);CHKERRQ(ierr);
819       ierr = PetscFree(indx1);CHKERRQ(ierr);
820       ierr = PetscFree(indx2);CHKERRQ(ierr);
821       break;
822 
823  default:
824       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
825       printf("The value of temp is %ld\n",temp);
826       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
827       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
828       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
829       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
830 
831       partial_dim = fftw->partial_dim;
832       printf("The value of partial dim is %d\n",partial_dim);
833 
834       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
835       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
836       printf("Val local_0_start = %ld\n",local_0_start);
837 
838       if (dim[ndim-1]%2==0)
839         NM = 2;
840       else
841         NM = 1;
842 
843       j = low;
844       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
845          {
846           indx1[i] = local_0_start*partial_dim + i;
847           indx2[i] = j;
848           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
849           if (k%dim[ndim-1]==0)
850             { j+=NM;}
851           j++;
852          }
853       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
854       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
855       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
856       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
859       ierr = ISDestroy(&list1);CHKERRQ(ierr);
860       ierr = ISDestroy(&list2);CHKERRQ(ierr);
861       ierr = PetscFree(indx1);CHKERRQ(ierr);
862       ierr = PetscFree(indx2);CHKERRQ(ierr);
863       break;
864   }
865  }
866 
867  return 0;
868 }
869 EXTERN_C_END
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "OutputTransformFFT"
873 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
874 {
875   PetscErrorCode ierr;
876   PetscFunctionBegin;
877   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
878   PetscFunctionReturn(0);
879 }
880 
881 /*
882       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
883   Input A, x, y
884   A - FFTW matrix
885   x - FFTW vector
886   y - PETSc vector that user can read
887   Options Database Keys:
888 + -mat_fftw_plannerflags - set FFTW planner flags
889 
890    Level: intermediate
891 
892 */
893 
894 EXTERN_C_BEGIN
895 #undef __FUNCT__
896 #define __FUNCT__ "OutputTransformFFT_FTTW"
897 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
898 {
899   PetscErrorCode ierr;
900   MPI_Comm       comm=((PetscObject)A)->comm;
901   Mat_FFT        *fft = (Mat_FFT*)A->data;
902   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
903   PetscInt       N=fft->N, N1, n1 ,NM;
904   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
905   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
906   PetscInt       i,j,k,rank,size,partial_dim;
907   ptrdiff_t      alloc_local,local_n0,local_0_start;
908   ptrdiff_t      local_n1,local_1_start,temp;
909   VecScatter     vecscat;
910   IS             list1,list2;
911 
912   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
913   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
914   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
915   printf("Local ownership starts at %d\n",low);
916 
917   if (size==1){
918     switch (ndim){
919     case 1:
920            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
921           for (i=0;i<dim[0];i++)
922              {
923               indx1[i] = i;
924              }
925           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
926           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
927           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
930           ierr = ISDestroy(&list1);CHKERRQ(ierr);
931           ierr = PetscFree(indx1);CHKERRQ(ierr);
932           break;
933 
934     case 2:
935          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
936           for (i=0;i<dim[0];i++){
937              for (j=0;j<dim[1];j++){
938                 indx1[i*dim[1]+j] = i*dim[1] + j;
939              }
940           }
941          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
942          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
943          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
946          ierr = ISDestroy(&list1);CHKERRQ(ierr);
947          ierr = PetscFree(indx1);CHKERRQ(ierr);
948          break;
949     case 3:
950          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
951          for (i=0;i<dim[0];i++){
952             for (j=0;j<dim[1];j++){
953                for (k=0;k<dim[2];k++){
954                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
955                }
956             }
957          }
958          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
959          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
960          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
963          ierr = ISDestroy(&list1);CHKERRQ(ierr);
964          ierr = PetscFree(indx1);CHKERRQ(ierr);
965          break;
966     default:
967          ierr = ISCreateStride(comm,N,0,1,&list1);
968          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
969          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
970          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
971          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
973          ierr = ISDestroy(&list1);CHKERRQ(ierr);
974          break;
975     }
976   }
977   else{
978 
979  switch (ndim){
980  case 1:
981   SETERRQ(comm,PETSC_ERR_SUP,"No support yet");
982   break;
983  case 2:
984       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
985       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
986 
987      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
988      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
989      printf("Val local_0_start = %ld\n",local_0_start);
990 
991      if (dim[1]%2==0)
992       NM = dim[1]+2;
993     else
994       NM = dim[1]+1;
995 
996 
997 
998      for (i=0;i<local_n0;i++){
999         for (j=0;j<dim[1];j++){
1000             tempindx = i*dim[1] + j;
1001             tempindx1 = i*NM + j;
1002             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1003             indx2[tempindx]=low+tempindx1;
1004        //     printf("Val tempindx1 = %d\n",tempindx1);
1005        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1006        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1007        //     printf("-------------------------\n",indx2[tempindx],rank);
1008         }
1009      }
1010 
1011      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1012      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1013 
1014      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1015      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1016      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1017      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1018      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1019      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1020      ierr = PetscFree(indx1);CHKERRQ(ierr);
1021      ierr = PetscFree(indx2);CHKERRQ(ierr);
1022   break;
1023 
1024  case 3:
1025       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1026       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1027 
1028      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1029      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1030      printf("Val local_0_start = %ld\n",local_0_start);
1031 
1032      if (dim[2]%2==0)
1033       NM = dim[2]+2;
1034     else
1035       NM = dim[2]+1;
1036 
1037      for (i=0;i<local_n0;i++){
1038         for (j=0;j<dim[1];j++){
1039            for (k=0;k<dim[2];k++){
1040               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1041               tempindx1 = i*dim[1]*NM + j*NM + k;
1042               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1043               indx2[tempindx]=low+tempindx1;
1044            }
1045         //    printf("Val tempindx1 = %d\n",tempindx1);
1046         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1047         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1048         //    printf("-------------------------\n",indx2[tempindx],rank);
1049         }
1050      }
1051 
1052      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1053      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1054 
1055      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1056      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1057      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1058      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1059      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1060      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1061      ierr = PetscFree(indx1);CHKERRQ(ierr);
1062      ierr = PetscFree(indx2);CHKERRQ(ierr);
1063   break;
1064 
1065   default:
1066      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1067      printf("The value of temp is %ld\n",temp);
1068      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1069      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1070      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1071      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1072 
1073      partial_dim = fftw->partial_dim;
1074      printf("The value of partial dim is %d\n",partial_dim);
1075 
1076      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1077      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1078      printf("Val local_0_start = %ld\n",local_0_start);
1079 
1080      if (dim[ndim-1]%2==0)
1081        NM = 2;
1082      else
1083        NM = 1;
1084 
1085      j = low;
1086      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1087         {
1088          indx1[i] = local_0_start*partial_dim + i;
1089          indx2[i] = j;
1090          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1091          if (k%dim[ndim-1]==0)
1092            { j+=NM;}
1093          j++;
1094         }
1095      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1096      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1097 
1098       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1099 
1100 
1101      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1102      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1103      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1104      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1105      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1106      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1107      ierr = PetscFree(indx1);CHKERRQ(ierr);
1108      ierr = PetscFree(indx2);CHKERRQ(ierr);
1109 
1110   break;
1111  }
1112  }
1113  return 0;
1114 }
1115 EXTERN_C_END
1116 
1117 EXTERN_C_BEGIN
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatCreate_FFTW"
1120 /*
1121       MatCreate_FFTW - Creates a matrix object that provides FFT
1122   via the external package FFTW
1123   Options Database Keys:
1124 + -mat_fftw_plannerflags - set FFTW planner flags
1125 
1126    Level: intermediate
1127 
1128 */
1129 
1130 PetscErrorCode MatCreate_FFTW(Mat A)
1131 {
1132   PetscErrorCode ierr;
1133   MPI_Comm       comm=((PetscObject)A)->comm;
1134   Mat_FFT        *fft=(Mat_FFT*)A->data;
1135   Mat_FFTW       *fftw;
1136   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1137   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1138   PetscBool      flg;
1139   PetscInt       p_flag,partial_dim=1,ctr,N1;
1140   PetscMPIInt    size,rank;
1141   ptrdiff_t      *pdim, temp;
1142   ptrdiff_t      local_n1,local_1_start;
1143 
1144   PetscFunctionBegin;
1145 //#if !defined(PETSC_USE_COMPLEX)
1146 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
1147 //#endif
1148 
1149   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1150   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1151   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1152 
1153   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1154   pdim[0] = dim[0];
1155   for(ctr=1;ctr<ndim;ctr++)
1156       {
1157           partial_dim *= dim[ctr];
1158           pdim[ctr] = dim[ctr];
1159       }
1160 //#if !defined(PETSC_USE_COMPLEX)
1161 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
1162 //#endif
1163 
1164 //  printf("partial dimension is %d",partial_dim);
1165   if (size == 1) {
1166 #if defined(PETSC_USE_COMPLEX)
1167     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1168     n = N;
1169 #else
1170     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1171     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1172     n = tot_dim;
1173 #endif
1174 
1175   } else {
1176     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1177     switch (ndim){
1178     case 1:
1179 #if !defined(PETSC_USE_COMPLEX)
1180   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1181 #else
1182       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1183       n = (PetscInt)local_n0;
1184       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1185 #endif
1186 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1187       break;
1188     case 2:
1189 #if defined(PETSC_USE_COMPLEX)
1190       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1191       /*
1192        PetscMPIInt    rank;
1193        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1194        PetscSynchronizedFlush(comm);
1195        */
1196       n = (PetscInt)local_n0*dim[1];
1197       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1198 #else
1199       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1200       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1201       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1202 #endif
1203       break;
1204     case 3:
1205 //      printf("The value of alloc local is %d",alloc_local);
1206 #if defined(PETSC_USE_COMPLEX)
1207       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1208       n = (PetscInt)local_n0*dim[1]*dim[2];
1209       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1210 #else
1211       printf("The code comes here\n");
1212       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1213       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1214       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1215 #endif
1216       break;
1217     default:
1218 #if defined(PETSC_USE_COMPLEX)
1219       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1220 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1221 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1222       n = (PetscInt)local_n0*partial_dim;
1223 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1224       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1225 #else
1226       temp = pdim[ndim-1];
1227       pdim[ndim-1]= temp/2 + 1;
1228       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1229       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1230       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1231       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1232       pdim[ndim-1] = temp;
1233       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1234       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1235 #endif
1236       break;
1237     }
1238   }
1239   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1240   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1241   fft->data = (void*)fftw;
1242 
1243   fft->n           = n;
1244   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1245   fftw->partial_dim = partial_dim;
1246   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1247   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1248 
1249   fftw->p_forward  = 0;
1250   fftw->p_backward = 0;
1251   fftw->p_flag     = FFTW_ESTIMATE;
1252 
1253   if (size == 1){
1254     A->ops->mult          = MatMult_SeqFFTW;
1255     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1256   } else {
1257     A->ops->mult          = MatMult_MPIFFTW;
1258     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1259   }
1260   fft->matdestroy          = MatDestroy_FFTW;
1261   A->ops->getvecs       = MatGetVecs_FFTW;
1262   A->assembled          = PETSC_TRUE;
1263 #if !defined(PETSC_USE_COMPLEX)
1264   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1265   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1266 #endif
1267 
1268   /* get runtime options */
1269   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1270     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1271     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1272   PetscOptionsEnd();
1273   PetscFunctionReturn(0);
1274 }
1275 EXTERN_C_END
1276 
1277 
1278 
1279 
1280