xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision a5b82c69bcc640643a79966de633db94f563635f)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatGetVecsFFTW_1DC"
305 /*
306     - Get Vectors(s) compatible with matrix, i.e. with the
307      parallel layout determined by FFTW-1D
308 
309 */
310 PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311 {
312   PetscErrorCode ierr;
313   PetscMPIInt    size,rank;
314   MPI_Comm       comm=((PetscObject)A)->comm;
315   Mat_FFT        *fft = (Mat_FFT*)A->data;
316 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317   PetscInt       N=fft->N;
318   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320   ptrdiff_t      f_local_n1,f_local_1_end;
321   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322   ptrdiff_t      b_local_n1,b_local_1_end;
323   fftw_complex   *data_fin,*data_fout,*data_bout;
324 
325   PetscFunctionBegin;
326 #if !defined(PETSC_USE_COMPLEX)
327   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328 #endif
329   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331   if (size == 1){
332     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333   }
334   else {
335       if (ndim>1){
336         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337       else {
338           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340           if (fin) {
341             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344           }
345           if (fout) {
346             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349           }
350           if (bout) {
351             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354           }
355   }
356   if (fin){
357     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358   }
359   if (fout){
360     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361   }
362   if (bout){
363     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatGetVecsFFTW"
373 /*@
374    MatGetVecFFTW - Get vector(s) compatible with the matrix, i.e. with the
375      parallel layout determined by FFTW
376 
377    Collective on Mat
378 
379    Input Parameter:
380 .  mat - the matrix
381 
382    Output Parameter:
383 +   fin - (optional) input vector of forward FFTW
384 -   fout - (optional) output vector of forward FFTW
385 
386   Level: advanced
387 
388 .seealso: MatCreateFFTW()
389 @*/
390 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
391 {
392   PetscErrorCode ierr;
393   PetscFunctionBegin;
394   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
401 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
402 {
403   PetscErrorCode ierr;
404   PetscMPIInt    size,rank;
405   MPI_Comm       comm=((PetscObject)A)->comm;
406   Mat_FFT        *fft = (Mat_FFT*)A->data;
407   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
408   PetscInt       N=fft->N, N1, n1,vsize;
409   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
410 
411   PetscFunctionBegin;
412   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
413   PetscValidType(A,1);
414 
415   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
416   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
417   printf("size is %d\n",size);
418   if (size == 1){ /* sequential case */
419   printf("Routine is getting called\n");
420 #if defined(PETSC_USE_COMPLEX)
421     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
422     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
423     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
424 #else
425     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
426     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
427     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
428 #endif
429   } else {
430     ptrdiff_t      alloc_local,local_n0,local_0_start;
431     ptrdiff_t      local_n1,local_1_end;
432     fftw_complex   *data_fin,*data_fout,*data_bout;
433     double         *data_finr,*data_boutr ;
434     ptrdiff_t      local_1_start,temp;
435     switch (ndim){
436           case 1:
437 #if !defined(PETSC_USE_COMPLEX)
438                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
439 #else
440                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
441                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
442                  if (fin) {
443                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
444                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
445                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
446                          }
447                  if (fout) {
448                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
449                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
450                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
451                          }
452                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
453                  if (bout) {
454                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
455                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
456                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
457                          }
458                  break;
459 
460 
461 #endif
462           case 2:
463 #if !defined(PETSC_USE_COMPLEX)
464                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
465                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
466                  if (fin) {
467                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
468                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
469                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
470                            //printf("The code comes here with vector size %d\n",vsize);
471                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
472                           }
473                  if (bout) {
474                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
475                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
476                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
477                            //printf("The code comes here with vector size %d\n",vsize);
478                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
479                           }
480                  if (fout) {
481                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
482                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
483                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
484                            }
485 
486                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
487 
488 #else
489       /* Get local size */
490                  printf("Code works for paralllel 2d complex DFT\n");
491                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
492                  if (fin) {
493                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
494                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
495                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
496                           }
497                  if (fout) {
498                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
499                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
500                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
501                            }
502                  if (bout) {
503                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
505                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
506                           }
507 
508      //printf("Hope this does not come here");
509 #endif
510       break;
511 
512           case 3:
513 #if !defined(PETSC_USE_COMPLEX)
514                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
515                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
516                  if (fin) {
517                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
518                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
519                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
520         //printf("The code comes here with vector size %d\n",vsize);
521                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
522                          }
523                  if (bout) {
524                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
525                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
526                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
527         //printf("The code comes here with vector size %d\n",vsize);
528                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
529                           }
530                  if (fout) {
531                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
532                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
533                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
534                           }
535 
536       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
537 
538 #else
539                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
540 //      printf("The quantity n is %d",n);
541                  if (fin) {
542                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
543                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
544                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
545                          }
546                  if (fout) {
547                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
548                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
549                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
550                          }
551                  if (bout) {
552                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
553                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
554                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
555                          }
556 
557 #endif
558                  break;
559           default:
560 #if !defined(PETSC_USE_COMPLEX)
561                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
562                  printf("The value of temp is %ld\n",temp);
563                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
564                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
565                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
566                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
567 
568                  if (fin) {
569                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
570                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
571                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
572         //printf("The code comes here with vector size %d\n",vsize);
573                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
574                         }
575                  if (bout) {
576                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
577                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
578                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
579         //printf("The code comes here with vector size %d\n",vsize);
580                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
581                         }
582                  if (fout) {
583                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
584                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
585                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
586                         }
587 
588 #else
589                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
590                 if (fin) {
591                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
592                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
593                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
594                        }
595                 if (fout) {
596                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
597                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
598                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
599                        }
600                 if (bout) {
601                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
602                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
603                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
604                        }
605 
606 
607 
608 #endif
609             break;
610           }
611   }
612   PetscFunctionReturn(0);
613 }
614 EXTERN_C_END
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatGetVecs_FFTW"
618 /*
619    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
620      parallel layout determined by FFTW
621 
622    Collective on Mat
623 
624    Input Parameter:
625 .  mat - the matrix
626 
627    Output Parameter:
628 +   fin - (optional) input vector of forward FFTW
629 -   fout - (optional) output vector of forward FFTW
630 
631   Level: advanced
632 
633 .seealso: MatCreateFFTW()
634 */
635 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
636 {
637   PetscErrorCode ierr;
638   PetscMPIInt    size,rank;
639   MPI_Comm       comm=((PetscObject)A)->comm;
640   Mat_FFT        *fft = (Mat_FFT*)A->data;
641   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
642   PetscInt       N=fft->N, N1, n1,vsize;
643   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
644 
645   PetscFunctionBegin;
646   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
647   PetscValidType(A,1);
648 
649   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
650   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
651   if (size == 1){ /* sequential case */
652 #if defined(PETSC_USE_COMPLEX)
653     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
654     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
655 #else
656     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
657     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
658 #endif
659   } else {        /* mpi case */
660     ptrdiff_t      alloc_local,local_n0,local_0_start;
661     ptrdiff_t      local_n1,local_1_end;
662     fftw_complex   *data_fin,*data_fout;
663     double         *data_finr ;
664     ptrdiff_t      local_1_start,temp;
665 //    PetscInt ctr;
666 //    ptrdiff_t      ndim1,*pdim;
667 //    ndim1=(ptrdiff_t) ndim;
668 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
669 
670 //    for(ctr=0;ctr<ndim;ctr++)
671 //        {k
672 //           pdim[ctr] = dim[ctr];
673 //       }
674 
675 
676 
677     switch (ndim){
678     case 1:
679       /* Get local size */
680       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
681 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
682       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
683       printf("The values of local_n0 and local_n1 are %d and %d",local_n0,local_n1);
684       if (fin) {
685         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
686         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
687         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
688       }
689       if (fout) {
690         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
691         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
692         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
693       }
694       break;
695     case 2:
696 #if !defined(PETSC_USE_COMPLEX)
697       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
698       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
699       if (fin) {
700         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
701         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
702         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
703         //printf("The code comes here with vector size %d\n",vsize);
704         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
705       }
706       n1 = 2*local_n1*(dim[0]);
707       //n1 = 2*local_n1*dim[1];
708       printf("The values are %ld\n",local_n1);
709       if (fout) {
710         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
711         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
712         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
713       }
714       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
715 
716 #else
717       /* Get local size */
718      //printf("Hope this does not come here");
719       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
720       if (fin) {
721         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
722         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
723         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
724       }
725       if (fout) {
726         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
727         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
728         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
729       }
730      //printf("Hope this does not come here");
731 #endif
732       break;
733     case 3:
734       /* Get local size */
735 #if !defined(PETSC_USE_COMPLEX)
736       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
737       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
738       if (fin) {
739         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
740         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
741         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
742         //printf("The code comes here with vector size %d\n",vsize);
743         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
744       }
745       printf("The value is %ld",local_n1);
746       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
747       if (fout) {
748         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
749         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
750         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
751       }
752       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
753 
754 
755 #else
756       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
757 //      printf("The quantity n is %d",n);
758       if (fin) {
759         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
760         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
761         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
762       }
763       if (fout) {
764         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
765         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
766         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
767       }
768 #endif
769       break;
770     default:
771       /* Get local size */
772 #if !defined(PETSC_USE_COMPLEX)
773       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
774       printf("The value of temp is %ld\n",temp);
775       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
776       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
777       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
778       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
779       if (fin) {
780         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
781         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
782         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
783         //printf("The code comes here with vector size %d\n",vsize);
784         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
785       }
786       printf("The value is %ld. Global length is %d \n",local_n1,N1);
787       temp = fftw->partial_dim;
788       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
789 
790       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
791       if (fout) {
792         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
793         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
794         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
795       }
796 
797 #else
798       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
799 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
800 //      printf("The value of alloc local is %d",alloc_local);
801 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
802 //      for(i=0;i<ndim;i++)
803 //         {
804 //          pdim[i]=dim[i];printf("%d",pdim[i]);
805 //         }
806 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
807 //      printf("The quantity n is %d",n);
808       if (fin) {
809         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
810         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
811         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
812       }
813       if (fout) {
814         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
815         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
816         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
817       }
818 #endif
819       break;
820     }
821   }
822 //  if (fin){
823 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
824 //  }
825 //  if (fout){
826 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
827 //  }
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "InputTransformFFT"
833 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
834 {
835   PetscErrorCode ierr;
836   PetscFunctionBegin;
837   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
843   Input A, x, y
844   A - FFTW matrix
845   x - user data
846   Options Database Keys:
847 + -mat_fftw_plannerflags - set FFTW planner flags
848 
849    Level: intermediate
850 
851 */
852 
853 EXTERN_C_BEGIN
854 #undef __FUNCT__
855 #define __FUNCT__ "InputTransformFFT_FTTW"
856 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
857 {
858   PetscErrorCode ierr;
859   MPI_Comm       comm=((PetscObject)A)->comm;
860   Mat_FFT        *fft = (Mat_FFT*)A->data;
861   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
862   PetscInt       N=fft->N, N1, n1 ,NM;
863   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
864   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
865   PetscInt       i,j,k,rank,size,partial_dim;
866   ptrdiff_t      alloc_local,local_n0,local_0_start;
867   ptrdiff_t      local_n1,local_1_start,temp;
868   VecScatter     vecscat;
869   IS             list1,list2;
870 
871   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
872   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
873   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
874   printf("Local ownership starts at %d\n",low);
875 
876   if (size==1)
877     {
878 /*     switch (ndim){
879      case 1:
880           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
881           for (i=0;i<dim[0];i++)
882              {
883               indx1[i] = i;
884              }
885           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
886           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
887           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
890           ierr = ISDestroy(&list1);CHKERRQ(ierr);
891           ierr = PetscFree(indx1);CHKERRQ(ierr);
892      break;
893 
894      case 2:
895           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
896           for (i=0;i<dim[0];i++){
897              for (j=0;j<dim[1];j++){
898                 indx1[i*dim[1]+j] = i*dim[1] + j;
899              }
900           }
901           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
902           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
903           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
904           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
906           ierr = ISDestroy(&list1);CHKERRQ(ierr);
907           ierr = PetscFree(indx1);CHKERRQ(ierr);
908           break;
909      case 3:
910           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
911           for (i=0;i<dim[0];i++){
912              for (j=0;j<dim[1];j++){
913                 for (k=0;k<dim[2];k++){
914                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
915                 }
916              }
917           }
918           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
919           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
920           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
923           ierr = ISDestroy(&list1);CHKERRQ(ierr);
924           ierr = PetscFree(indx1);CHKERRQ(ierr);
925           break;
926      default:
927 */
928           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
929           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
930           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
933           ierr = ISDestroy(&list1);CHKERRQ(ierr);
934           //ierr = ISDestroy(list1);CHKERRQ(ierr);
935  //         break;
936   //    }
937     }
938 
939  else{
940 
941  switch (ndim){
942  case 1:
943 #if defined(PETSC_USE_COMPLEX)
944       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
945       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
946       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
947       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
948       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
949       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
950       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
951       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
953       ierr = ISDestroy(&list1);CHKERRQ(ierr);
954       ierr = ISDestroy(&list2);CHKERRQ(ierr);
955       break;
956 #else
957   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
958 #endif
959   break;
960  case 2:
961 #if defined(PETSC_USE_COMPLEX)
962       //PetscInt my_value;
963       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
964       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
965       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
966       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
967       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
968       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
969       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
971       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
972       ierr = ISDestroy(&list1);CHKERRQ(ierr);
973       ierr = ISDestroy(&list2);CHKERRQ(ierr);
974       break;
975 #else
976       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
977       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
978 
979       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
980       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
981       printf("Val local_0_start = %ld\n",local_0_start);
982 
983       if (dim[1]%2==0)
984         NM = dim[1]+2;
985       else
986         NM = dim[1]+1;
987 
988       for (i=0;i<local_n0;i++){
989          for (j=0;j<dim[1];j++){
990             tempindx = i*dim[1] + j;
991             tempindx1 = i*NM + j;
992             indx1[tempindx]=local_0_start*dim[1]+tempindx;
993             indx2[tempindx]=low+tempindx1;
994            // printf("Val tempindx1 = %d\n",tempindx1);
995   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
996   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
997   //          printf("-------------------------\n",indx2[tempindx],rank);
998         }
999      }
1000 
1001       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1002       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1003 
1004       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1005       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1007       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1008       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1009       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1010       ierr = PetscFree(indx1);CHKERRQ(ierr);
1011       ierr = PetscFree(indx2);CHKERRQ(ierr);
1012       break;
1013 #endif
1014 
1015  case 3:
1016       #if defined(PETSC_USE_COMPLEX)
1017       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1018       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1019       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1020       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1021       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1022       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1023       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1026       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1027       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1028       break;
1029 #else
1030       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1031       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1032 
1033       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1034       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1035       printf("Val local_0_start = %ld\n",local_0_start);
1036 
1037       if (dim[2]%2==0)
1038         NM = dim[2]+2;
1039       else
1040         NM = dim[2]+1;
1041 
1042       for (i=0;i<local_n0;i++){
1043          for (j=0;j<dim[1];j++){
1044             for (k=0;k<dim[2];k++){
1045                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1046                tempindx1 = i*dim[1]*NM + j*NM + k;
1047                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1048                indx2[tempindx]=low+tempindx1;
1049             }
1050            // printf("Val tempindx1 = %d\n",tempindx1);
1051            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1052            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1053            // printf("-------------------------\n",indx2[tempindx],rank);
1054         }
1055      }
1056 
1057       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1058       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1059 
1060       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1061       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1062       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1063       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1064       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1065       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1066       ierr = PetscFree(indx1);CHKERRQ(ierr);
1067       ierr = PetscFree(indx2);CHKERRQ(ierr);
1068       break;
1069 #endif
1070 
1071  default:
1072 #if defined(PETSC_USE_COMPLEX)
1073       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1074       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1075       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1076       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1077       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1078       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1079       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1080       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1082       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1083       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1084 
1085 
1086 #else
1087       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1088       printf("The value of temp is %ld\n",temp);
1089       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1090       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1091       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1092       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1093 
1094       partial_dim = fftw->partial_dim;
1095       printf("The value of partial dim is %d\n",partial_dim);
1096 
1097       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1098       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1099       printf("Val local_0_start = %ld\n",local_0_start);
1100 
1101       if (dim[ndim-1]%2==0)
1102         NM = 2;
1103       else
1104         NM = 1;
1105 
1106       j = low;
1107       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1108          {
1109           indx1[i] = local_0_start*partial_dim + i;
1110           indx2[i] = j;
1111           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1112           if (k%dim[ndim-1]==0)
1113             { j+=NM;}
1114           j++;
1115          }
1116       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1117       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1118       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1119       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1122       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1123       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1124       ierr = PetscFree(indx1);CHKERRQ(ierr);
1125       ierr = PetscFree(indx2);CHKERRQ(ierr);
1126       break;
1127 #endif
1128   }
1129  }
1130 
1131  return 0;
1132 }
1133 EXTERN_C_END
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "OutputTransformFFT"
1137 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1138 {
1139   PetscErrorCode ierr;
1140   PetscFunctionBegin;
1141   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 /*
1146       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1147   Input A, x, y
1148   A - FFTW matrix
1149   x - FFTW vector
1150   y - PETSc vector that user can read
1151   Options Database Keys:
1152 + -mat_fftw_plannerflags - set FFTW planner flags
1153 
1154    Level: intermediate
1155 
1156 */
1157 
1158 EXTERN_C_BEGIN
1159 #undef __FUNCT__
1160 #define __FUNCT__ "OutputTransformFFT_FTTW"
1161 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1162 {
1163   PetscErrorCode ierr;
1164   MPI_Comm       comm=((PetscObject)A)->comm;
1165   Mat_FFT        *fft = (Mat_FFT*)A->data;
1166   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1167   PetscInt       N=fft->N, N1, n1 ,NM;
1168   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1169   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1170   PetscInt       i,j,k,rank,size,partial_dim;
1171   ptrdiff_t      alloc_local,local_n0,local_0_start;
1172   ptrdiff_t      local_n1,local_1_start,temp;
1173   VecScatter     vecscat;
1174   IS             list1,list2;
1175 
1176   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1177   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1178   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
1179 
1180   if (size==1){
1181 /*
1182     switch (ndim){
1183     case 1:
1184            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1185           for (i=0;i<dim[0];i++)
1186              {
1187               indx1[i] = i;
1188              }
1189           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1190           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1191           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1192           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1193           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1194           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1195           ierr = PetscFree(indx1);CHKERRQ(ierr);
1196           break;
1197 
1198     case 2:
1199          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1200           for (i=0;i<dim[0];i++){
1201              for (j=0;j<dim[1];j++){
1202                 indx1[i*dim[1]+j] = i*dim[1] + j;
1203              }
1204           }
1205          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1206          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1207          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1210          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1211          ierr = PetscFree(indx1);CHKERRQ(ierr);
1212          break;
1213     case 3:
1214          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1215          for (i=0;i<dim[0];i++){
1216             for (j=0;j<dim[1];j++){
1217                for (k=0;k<dim[2];k++){
1218                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1219                }
1220             }
1221          }
1222          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1223          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1224          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1227          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1228          ierr = PetscFree(indx1);CHKERRQ(ierr);
1229          break;
1230     default:
1231 */
1232          ierr = ISCreateStride(comm,N,0,1,&list1);
1233          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1234          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1235          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1238          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1239   //       break;
1240    // }
1241   }
1242   else{
1243 
1244  switch (ndim){
1245  case 1:
1246 #if defined(PETSC_USE_COMPLEX)
1247       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1248       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
1249       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
1250       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1251       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1252       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1253       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1255       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1256       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1257       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1258       break;
1259 
1260 #else
1261       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1262 #endif
1263   break;
1264  case 2:
1265 #if defined(PETSC_USE_COMPLEX)
1266       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1267       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1268       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1269       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1270       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1273       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1274       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1275       break;
1276 #else
1277       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1278       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1279 
1280      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1281      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1282      printf("Val local_0_start = %ld\n",local_0_start);
1283 
1284      if (dim[1]%2==0)
1285       NM = dim[1]+2;
1286     else
1287       NM = dim[1]+1;
1288 
1289 
1290 
1291      for (i=0;i<local_n0;i++){
1292         for (j=0;j<dim[1];j++){
1293             tempindx = i*dim[1] + j;
1294             tempindx1 = i*NM + j;
1295             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1296             indx2[tempindx]=low+tempindx1;
1297        //     printf("Val tempindx1 = %d\n",tempindx1);
1298        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1299        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1300        //     printf("-------------------------\n",indx2[tempindx],rank);
1301         }
1302      }
1303 
1304      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1305      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1306 
1307      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1308      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1310      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1311      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1312      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1313      ierr = PetscFree(indx1);CHKERRQ(ierr);
1314      ierr = PetscFree(indx2);CHKERRQ(ierr);
1315   break;
1316 #endif
1317  case 3:
1318 #if defined(PETSC_USE_COMPLEX)
1319       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1320       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1321       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1322       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1323       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1324       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1325       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1328       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1329       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1330       break;
1331 #else
1332 
1333       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1334       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1335 
1336      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1337      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1338      printf("Val local_0_start = %ld\n",local_0_start);
1339 
1340      if (dim[2]%2==0)
1341       NM = dim[2]+2;
1342     else
1343       NM = dim[2]+1;
1344 
1345      for (i=0;i<local_n0;i++){
1346         for (j=0;j<dim[1];j++){
1347            for (k=0;k<dim[2];k++){
1348               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1349               tempindx1 = i*dim[1]*NM + j*NM + k;
1350               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1351               indx2[tempindx]=low+tempindx1;
1352            }
1353         //    printf("Val tempindx1 = %d\n",tempindx1);
1354         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1355         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1356         //    printf("-------------------------\n",indx2[tempindx],rank);
1357         }
1358      }
1359 
1360      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1361      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1362 
1363      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1364      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1365      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1366      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1367      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1368      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1369      ierr = PetscFree(indx1);CHKERRQ(ierr);
1370      ierr = PetscFree(indx2);CHKERRQ(ierr);
1371   break;
1372 #endif
1373   default:
1374 #if defined(PETSC_USE_COMPLEX)
1375       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1376       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1377       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1378       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1379       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1380       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1381       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1382       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1383       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1384       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1385       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1386 #else
1387      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1388      printf("The value of temp is %ld\n",temp);
1389      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1390      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1391      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1392      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1393 
1394      partial_dim = fftw->partial_dim;
1395      printf("The value of partial dim is %d\n",partial_dim);
1396 
1397      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1398      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1399      printf("Val local_0_start = %ld\n",local_0_start);
1400 
1401      if (dim[ndim-1]%2==0)
1402        NM = 2;
1403      else
1404        NM = 1;
1405 
1406      j = low;
1407      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1408         {
1409          indx1[i] = local_0_start*partial_dim + i;
1410          indx2[i] = j;
1411          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1412          if (k%dim[ndim-1]==0)
1413            { j+=NM;}
1414          j++;
1415         }
1416      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1417      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1418 
1419       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1420 
1421 
1422      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1423      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1425      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1426      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1427      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1428      ierr = PetscFree(indx1);CHKERRQ(ierr);
1429      ierr = PetscFree(indx2);CHKERRQ(ierr);
1430 
1431      break;
1432 #endif
1433  }
1434  }
1435  return 0;
1436 }
1437 EXTERN_C_END
1438 
1439 EXTERN_C_BEGIN
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatCreate_FFTW"
1442 /*
1443       MatCreate_FFTW - Creates a matrix object that provides FFT
1444   via the external package FFTW
1445   Options Database Keys:
1446 + -mat_fftw_plannerflags - set FFTW planner flags
1447 
1448    Level: intermediate
1449 
1450 */
1451 
1452 PetscErrorCode MatCreate_FFTW(Mat A)
1453 {
1454   PetscErrorCode ierr;
1455   MPI_Comm       comm=((PetscObject)A)->comm;
1456   Mat_FFT        *fft=(Mat_FFT*)A->data;
1457   Mat_FFTW       *fftw;
1458   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1459   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1460   PetscBool      flg;
1461   PetscInt       p_flag,partial_dim=1,ctr,N1;
1462   PetscMPIInt    size,rank;
1463   ptrdiff_t      *pdim, temp;
1464   ptrdiff_t      local_n1,local_1_start,local_1_end;
1465 
1466   PetscFunctionBegin;
1467   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1468   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1469   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1470 
1471   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1472   pdim[0] = dim[0];
1473   for(ctr=1;ctr<ndim;ctr++)
1474       {
1475           partial_dim *= dim[ctr];
1476           pdim[ctr] = dim[ctr];
1477       }
1478 
1479   if (size == 1) {
1480 #if defined(PETSC_USE_COMPLEX)
1481     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1482     n = N;
1483 #else
1484     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1485     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1486     n = tot_dim;
1487 #endif
1488 
1489   } else {
1490     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1491     switch (ndim){
1492     case 1:
1493 #if !defined(PETSC_USE_COMPLEX)
1494   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1495 #else
1496       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1497       n = (PetscInt)local_n0;
1498       printf("The value of n is %d",n);
1499       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1500 #endif
1501 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1502       break;
1503     case 2:
1504 #if defined(PETSC_USE_COMPLEX)
1505       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1506       /*
1507        PetscMPIInt    rank;
1508        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1509        PetscSynchronizedFlush(comm);
1510        */
1511       n = (PetscInt)local_n0*dim[1];
1512       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1513 #else
1514       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1515       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1516       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1517 #endif
1518       break;
1519     case 3:
1520 //      printf("The value of alloc local is %d",alloc_local);
1521 #if defined(PETSC_USE_COMPLEX)
1522       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1523       n = (PetscInt)local_n0*dim[1]*dim[2];
1524       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1525 #else
1526       printf("The code comes here\n");
1527       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1528       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1529       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1530 #endif
1531       break;
1532     default:
1533 #if defined(PETSC_USE_COMPLEX)
1534       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1535 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1536 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1537       n = (PetscInt)local_n0*partial_dim;
1538 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1539       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1540 #else
1541       temp = pdim[ndim-1];
1542       pdim[ndim-1]= temp/2 + 1;
1543       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1544       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1545       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1546       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1547       pdim[ndim-1] = temp;
1548       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1549       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1550 #endif
1551       break;
1552     }
1553   }
1554   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1555   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1556   fft->data = (void*)fftw;
1557 
1558   fft->n           = n;
1559   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1560   fftw->partial_dim = partial_dim;
1561   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1562   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1563 
1564   fftw->p_forward  = 0;
1565   fftw->p_backward = 0;
1566   fftw->p_flag     = FFTW_ESTIMATE;
1567 
1568   if (size == 1){
1569     A->ops->mult          = MatMult_SeqFFTW;
1570     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1571   } else {
1572     A->ops->mult          = MatMult_MPIFFTW;
1573     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1574   }
1575   fft->matdestroy          = MatDestroy_FFTW;
1576   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1577   //A->ops->getvecs       = MatGetVecs_FFTW;
1578   A->assembled          = PETSC_TRUE;
1579   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1580   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1581   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1582 
1583   /* get runtime options */
1584   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1585     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1586     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1587   PetscOptionsEnd();
1588   PetscFunctionReturn(0);
1589 }
1590 EXTERN_C_END
1591 
1592 
1593 
1594 
1595