xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 4cc180ff205dee8612468d415de84efe44f0413b)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*);
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMult_SeqFFTW"
32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33 {
34   PetscErrorCode ierr;
35   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37   PetscScalar    *x_array,*y_array;
38   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39 
40   PetscFunctionBegin;
41   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43   if (!fftw->p_forward){ /* create a plan, then excute it */
44     switch (ndim){
45     case 1:
46 #if defined(PETSC_USE_COMPLEX)
47       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
48 #else
49       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
50 #endif
51       break;
52     case 2:
53 #if defined(PETSC_USE_COMPLEX)
54       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55 #else
56       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
57 #endif
58       break;
59     case 3:
60 #if defined(PETSC_USE_COMPLEX)
61       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
62 #else
63       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
64 #endif
65       break;
66     default:
67 #if defined(PETSC_USE_COMPLEX)
68       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
69 #else
70       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
71 #endif
72       break;
73     }
74     fftw->finarray  = x_array;
75     fftw->foutarray = y_array;
76     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
77                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
78     fftw_execute(fftw->p_forward);
79   } else { /* use existing plan */
80     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
81       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
82     } else {
83       fftw_execute(fftw->p_forward);
84     }
85   }
86   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
87   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
93 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
94 {
95   PetscErrorCode ierr;
96   Mat_FFT        *fft = (Mat_FFT*)A->data;
97   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
98   PetscScalar    *x_array,*y_array;
99   PetscInt       ndim=fft->ndim,*dim=fft->dim;
100 
101   PetscFunctionBegin;
102   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
103   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
104   if (!fftw->p_backward){ /* create a plan, then excute it */
105     switch (ndim){
106     case 1:
107 #if defined(PETSC_USE_COMPLEX)
108       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
109 #else
110       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
111 #endif
112       break;
113     case 2:
114 #if defined(PETSC_USE_COMPLEX)
115       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
116 #else
117       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
118 #endif
119       break;
120     case 3:
121 #if defined(PETSC_USE_COMPLEX)
122       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
123 #else
124       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
125 #endif
126       break;
127     default:
128 #if defined(PETSC_USE_COMPLEX)
129       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
130 #else
131       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
132 #endif
133       break;
134     }
135     fftw->binarray  = x_array;
136     fftw->boutarray = y_array;
137     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
138   } else { /* use existing plan */
139     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
140       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
141     } else {
142       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
143     }
144   }
145   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
146   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatMult_MPIFFTW"
152 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
153 {
154   PetscErrorCode ierr;
155   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157   PetscScalar    *x_array,*y_array;
158   PetscInt       ndim=fft->ndim,*dim=fft->dim;
159   MPI_Comm       comm=((PetscObject)A)->comm;
160 
161   PetscFunctionBegin;
162   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
163   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164   if (!fftw->p_forward){ /* create a plan, then excute it */
165     switch (ndim){
166     case 1:
167 #if defined(PETSC_USE_COMPLEX)
168       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169 #else
170       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
171 #endif
172       break;
173     case 2:
174 #if defined(PETSC_USE_COMPLEX)
175       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
176 #else
177       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
178 #endif
179       break;
180     case 3:
181 #if defined(PETSC_USE_COMPLEX)
182       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183 #else
184       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
185 #endif
186       break;
187     default:
188 #if defined(PETSC_USE_COMPLEX)
189       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
190 #else
191       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
192 #endif
193       break;
194     }
195     fftw->finarray  = x_array;
196     fftw->foutarray = y_array;
197     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
198                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
199     fftw_execute(fftw->p_forward);
200   } else { /* use existing plan */
201     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
202       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
203     } else {
204       fftw_execute(fftw->p_forward);
205     }
206   }
207   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
208   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
214 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
215 {
216   PetscErrorCode ierr;
217   Mat_FFT        *fft  = (Mat_FFT*)A->data;
218   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
219   PetscScalar    *x_array,*y_array;
220   PetscInt       ndim=fft->ndim,*dim=fft->dim;
221   MPI_Comm       comm=((PetscObject)A)->comm;
222 
223   PetscFunctionBegin;
224   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
225   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
226   if (!fftw->p_backward){ /* create a plan, then excute it */
227     switch (ndim){
228     case 1:
229 #if defined(PETSC_USE_COMPLEX)
230       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
231 #else
232       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
233 #endif
234       break;
235     case 2:
236 #if defined(PETSC_USE_COMPLEX)
237       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
238 #else
239       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
240 #endif
241       break;
242     case 3:
243 #if defined(PETSC_USE_COMPLEX)
244       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
245 #else
246       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
247 #endif
248       break;
249     default:
250 #if defined(PETSC_USE_COMPLEX)
251       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
252 #else
253       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
254 #endif
255       break;
256     }
257     fftw->binarray  = x_array;
258     fftw->boutarray = y_array;
259     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
260   } else { /* use existing plan */
261     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
262       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
263     } else {
264       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265     }
266   }
267   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
268   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatDestroy_FFTW"
274 PetscErrorCode MatDestroy_FFTW(Mat A)
275 {
276   Mat_FFT        *fft = (Mat_FFT*)A->data;
277   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   fftw_destroy_plan(fftw->p_forward);
282   fftw_destroy_plan(fftw->p_backward);
283   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
284   ierr = PetscFree(fft->data);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
289 #undef __FUNCT__
290 #define __FUNCT__ "VecDestroy_MPIFFTW"
291 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
292 {
293   PetscErrorCode  ierr;
294   PetscScalar     *array;
295 
296   PetscFunctionBegin;
297   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
298   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
299   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
300   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatGetVecs1DC_FFTW"
306 /*
307    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
308      parallel layout determined by FFTW-1D
309 
310 */
311 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
312 {
313   PetscErrorCode ierr;
314   PetscMPIInt    size,rank;
315   MPI_Comm       comm=((PetscObject)A)->comm;
316   Mat_FFT        *fft = (Mat_FFT*)A->data;
317 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318   PetscInt       N=fft->N;
319   PetscInt       ndim=fft->ndim,*dim=fft->dim;
320   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
321   ptrdiff_t      f_local_n1,f_local_1_end;
322   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
323   ptrdiff_t      b_local_n1,b_local_1_end;
324   fftw_complex   *data_fin,*data_fout,*data_bout;
325 
326   PetscFunctionBegin;
327 #if !defined(PETSC_USE_COMPLEX)
328   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
329 #endif
330   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
331   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
332   if (size == 1){
333     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
334   }
335   else {
336       if (ndim>1){
337         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
338       else {
339           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
340           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
341           if (fin) {
342             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
343             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
344             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
345           }
346           if (fout) {
347             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
348             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
349             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
350           }
351           if (bout) {
352             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
353             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
354             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
355           }
356   }
357   if (fin){
358     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
359   }
360   if (fout){
361     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
362   }
363   if (bout){
364     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 
370 }
371 
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatGetVecsFFT"
375 PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z)
376 {
377   PetscErrorCode ierr;
378   PetscFunctionBegin;
379   ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatGetVecsFFT_FFTW"
385 /*
386    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
387      parallel layout determined by FFTW
388 
389    Collective on Mat
390 
391    Input Parameter:
392 .  mat - the matrix
393 
394    Output Parameter:
395 +   fin - (optional) input vector of forward FFTW
396 -   fout - (optional) output vector of forward FFTW
397 
398   Level: advanced
399 
400 .seealso: MatCreateFFTW()
401 */
402 EXTERN_C_BEGIN
403 PetscErrorCode  MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
404 {
405   PetscErrorCode ierr;
406   PetscMPIInt    size,rank;
407   MPI_Comm       comm=((PetscObject)A)->comm;
408   Mat_FFT        *fft = (Mat_FFT*)A->data;
409   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
410   PetscInt       N=fft->N, N1, n1,vsize;
411   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
412 
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
415   PetscValidType(A,1);
416 
417   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
418   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
419   printf("size is %d\n",size);
420   if (size == 1){ /* sequential case */
421   printf("Routine is getting called\n");
422 #if defined(PETSC_USE_COMPLEX)
423     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
424     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
425     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
426 #else
427     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
428     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
429     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
430 #endif
431   } else {
432     ptrdiff_t      alloc_local,local_n0,local_0_start;
433     ptrdiff_t      local_n1,local_1_end;
434     fftw_complex   *data_fin,*data_fout,*data_bout;
435     double         *data_finr,*data_boutr ;
436     ptrdiff_t      local_1_start,temp;
437     switch (ndim){
438           case 1:
439 #if !defined(PETSC_USE_COMPLEX)
440                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
441 #else
442                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
443                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
444                  if (fin) {
445                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
446                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
447                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
448                          }
449                  if (fout) {
450                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
451                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
452                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
453                          }
454                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
455                  if (bout) {
456                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
457                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
458                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
459                          }
460                  break;
461 
462 
463 #endif
464           case 2:
465 #if !defined(PETSC_USE_COMPLEX)
466                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
467                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
468                  if (fin) {
469                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
470                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
471                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
472                            //printf("The code comes here with vector size %d\n",vsize);
473                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
474                           }
475                  if (bout) {
476                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
477                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
478                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
479                            //printf("The code comes here with vector size %d\n",vsize);
480                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
481                           }
482                  if (fout) {
483                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
484                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
485                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
486                            }
487 
488                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
489 
490 #else
491       /* Get local size */
492                  printf("Code works for paralllel 2d complex DFT\n");
493                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
494                  if (fin) {
495                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
496                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
497                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
498                           }
499                  if (fout) {
500                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
501                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
502                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
503                            }
504                  if (bout) {
505                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
506                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
507                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
508                           }
509 
510      //printf("Hope this does not come here");
511 #endif
512       break;
513 
514           case 3:
515 #if !defined(PETSC_USE_COMPLEX)
516                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
517                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
518                  if (fin) {
519                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
520                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
521                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
522         //printf("The code comes here with vector size %d\n",vsize);
523                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
524                          }
525                  if (bout) {
526                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
527                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
528                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
529         //printf("The code comes here with vector size %d\n",vsize);
530                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
531                           }
532                  if (fout) {
533                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
534                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
535                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
536                           }
537 
538       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
539 
540 #else
541                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
542 //      printf("The quantity n is %d",n);
543                  if (fin) {
544                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
545                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
546                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
547                          }
548                  if (fout) {
549                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
550                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
551                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
552                          }
553                  if (bout) {
554                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
555                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
556                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
557                          }
558 
559 #endif
560                  break;
561           default:
562 #if !defined(PETSC_USE_COMPLEX)
563                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
564                  printf("The value of temp is %ld\n",temp);
565                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
566                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
567                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
568                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
569 
570                  if (fin) {
571                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
572                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
573                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
574         //printf("The code comes here with vector size %d\n",vsize);
575                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
576                         }
577                  if (bout) {
578                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
579                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
580                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
581         //printf("The code comes here with vector size %d\n",vsize);
582                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
583                         }
584                  if (fout) {
585                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
587                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
588                         }
589 
590 #else
591                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
592                 if (fin) {
593                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
594                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
595                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
596                        }
597                 if (fout) {
598                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
599                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
600                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
601                        }
602                 if (bout) {
603                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
604                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
605                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
606                        }
607 
608 
609 
610 #endif
611             break;
612           }
613   }
614   PetscFunctionReturn(0);
615 }
616 EXTERN_C_END
617 
618 
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatGetVecs_FFTW"
622 /*
623    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
624      parallel layout determined by FFTW
625 
626    Collective on Mat
627 
628    Input Parameter:
629 .  mat - the matrix
630 
631    Output Parameter:
632 +   fin - (optional) input vector of forward FFTW
633 -   fout - (optional) output vector of forward FFTW
634 
635   Level: advanced
636 
637 .seealso: MatCreateFFTW()
638 */
639 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
640 {
641   PetscErrorCode ierr;
642   PetscMPIInt    size,rank;
643   MPI_Comm       comm=((PetscObject)A)->comm;
644   Mat_FFT        *fft = (Mat_FFT*)A->data;
645   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
646   PetscInt       N=fft->N, N1, n1,vsize;
647   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
648 
649   PetscFunctionBegin;
650   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
651   PetscValidType(A,1);
652 
653   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
654   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
655   if (size == 1){ /* sequential case */
656 #if defined(PETSC_USE_COMPLEX)
657     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
658     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
659 #else
660     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
661     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
662 #endif
663   } else {        /* mpi case */
664     ptrdiff_t      alloc_local,local_n0,local_0_start;
665     ptrdiff_t      local_n1,local_1_end;
666     fftw_complex   *data_fin,*data_fout;
667     double         *data_finr ;
668     ptrdiff_t      local_1_start,temp;
669 //    PetscInt ctr;
670 //    ptrdiff_t      ndim1,*pdim;
671 //    ndim1=(ptrdiff_t) ndim;
672 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
673 
674 //    for(ctr=0;ctr<ndim;ctr++)
675 //        {k
676 //           pdim[ctr] = dim[ctr];
677 //       }
678 
679 
680 
681     switch (ndim){
682     case 1:
683       /* Get local size */
684       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
685 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
686       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
687       printf("The values of local_n0 and local_n1 are %d and %d",local_n0,local_n1);
688       if (fin) {
689         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
690         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
691         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
692       }
693       if (fout) {
694         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
695         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
696         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
697       }
698       break;
699     case 2:
700 #if !defined(PETSC_USE_COMPLEX)
701       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
702       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
703       if (fin) {
704         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
705         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
706         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
707         //printf("The code comes here with vector size %d\n",vsize);
708         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
709       }
710       n1 = 2*local_n1*(dim[0]);
711       //n1 = 2*local_n1*dim[1];
712       printf("The values are %ld\n",local_n1);
713       if (fout) {
714         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
715         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
716         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
717       }
718       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
719 
720 #else
721       /* Get local size */
722      //printf("Hope this does not come here");
723       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
724       if (fin) {
725         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
726         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
727         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
728       }
729       if (fout) {
730         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
731         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
732         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
733       }
734      //printf("Hope this does not come here");
735 #endif
736       break;
737     case 3:
738       /* Get local size */
739 #if !defined(PETSC_USE_COMPLEX)
740       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
741       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
742       if (fin) {
743         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
744         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
745         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
746         //printf("The code comes here with vector size %d\n",vsize);
747         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
748       }
749       printf("The value is %ld",local_n1);
750       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
751       if (fout) {
752         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
753         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
754         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
755       }
756       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
757 
758 
759 #else
760       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
761 //      printf("The quantity n is %d",n);
762       if (fin) {
763         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
764         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
765         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
766       }
767       if (fout) {
768         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
769         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
770         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
771       }
772 #endif
773       break;
774     default:
775       /* Get local size */
776 #if !defined(PETSC_USE_COMPLEX)
777       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
778       printf("The value of temp is %ld\n",temp);
779       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
780       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
781       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
782       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
783       if (fin) {
784         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
785         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
786         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
787         //printf("The code comes here with vector size %d\n",vsize);
788         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
789       }
790       printf("The value is %ld. Global length is %d \n",local_n1,N1);
791       temp = fftw->partial_dim;
792       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
793 
794       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
795       if (fout) {
796         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
797         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
798         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
799       }
800 
801 #else
802       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
803 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
804 //      printf("The value of alloc local is %d",alloc_local);
805 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
806 //      for(i=0;i<ndim;i++)
807 //         {
808 //          pdim[i]=dim[i];printf("%d",pdim[i]);
809 //         }
810 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
811 //      printf("The quantity n is %d",n);
812       if (fin) {
813         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
814         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
815         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
816       }
817       if (fout) {
818         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
819         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
820         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
821       }
822 #endif
823       break;
824     }
825   }
826 //  if (fin){
827 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
828 //  }
829 //  if (fout){
830 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
831 //  }
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "InputTransformFFT"
837 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
838 {
839   PetscErrorCode ierr;
840   PetscFunctionBegin;
841   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
842   PetscFunctionReturn(0);
843 }
844 
845 /*
846       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
847   Input A, x, y
848   A - FFTW matrix
849   x - user data
850   Options Database Keys:
851 + -mat_fftw_plannerflags - set FFTW planner flags
852 
853    Level: intermediate
854 
855 */
856 
857 EXTERN_C_BEGIN
858 #undef __FUNCT__
859 #define __FUNCT__ "InputTransformFFT_FTTW"
860 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
861 {
862   PetscErrorCode ierr;
863   MPI_Comm       comm=((PetscObject)A)->comm;
864   Mat_FFT        *fft = (Mat_FFT*)A->data;
865   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
866   PetscInt       N=fft->N, N1, n1 ,NM;
867   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
868   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
869   PetscInt       i,j,k,rank,size,partial_dim;
870   ptrdiff_t      alloc_local,local_n0,local_0_start;
871   ptrdiff_t      local_n1,local_1_start,temp;
872   VecScatter     vecscat;
873   IS             list1,list2;
874 
875   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
876   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
877   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
878   printf("Local ownership starts at %d\n",low);
879 
880   if (size==1)
881     {
882 /*     switch (ndim){
883      case 1:
884           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
885           for (i=0;i<dim[0];i++)
886              {
887               indx1[i] = i;
888              }
889           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
890           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
891           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
894           ierr = ISDestroy(&list1);CHKERRQ(ierr);
895           ierr = PetscFree(indx1);CHKERRQ(ierr);
896      break;
897 
898      case 2:
899           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
900           for (i=0;i<dim[0];i++){
901              for (j=0;j<dim[1];j++){
902                 indx1[i*dim[1]+j] = i*dim[1] + j;
903              }
904           }
905           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
906           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
907           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
908           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
910           ierr = ISDestroy(&list1);CHKERRQ(ierr);
911           ierr = PetscFree(indx1);CHKERRQ(ierr);
912           break;
913      case 3:
914           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
915           for (i=0;i<dim[0];i++){
916              for (j=0;j<dim[1];j++){
917                 for (k=0;k<dim[2];k++){
918                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
919                 }
920              }
921           }
922           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
923           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
924           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
927           ierr = ISDestroy(&list1);CHKERRQ(ierr);
928           ierr = PetscFree(indx1);CHKERRQ(ierr);
929           break;
930      default:
931 */
932           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
933           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
934           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
937           ierr = ISDestroy(&list1);CHKERRQ(ierr);
938           //ierr = ISDestroy(list1);CHKERRQ(ierr);
939  //         break;
940   //    }
941     }
942 
943  else{
944 
945  switch (ndim){
946  case 1:
947 #if defined(PETSC_USE_COMPLEX)
948       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
949       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
950       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
951       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
952       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
953       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
954       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959       break;
960 #else
961   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
962 #endif
963   break;
964  case 2:
965 #if defined(PETSC_USE_COMPLEX)
966       //PetscInt my_value;
967       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
968       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
969       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
970       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
971       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
972       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
973       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
976       ierr = ISDestroy(&list1);CHKERRQ(ierr);
977       ierr = ISDestroy(&list2);CHKERRQ(ierr);
978       break;
979 #else
980       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
981       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
982 
983       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
984       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
985       printf("Val local_0_start = %ld\n",local_0_start);
986 
987       if (dim[1]%2==0)
988         NM = dim[1]+2;
989       else
990         NM = dim[1]+1;
991 
992       for (i=0;i<local_n0;i++){
993          for (j=0;j<dim[1];j++){
994             tempindx = i*dim[1] + j;
995             tempindx1 = i*NM + j;
996             indx1[tempindx]=local_0_start*dim[1]+tempindx;
997             indx2[tempindx]=low+tempindx1;
998            // printf("Val tempindx1 = %d\n",tempindx1);
999   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1000   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1001   //          printf("-------------------------\n",indx2[tempindx],rank);
1002         }
1003      }
1004 
1005       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1006       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1007 
1008       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1009       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1011       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1012       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1013       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1014       ierr = PetscFree(indx1);CHKERRQ(ierr);
1015       ierr = PetscFree(indx2);CHKERRQ(ierr);
1016       break;
1017 #endif
1018 
1019  case 3:
1020       #if defined(PETSC_USE_COMPLEX)
1021       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1022       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1023       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1024       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1025       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1026       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1027       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1030       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1031       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1032       break;
1033 #else
1034       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1035       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1036 
1037       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1038       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1039       printf("Val local_0_start = %ld\n",local_0_start);
1040 
1041       if (dim[2]%2==0)
1042         NM = dim[2]+2;
1043       else
1044         NM = dim[2]+1;
1045 
1046       for (i=0;i<local_n0;i++){
1047          for (j=0;j<dim[1];j++){
1048             for (k=0;k<dim[2];k++){
1049                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1050                tempindx1 = i*dim[1]*NM + j*NM + k;
1051                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1052                indx2[tempindx]=low+tempindx1;
1053             }
1054            // printf("Val tempindx1 = %d\n",tempindx1);
1055            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1056            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1057            // printf("-------------------------\n",indx2[tempindx],rank);
1058         }
1059      }
1060 
1061       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1062       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1063 
1064       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1065       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1066       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1067       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1068       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1069       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1070       ierr = PetscFree(indx1);CHKERRQ(ierr);
1071       ierr = PetscFree(indx2);CHKERRQ(ierr);
1072       break;
1073 #endif
1074 
1075  default:
1076 #if defined(PETSC_USE_COMPLEX)
1077       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1078       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1079       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1080       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1081       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1082       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1083       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1086       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1087       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1088 
1089 
1090 #else
1091       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1092       printf("The value of temp is %ld\n",temp);
1093       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1094       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1095       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1096       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1097 
1098       partial_dim = fftw->partial_dim;
1099       printf("The value of partial dim is %d\n",partial_dim);
1100 
1101       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1102       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1103       printf("Val local_0_start = %ld\n",local_0_start);
1104 
1105       if (dim[ndim-1]%2==0)
1106         NM = 2;
1107       else
1108         NM = 1;
1109 
1110       j = low;
1111       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1112          {
1113           indx1[i] = local_0_start*partial_dim + i;
1114           indx2[i] = j;
1115           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1116           if (k%dim[ndim-1]==0)
1117             { j+=NM;}
1118           j++;
1119          }
1120       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1121       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1122       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1123       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1124       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1125       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1126       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1127       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1128       ierr = PetscFree(indx1);CHKERRQ(ierr);
1129       ierr = PetscFree(indx2);CHKERRQ(ierr);
1130       break;
1131 #endif
1132   }
1133  }
1134 
1135  return 0;
1136 }
1137 EXTERN_C_END
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "OutputTransformFFT"
1141 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1142 {
1143   PetscErrorCode ierr;
1144   PetscFunctionBegin;
1145   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 /*
1150       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1151   Input A, x, y
1152   A - FFTW matrix
1153   x - FFTW vector
1154   y - PETSc vector that user can read
1155   Options Database Keys:
1156 + -mat_fftw_plannerflags - set FFTW planner flags
1157 
1158    Level: intermediate
1159 
1160 */
1161 
1162 EXTERN_C_BEGIN
1163 #undef __FUNCT__
1164 #define __FUNCT__ "OutputTransformFFT_FTTW"
1165 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1166 {
1167   PetscErrorCode ierr;
1168   MPI_Comm       comm=((PetscObject)A)->comm;
1169   Mat_FFT        *fft = (Mat_FFT*)A->data;
1170   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1171   PetscInt       N=fft->N, N1, n1 ,NM;
1172   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1173   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1174   PetscInt       i,j,k,rank,size,partial_dim;
1175   ptrdiff_t      alloc_local,local_n0,local_0_start;
1176   ptrdiff_t      local_n1,local_1_start,temp;
1177   VecScatter     vecscat;
1178   IS             list1,list2;
1179 
1180   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1181   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1182   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
1183 
1184   if (size==1){
1185 /*
1186     switch (ndim){
1187     case 1:
1188            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1189           for (i=0;i<dim[0];i++)
1190              {
1191               indx1[i] = i;
1192              }
1193           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1194           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1195           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1196           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1197           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1198           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1199           ierr = PetscFree(indx1);CHKERRQ(ierr);
1200           break;
1201 
1202     case 2:
1203          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1204           for (i=0;i<dim[0];i++){
1205              for (j=0;j<dim[1];j++){
1206                 indx1[i*dim[1]+j] = i*dim[1] + j;
1207              }
1208           }
1209          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1210          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1211          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1214          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1215          ierr = PetscFree(indx1);CHKERRQ(ierr);
1216          break;
1217     case 3:
1218          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1219          for (i=0;i<dim[0];i++){
1220             for (j=0;j<dim[1];j++){
1221                for (k=0;k<dim[2];k++){
1222                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1223                }
1224             }
1225          }
1226          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1227          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1228          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1231          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1232          ierr = PetscFree(indx1);CHKERRQ(ierr);
1233          break;
1234     default:
1235 */
1236          ierr = ISCreateStride(comm,N,0,1,&list1);
1237          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1238          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1239          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1241          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1242          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1243   //       break;
1244    // }
1245   }
1246   else{
1247 
1248  switch (ndim){
1249  case 1:
1250 #if defined(PETSC_USE_COMPLEX)
1251       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1252       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
1253       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
1254       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1255       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1256       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1257       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1258       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1259       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1260       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1261       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1262       break;
1263 
1264 #else
1265       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1266 #endif
1267   break;
1268  case 2:
1269 #if defined(PETSC_USE_COMPLEX)
1270       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1271       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1272       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1273       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1274       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1275       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1277       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1278       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1279       break;
1280 #else
1281       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1282       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1283 
1284      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1285      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1286      printf("Val local_0_start = %ld\n",local_0_start);
1287 
1288      if (dim[1]%2==0)
1289       NM = dim[1]+2;
1290     else
1291       NM = dim[1]+1;
1292 
1293 
1294 
1295      for (i=0;i<local_n0;i++){
1296         for (j=0;j<dim[1];j++){
1297             tempindx = i*dim[1] + j;
1298             tempindx1 = i*NM + j;
1299             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1300             indx2[tempindx]=low+tempindx1;
1301        //     printf("Val tempindx1 = %d\n",tempindx1);
1302        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1303        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1304        //     printf("-------------------------\n",indx2[tempindx],rank);
1305         }
1306      }
1307 
1308      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1309      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1310 
1311      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1312      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1313      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1314      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1315      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1316      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1317      ierr = PetscFree(indx1);CHKERRQ(ierr);
1318      ierr = PetscFree(indx2);CHKERRQ(ierr);
1319   break;
1320 #endif
1321  case 3:
1322 #if defined(PETSC_USE_COMPLEX)
1323       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1324       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1325       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1326       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1327       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1328       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1329       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1330       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1331       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1332       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1333       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1334       break;
1335 #else
1336 
1337       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1338       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1339 
1340      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1341      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1342      printf("Val local_0_start = %ld\n",local_0_start);
1343 
1344      if (dim[2]%2==0)
1345       NM = dim[2]+2;
1346     else
1347       NM = dim[2]+1;
1348 
1349      for (i=0;i<local_n0;i++){
1350         for (j=0;j<dim[1];j++){
1351            for (k=0;k<dim[2];k++){
1352               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1353               tempindx1 = i*dim[1]*NM + j*NM + k;
1354               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1355               indx2[tempindx]=low+tempindx1;
1356            }
1357         //    printf("Val tempindx1 = %d\n",tempindx1);
1358         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1359         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1360         //    printf("-------------------------\n",indx2[tempindx],rank);
1361         }
1362      }
1363 
1364      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1365      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1366 
1367      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1368      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1370      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1371      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1372      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1373      ierr = PetscFree(indx1);CHKERRQ(ierr);
1374      ierr = PetscFree(indx2);CHKERRQ(ierr);
1375   break;
1376 #endif
1377   default:
1378 #if defined(PETSC_USE_COMPLEX)
1379       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1380       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1381       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1382       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1383       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1384       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1385       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1386       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1387       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1388       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1389       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1390 #else
1391      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1392      printf("The value of temp is %ld\n",temp);
1393      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1394      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1395      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1396      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1397 
1398      partial_dim = fftw->partial_dim;
1399      printf("The value of partial dim is %d\n",partial_dim);
1400 
1401      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1402      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1403      printf("Val local_0_start = %ld\n",local_0_start);
1404 
1405      if (dim[ndim-1]%2==0)
1406        NM = 2;
1407      else
1408        NM = 1;
1409 
1410      j = low;
1411      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1412         {
1413          indx1[i] = local_0_start*partial_dim + i;
1414          indx2[i] = j;
1415          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1416          if (k%dim[ndim-1]==0)
1417            { j+=NM;}
1418          j++;
1419         }
1420      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1421      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1422 
1423       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1424 
1425 
1426      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1427      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1428      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1429      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1430      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1431      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1432      ierr = PetscFree(indx1);CHKERRQ(ierr);
1433      ierr = PetscFree(indx2);CHKERRQ(ierr);
1434 
1435      break;
1436 #endif
1437  }
1438  }
1439  return 0;
1440 }
1441 EXTERN_C_END
1442 
1443 EXTERN_C_BEGIN
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatCreate_FFTW"
1446 /*
1447       MatCreate_FFTW - Creates a matrix object that provides FFT
1448   via the external package FFTW
1449   Options Database Keys:
1450 + -mat_fftw_plannerflags - set FFTW planner flags
1451 
1452    Level: intermediate
1453 
1454 */
1455 
1456 PetscErrorCode MatCreate_FFTW(Mat A)
1457 {
1458   PetscErrorCode ierr;
1459   MPI_Comm       comm=((PetscObject)A)->comm;
1460   Mat_FFT        *fft=(Mat_FFT*)A->data;
1461   Mat_FFTW       *fftw;
1462   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1463   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1464   PetscBool      flg;
1465   PetscInt       p_flag,partial_dim=1,ctr,N1;
1466   PetscMPIInt    size,rank;
1467   ptrdiff_t      *pdim, temp;
1468   ptrdiff_t      local_n1,local_1_start,local_1_end;
1469 
1470   PetscFunctionBegin;
1471   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1472   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1473   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1474 
1475   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1476   pdim[0] = dim[0];
1477   for(ctr=1;ctr<ndim;ctr++)
1478       {
1479           partial_dim *= dim[ctr];
1480           pdim[ctr] = dim[ctr];
1481       }
1482 
1483   if (size == 1) {
1484 #if defined(PETSC_USE_COMPLEX)
1485     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1486     n = N;
1487 #else
1488     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1489     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1490     n = tot_dim;
1491 #endif
1492 
1493   } else {
1494     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1495     switch (ndim){
1496     case 1:
1497 #if !defined(PETSC_USE_COMPLEX)
1498   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1499 #else
1500       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1501       n = (PetscInt)local_n0;
1502       printf("The value of n is %d",n);
1503       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1504 #endif
1505 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1506       break;
1507     case 2:
1508 #if defined(PETSC_USE_COMPLEX)
1509       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1510       /*
1511        PetscMPIInt    rank;
1512        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1513        PetscSynchronizedFlush(comm);
1514        */
1515       n = (PetscInt)local_n0*dim[1];
1516       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1517 #else
1518       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1519       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1520       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1521 #endif
1522       break;
1523     case 3:
1524 //      printf("The value of alloc local is %d",alloc_local);
1525 #if defined(PETSC_USE_COMPLEX)
1526       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1527       n = (PetscInt)local_n0*dim[1]*dim[2];
1528       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1529 #else
1530       printf("The code comes here\n");
1531       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1532       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1533       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1534 #endif
1535       break;
1536     default:
1537 #if defined(PETSC_USE_COMPLEX)
1538       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1539 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1540 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1541       n = (PetscInt)local_n0*partial_dim;
1542 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1543       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1544 #else
1545       temp = pdim[ndim-1];
1546       pdim[ndim-1]= temp/2 + 1;
1547       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1548       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1549       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1550       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1551       pdim[ndim-1] = temp;
1552       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1553       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1554 #endif
1555       break;
1556     }
1557   }
1558   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1559   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1560   fft->data = (void*)fftw;
1561 
1562   fft->n           = n;
1563   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1564   fftw->partial_dim = partial_dim;
1565   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1566   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1567 
1568   fftw->p_forward  = 0;
1569   fftw->p_backward = 0;
1570   fftw->p_flag     = FFTW_ESTIMATE;
1571 
1572   if (size == 1){
1573     A->ops->mult          = MatMult_SeqFFTW;
1574     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1575   } else {
1576     A->ops->mult          = MatMult_MPIFFTW;
1577     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1578   }
1579   fft->matdestroy          = MatDestroy_FFTW;
1580 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1581   A->ops->getvecs       = MatGetVecs_FFTW;
1582   A->assembled          = PETSC_TRUE;
1583   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW);
1584   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1585   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1586 
1587   /* get runtime options */
1588   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1589     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1590     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1591   PetscOptionsEnd();
1592   PetscFunctionReturn(0);
1593 }
1594 EXTERN_C_END
1595 
1596 
1597 
1598 
1599