xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 415e9c8c4abb650e23f4ce9cdf3dcc61cc320e1d)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 extern PetscErrorCode MatGetVecsFFT_FFTW(Mat,Vec*,Vec*,Vec*);
29 
30 #undef __FUNCT__
31 #define __FUNCT__ "MatMult_SeqFFTW"
32 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
33 {
34   PetscErrorCode ierr;
35   Mat_FFT        *fft  = (Mat_FFT*)A->data;
36   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
37   PetscScalar    *x_array,*y_array;
38   PetscInt       ndim=fft->ndim,*dim=fft->dim;
39 
40   PetscFunctionBegin;
41   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43   if (!fftw->p_forward){ /* create a plan, then excute it */
44     switch (ndim){
45     case 1:
46 #if defined(PETSC_USE_COMPLEX)
47       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
48 #else
49       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
50 #endif
51       break;
52     case 2:
53 #if defined(PETSC_USE_COMPLEX)
54       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55 #else
56       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
57 #endif
58       break;
59     case 3:
60 #if defined(PETSC_USE_COMPLEX)
61       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
62 #else
63       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
64 #endif
65       break;
66     default:
67 #if defined(PETSC_USE_COMPLEX)
68       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
69 #else
70       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
71 #endif
72       break;
73     }
74     fftw->finarray  = x_array;
75     fftw->foutarray = y_array;
76     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
77                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
78     fftw_execute(fftw->p_forward);
79   } else { /* use existing plan */
80     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
81       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
82     } else {
83       fftw_execute(fftw->p_forward);
84     }
85   }
86   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
87   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
88   PetscFunctionReturn(0);
89 }
90 
91 #undef __FUNCT__
92 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
93 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
94 {
95   PetscErrorCode ierr;
96   Mat_FFT        *fft = (Mat_FFT*)A->data;
97   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
98   PetscScalar    *x_array,*y_array;
99   PetscInt       ndim=fft->ndim,*dim=fft->dim;
100 
101   PetscFunctionBegin;
102   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
103   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
104   if (!fftw->p_backward){ /* create a plan, then excute it */
105     switch (ndim){
106     case 1:
107 #if defined(PETSC_USE_COMPLEX)
108       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
109 #else
110       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
111 #endif
112       break;
113     case 2:
114 #if defined(PETSC_USE_COMPLEX)
115       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
116 #else
117       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
118 #endif
119       break;
120     case 3:
121 #if defined(PETSC_USE_COMPLEX)
122       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
123 #else
124       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
125 #endif
126       break;
127     default:
128 #if defined(PETSC_USE_COMPLEX)
129       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
130 #else
131       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
132 #endif
133       break;
134     }
135     fftw->binarray  = x_array;
136     fftw->boutarray = y_array;
137     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
138   } else { /* use existing plan */
139     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
140       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
141     } else {
142       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
143     }
144   }
145   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
146   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
150 #undef __FUNCT__
151 #define __FUNCT__ "MatMult_MPIFFTW"
152 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
153 {
154   PetscErrorCode ierr;
155   Mat_FFT        *fft  = (Mat_FFT*)A->data;
156   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
157   PetscScalar    *x_array,*y_array;
158   PetscInt       ndim=fft->ndim,*dim=fft->dim;
159   MPI_Comm       comm=((PetscObject)A)->comm;
160 
161   PetscFunctionBegin;
162   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
163   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
164   if (!fftw->p_forward){ /* create a plan, then excute it */
165     switch (ndim){
166     case 1:
167 #if defined(PETSC_USE_COMPLEX)
168       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169 #else
170       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
171 #endif
172       break;
173     case 2:
174 #if defined(PETSC_USE_COMPLEX)
175       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
176 #else
177       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
178 #endif
179       break;
180     case 3:
181 #if defined(PETSC_USE_COMPLEX)
182       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
183 #else
184       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
185 #endif
186       break;
187     default:
188 #if defined(PETSC_USE_COMPLEX)
189       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
190 #else
191       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
192 #endif
193       break;
194     }
195     fftw->finarray  = x_array;
196     fftw->foutarray = y_array;
197     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
198                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
199     fftw_execute(fftw->p_forward);
200   } else { /* use existing plan */
201     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
202       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
203     } else {
204       fftw_execute(fftw->p_forward);
205     }
206   }
207   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
208   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
214 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
215 {
216   PetscErrorCode ierr;
217   Mat_FFT        *fft  = (Mat_FFT*)A->data;
218   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
219   PetscScalar    *x_array,*y_array;
220   PetscInt       ndim=fft->ndim,*dim=fft->dim;
221   MPI_Comm       comm=((PetscObject)A)->comm;
222 
223   PetscFunctionBegin;
224   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
225   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
226   if (!fftw->p_backward){ /* create a plan, then excute it */
227     switch (ndim){
228     case 1:
229 #if defined(PETSC_USE_COMPLEX)
230       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
231 #else
232       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
233 #endif
234       break;
235     case 2:
236 #if defined(PETSC_USE_COMPLEX)
237       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
238 #else
239       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
240 #endif
241       break;
242     case 3:
243 #if defined(PETSC_USE_COMPLEX)
244       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
245 #else
246       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
247 #endif
248       break;
249     default:
250 #if defined(PETSC_USE_COMPLEX)
251       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
252 #else
253       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
254 #endif
255       break;
256     }
257     fftw->binarray  = x_array;
258     fftw->boutarray = y_array;
259     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
260   } else { /* use existing plan */
261     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
262       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
263     } else {
264       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
265     }
266   }
267   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
268   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatDestroy_FFTW"
274 PetscErrorCode MatDestroy_FFTW(Mat A)
275 {
276   Mat_FFT        *fft = (Mat_FFT*)A->data;
277   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   fftw_destroy_plan(fftw->p_forward);
282   fftw_destroy_plan(fftw->p_backward);
283   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
284   ierr = PetscFree(fft->data);CHKERRQ(ierr);
285   PetscFunctionReturn(0);
286 }
287 
288 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
289 #undef __FUNCT__
290 #define __FUNCT__ "VecDestroy_MPIFFTW"
291 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
292 {
293   PetscErrorCode  ierr;
294   PetscScalar     *array;
295 
296   PetscFunctionBegin;
297   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
298   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
299   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
300   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatGetVecs1DC_FFTW"
306 /*
307    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
308      parallel layout determined by FFTW-1D
309 
310 */
311 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
312 {
313   PetscErrorCode ierr;
314   PetscMPIInt    size,rank;
315   MPI_Comm       comm=((PetscObject)A)->comm;
316   Mat_FFT        *fft = (Mat_FFT*)A->data;
317 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318   PetscInt       N=fft->N;
319   PetscInt       ndim=fft->ndim,*dim=fft->dim;
320   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
321   ptrdiff_t      f_local_n1,f_local_1_end;
322   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
323   ptrdiff_t      b_local_n1,b_local_1_end;
324   fftw_complex   *data_fin,*data_fout,*data_bout;
325 
326   PetscFunctionBegin;
327 #if !defined(PETSC_USE_COMPLEX)
328   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
329 #endif
330   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
331   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
332   if (size == 1){
333     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
334   }
335   else {
336       if (ndim>1){
337         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
338       else {
339           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
340           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
341           if (fin) {
342             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
343             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
344             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
345           }
346           if (fout) {
347             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
348             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
349             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
350           }
351           if (bout) {
352             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
353             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
354             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
355           }
356   }
357   if (fin){
358     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
359   }
360   if (fout){
361     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
362   }
363   if (bout){
364     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
365   }
366   PetscFunctionReturn(0);
367 }
368 
369 
370 }
371 
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatGetVecsFFT"
375 PetscErrorCode MatGetVecsFFT(Mat A,Vec *x,Vec *y,Vec *z)
376 {
377   PetscErrorCode ierr;
378   PetscFunctionBegin;
379   ierr = PetscTryMethod(A,"MatGetVecsFFT_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
380   PetscFunctionReturn(0);
381 }
382 
383 #undef __FUNCT__
384 #define __FUNCT__ "MatGetVecsFFT_FFTW"
385 /*
386    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
387      parallel layout determined by FFTW
388 
389    Collective on Mat
390 
391    Input Parameter:
392 .  mat - the matrix
393 
394    Output Parameter:
395 +   fin - (optional) input vector of forward FFTW
396 -   fout - (optional) output vector of forward FFTW
397 
398   Level: advanced
399 
400 .seealso: MatCreateFFTW()
401 */
402 EXTERN_C_BEGIN
403 PetscErrorCode  MatGetVecsFFT_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
404 {
405   PetscErrorCode ierr;
406   PetscMPIInt    size,rank;
407   MPI_Comm       comm=((PetscObject)A)->comm;
408   Mat_FFT        *fft = (Mat_FFT*)A->data;
409   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
410   PetscInt       N=fft->N, N1, n1,vsize;
411   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
412 
413   PetscFunctionBegin;
414   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
415   PetscValidType(A,1);
416 
417   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
418   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
419   printf("size is %d\n",size);
420   if (size == 1){ /* sequential case */
421   printf("Routine is getting called\n");
422 #if defined(PETSC_USE_COMPLEX)
423     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
424     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
425     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
426 #else
427     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
428     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
429     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
430 #endif
431   } else {
432     ptrdiff_t      alloc_local,local_n0,local_0_start;
433     ptrdiff_t      local_n1,local_1_end;
434     fftw_complex   *data_fin,*data_fout,*data_bout;
435     double         *data_finr,*data_boutr ;
436     ptrdiff_t      local_1_start,temp;
437     switch (ndim){
438           case 1:
439                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
440           case 2:
441 #if !defined(PETSC_USE_COMPLEX)
442                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
443                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
444                  if (fin) {
445                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
446                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
447                            ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
448                            //printf("The code comes here with vector size %d\n",vsize);
449                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
450                           }
451                  if (fout) {
452                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
453                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
454                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
455                            }
456                  if (bout) {
457                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
458                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
459                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
460                            //printf("The code comes here with vector size %d\n",vsize);
461                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
462                           }
463 
464                            //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
465 
466 #else
467       /* Get local size */
468                  printf("Code works for paralllel 2d complex DFT\n");
469                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
470                  if (fin) {
471                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
472                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
473                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
474                           }
475                  if (fout) {
476                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
477                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
478                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
479                            }
480                  if (bout) {
481                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
482                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
483                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
484                           }
485 
486      //printf("Hope this does not come here");
487 #endif
488       break;
489 
490           case 3:
491 #if !defined(PETSC_USE_COMPLEX)
492                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
493                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
494                  if (fin) {
495                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
496                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
497                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
498         //printf("The code comes here with vector size %d\n",vsize);
499                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
500                          }
501                  if (fout) {
502                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
503                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
504                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
505                           }
506                  if (bout) {
507                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
508                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
509                         // ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
510         //printf("The code comes here with vector size %d\n",vsize);
511                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
512                           }
513 
514       //printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
515 
516 #else
517                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
518 //      printf("The quantity n is %d",n);
519                  if (fin) {
520                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
522                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
523                          }
524                  if (fout) {
525                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
527                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
528                          }
529                  if (bout) {
530                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
532                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
533                          }
534 
535 #endif
536                  break;
537           default:
538 #if !defined(PETSC_USE_COMPLEX)
539                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
540                  printf("The value of temp is %ld\n",temp);
541                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
542                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
543                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
544                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
545 
546                  if (fin) {
547                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
548                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
549                          ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
550         //printf("The code comes here with vector size %d\n",vsize);
551                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
552                         }
553                  if (fout) {
554                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
555                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
556                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
557                         }
558                  if (bout) {
559                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
560                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
561                          //ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
562         //printf("The code comes here with vector size %d\n",vsize);
563                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
564                         }
565 
566 #else
567                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
568                 if (fin) {
569                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
570                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
571                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
572                        }
573                 if (fout) {
574                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
575                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
576                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
577                        }
578                 if (bout) {
579                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
580                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
581                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
582                        }
583 
584 
585 
586 #endif
587             break;
588           }
589   }
590   PetscFunctionReturn(0);
591 }
592 EXTERN_C_END
593 
594 
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatGetVecs_FFTW"
598 /*
599    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
600      parallel layout determined by FFTW
601 
602    Collective on Mat
603 
604    Input Parameter:
605 .  mat - the matrix
606 
607    Output Parameter:
608 +   fin - (optional) input vector of forward FFTW
609 -   fout - (optional) output vector of forward FFTW
610 
611   Level: advanced
612 
613 .seealso: MatCreateFFTW()
614 */
615 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
616 {
617   PetscErrorCode ierr;
618   PetscMPIInt    size,rank;
619   MPI_Comm       comm=((PetscObject)A)->comm;
620   Mat_FFT        *fft = (Mat_FFT*)A->data;
621   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
622   PetscInt       N=fft->N, N1, n1,vsize;
623   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
624 
625   PetscFunctionBegin;
626   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
627   PetscValidType(A,1);
628 
629   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
630   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
631   if (size == 1){ /* sequential case */
632 #if defined(PETSC_USE_COMPLEX)
633     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
634     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
635 #else
636     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
637     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
638 #endif
639   } else {        /* mpi case */
640     ptrdiff_t      alloc_local,local_n0,local_0_start;
641     ptrdiff_t      local_n1,local_1_end;
642     fftw_complex   *data_fin,*data_fout;
643     double         *data_finr ;
644     ptrdiff_t      local_1_start,temp;
645 //    PetscInt ctr;
646 //    ptrdiff_t      ndim1,*pdim;
647 //    ndim1=(ptrdiff_t) ndim;
648 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
649 
650 //    for(ctr=0;ctr<ndim;ctr++)
651 //        {k
652 //           pdim[ctr] = dim[ctr];
653 //       }
654 
655 
656 
657     switch (ndim){
658     case 1:
659       /* Get local size */
660       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
661 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
662       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
663       if (fin) {
664         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
665         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
666         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
667       }
668       if (fout) {
669         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
670         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
671         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
672       }
673       break;
674     case 2:
675 #if !defined(PETSC_USE_COMPLEX)
676       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
677       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
678       if (fin) {
679         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
680         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
681         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
682         //printf("The code comes here with vector size %d\n",vsize);
683         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
684       }
685       if (fout) {
686         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
687         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
688         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
689       }
690       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
691 
692 #else
693       /* Get local size */
694      //printf("Hope this does not come here");
695       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
696       if (fin) {
697         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
698         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
699         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
700       }
701       if (fout) {
702         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
703         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
704         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
705       }
706      //printf("Hope this does not come here");
707 #endif
708       break;
709     case 3:
710       /* Get local size */
711 #if !defined(PETSC_USE_COMPLEX)
712       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
713       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
714       if (fin) {
715         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
716         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
717         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
718         //printf("The code comes here with vector size %d\n",vsize);
719         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
720       }
721       if (fout) {
722         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
723         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
724         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
725       }
726       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
727 
728 
729 #else
730       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
731 //      printf("The quantity n is %d",n);
732       if (fin) {
733         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
734         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
735         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
736       }
737       if (fout) {
738         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
739         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
740         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
741       }
742 #endif
743       break;
744     default:
745       /* Get local size */
746 #if !defined(PETSC_USE_COMPLEX)
747       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
748       printf("The value of temp is %ld\n",temp);
749       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
750       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
751       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
752       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
753       if (fin) {
754         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
755         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
756         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
757         //printf("The code comes here with vector size %d\n",vsize);
758         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
759       }
760       if (fout) {
761         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
762         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
763         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
764       }
765 
766 #else
767       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
768 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
769 //      printf("The value of alloc local is %d",alloc_local);
770 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
771 //      for(i=0;i<ndim;i++)
772 //         {
773 //          pdim[i]=dim[i];printf("%d",pdim[i]);
774 //         }
775 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
776 //      printf("The quantity n is %d",n);
777       if (fin) {
778         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
779         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
780         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
781       }
782       if (fout) {
783         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
784         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
785         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
786       }
787 #endif
788       break;
789     }
790   }
791 //  if (fin){
792 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
793 //  }
794 //  if (fout){
795 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
796 //  }
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "InputTransformFFT"
802 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
803 {
804   PetscErrorCode ierr;
805   PetscFunctionBegin;
806   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
807   PetscFunctionReturn(0);
808 }
809 
810 /*
811       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
812   Input A, x, y
813   A - FFTW matrix
814   x - user data
815   Options Database Keys:
816 + -mat_fftw_plannerflags - set FFTW planner flags
817 
818    Level: intermediate
819 
820 */
821 
822 EXTERN_C_BEGIN
823 #undef __FUNCT__
824 #define __FUNCT__ "InputTransformFFT_FTTW"
825 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
826 {
827   PetscErrorCode ierr;
828   MPI_Comm       comm=((PetscObject)A)->comm;
829   Mat_FFT        *fft = (Mat_FFT*)A->data;
830   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
831   PetscInt       N=fft->N, N1, n1 ,NM;
832   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
833   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
834   PetscInt       i,j,k,rank,size,partial_dim;
835   ptrdiff_t      alloc_local,local_n0,local_0_start;
836   ptrdiff_t      local_n1,local_1_start,temp;
837   VecScatter     vecscat;
838   IS             list1,list2;
839 
840   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
841   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
842   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
843   printf("Local ownership starts at %d\n",low);
844 
845   if (size==1)
846     {
847 /*     switch (ndim){
848      case 1:
849           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
850           for (i=0;i<dim[0];i++)
851              {
852               indx1[i] = i;
853              }
854           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
855           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
856           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
859           ierr = ISDestroy(&list1);CHKERRQ(ierr);
860           ierr = PetscFree(indx1);CHKERRQ(ierr);
861      break;
862 
863      case 2:
864           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
865           for (i=0;i<dim[0];i++){
866              for (j=0;j<dim[1];j++){
867                 indx1[i*dim[1]+j] = i*dim[1] + j;
868              }
869           }
870           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
871           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
872           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
875           ierr = ISDestroy(&list1);CHKERRQ(ierr);
876           ierr = PetscFree(indx1);CHKERRQ(ierr);
877           break;
878      case 3:
879           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
880           for (i=0;i<dim[0];i++){
881              for (j=0;j<dim[1];j++){
882                 for (k=0;k<dim[2];k++){
883                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
884                 }
885              }
886           }
887           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
888           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
889           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
892           ierr = ISDestroy(&list1);CHKERRQ(ierr);
893           ierr = PetscFree(indx1);CHKERRQ(ierr);
894           break;
895      default:
896 */
897           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
898           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
899           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
902           ierr = ISDestroy(&list1);CHKERRQ(ierr);
903           //ierr = ISDestroy(list1);CHKERRQ(ierr);
904  //         break;
905   //    }
906     }
907 
908  else{
909 
910  switch (ndim){
911  case 1:
912 #if defined(PETSC_USE_COMPLEX)
913       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
914       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
915       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
916       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
917       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
918       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
919       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
922       ierr = ISDestroy(&list1);CHKERRQ(ierr);
923       ierr = ISDestroy(&list2);CHKERRQ(ierr);
924       break;
925 #else
926   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
927 #endif
928   break;
929  case 2:
930 #if defined(PETSC_USE_COMPLEX)
931       //PetscInt my_value;
932       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
933       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
934       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
935       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
936       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
937       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
938       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
941       ierr = ISDestroy(&list1);CHKERRQ(ierr);
942       ierr = ISDestroy(&list2);CHKERRQ(ierr);
943       break;
944 #else
945       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
946       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
947 
948       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
949       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
950       printf("Val local_0_start = %ld\n",local_0_start);
951 
952       if (dim[1]%2==0)
953         NM = dim[1]+2;
954       else
955         NM = dim[1]+1;
956 
957       for (i=0;i<local_n0;i++){
958          for (j=0;j<dim[1];j++){
959             tempindx = i*dim[1] + j;
960             tempindx1 = i*NM + j;
961             indx1[tempindx]=local_0_start*dim[1]+tempindx;
962             indx2[tempindx]=low+tempindx1;
963            // printf("Val tempindx1 = %d\n",tempindx1);
964   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
965   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
966   //          printf("-------------------------\n",indx2[tempindx],rank);
967         }
968      }
969 
970       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
971       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
972 
973       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979       ierr = PetscFree(indx1);CHKERRQ(ierr);
980       ierr = PetscFree(indx2);CHKERRQ(ierr);
981       break;
982 #endif
983 
984  case 3:
985       #if defined(PETSC_USE_COMPLEX)
986       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
987       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
988       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
989       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
990       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
991       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
992       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
995       ierr = ISDestroy(&list1);CHKERRQ(ierr);
996       ierr = ISDestroy(&list2);CHKERRQ(ierr);
997       break;
998 #else
999       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1000       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1001 
1002       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1003       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1004       printf("Val local_0_start = %ld\n",local_0_start);
1005 
1006       if (dim[2]%2==0)
1007         NM = dim[2]+2;
1008       else
1009         NM = dim[2]+1;
1010 
1011       for (i=0;i<local_n0;i++){
1012          for (j=0;j<dim[1];j++){
1013             for (k=0;k<dim[2];k++){
1014                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1015                tempindx1 = i*dim[1]*NM + j*NM + k;
1016                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1017                indx2[tempindx]=low+tempindx1;
1018             }
1019            // printf("Val tempindx1 = %d\n",tempindx1);
1020            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1021            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1022            // printf("-------------------------\n",indx2[tempindx],rank);
1023         }
1024      }
1025 
1026       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1027       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1028 
1029       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1030       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1031       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1032       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1033       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1034       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1035       ierr = PetscFree(indx1);CHKERRQ(ierr);
1036       ierr = PetscFree(indx2);CHKERRQ(ierr);
1037       break;
1038 #endif
1039 
1040  default:
1041 #if defined(PETSC_USE_COMPLEX)
1042       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1043       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1044       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1045       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1046       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1047       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1048       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1051       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1052       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1053 
1054 
1055 #else
1056       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1057       printf("The value of temp is %ld\n",temp);
1058       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1059       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1060       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1061       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1062 
1063       partial_dim = fftw->partial_dim;
1064       printf("The value of partial dim is %d\n",partial_dim);
1065 
1066       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1067       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1068       printf("Val local_0_start = %ld\n",local_0_start);
1069 
1070       if (dim[ndim-1]%2==0)
1071         NM = 2;
1072       else
1073         NM = 1;
1074 
1075       j = low;
1076       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1077          {
1078           indx1[i] = local_0_start*partial_dim + i;
1079           indx2[i] = j;
1080           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1081           if (k%dim[ndim-1]==0)
1082             { j+=NM;}
1083           j++;
1084          }
1085       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1086       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1087       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1088       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1091       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1092       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1093       ierr = PetscFree(indx1);CHKERRQ(ierr);
1094       ierr = PetscFree(indx2);CHKERRQ(ierr);
1095       break;
1096 #endif
1097   }
1098  }
1099 
1100  return 0;
1101 }
1102 EXTERN_C_END
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "OutputTransformFFT"
1106 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1107 {
1108   PetscErrorCode ierr;
1109   PetscFunctionBegin;
1110   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 /*
1115       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1116   Input A, x, y
1117   A - FFTW matrix
1118   x - FFTW vector
1119   y - PETSc vector that user can read
1120   Options Database Keys:
1121 + -mat_fftw_plannerflags - set FFTW planner flags
1122 
1123    Level: intermediate
1124 
1125 */
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "OutputTransformFFT_FTTW"
1130 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1131 {
1132   PetscErrorCode ierr;
1133   MPI_Comm       comm=((PetscObject)A)->comm;
1134   Mat_FFT        *fft = (Mat_FFT*)A->data;
1135   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1136   PetscInt       N=fft->N, N1, n1 ,NM;
1137   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
1138   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
1139   PetscInt       i,j,k,rank,size,partial_dim;
1140   ptrdiff_t      alloc_local,local_n0,local_0_start;
1141   ptrdiff_t      local_n1,local_1_start,temp;
1142   VecScatter     vecscat;
1143   IS             list1,list2;
1144 
1145   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1146   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1147   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
1148 
1149   if (size==1){
1150 /*
1151     switch (ndim){
1152     case 1:
1153            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1154           for (i=0;i<dim[0];i++)
1155              {
1156               indx1[i] = i;
1157              }
1158           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1159           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1160           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1162           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1163           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1164           ierr = PetscFree(indx1);CHKERRQ(ierr);
1165           break;
1166 
1167     case 2:
1168          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1169           for (i=0;i<dim[0];i++){
1170              for (j=0;j<dim[1];j++){
1171                 indx1[i*dim[1]+j] = i*dim[1] + j;
1172              }
1173           }
1174          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1175          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1176          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1178          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1179          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1180          ierr = PetscFree(indx1);CHKERRQ(ierr);
1181          break;
1182     case 3:
1183          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1184          for (i=0;i<dim[0];i++){
1185             for (j=0;j<dim[1];j++){
1186                for (k=0;k<dim[2];k++){
1187                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1188                }
1189             }
1190          }
1191          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1192          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1193          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1194          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1195          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1196          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1197          ierr = PetscFree(indx1);CHKERRQ(ierr);
1198          break;
1199     default:
1200 */
1201          ierr = ISCreateStride(comm,N,0,1,&list1);
1202          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1203          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1204          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1205          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1207          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1208   //       break;
1209    // }
1210   }
1211   else{
1212 
1213  switch (ndim){
1214  case 1:
1215 #if defined(PETSC_USE_COMPLEX)
1216       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1217       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
1218       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
1219       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1220       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1221       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1222       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1224       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1225       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1226       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1227       break;
1228 
1229 #else
1230       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1231 #endif
1232   break;
1233  case 2:
1234 #if defined(PETSC_USE_COMPLEX)
1235       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1236       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1237       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1238       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1239       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1240       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1241       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1242       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1243       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1244       break;
1245 #else
1246       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1247       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1248 
1249      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1250      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1251      printf("Val local_0_start = %ld\n",local_0_start);
1252 
1253      if (dim[1]%2==0)
1254       NM = dim[1]+2;
1255     else
1256       NM = dim[1]+1;
1257 
1258 
1259 
1260      for (i=0;i<local_n0;i++){
1261         for (j=0;j<dim[1];j++){
1262             tempindx = i*dim[1] + j;
1263             tempindx1 = i*NM + j;
1264             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1265             indx2[tempindx]=low+tempindx1;
1266        //     printf("Val tempindx1 = %d\n",tempindx1);
1267        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1268        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1269        //     printf("-------------------------\n",indx2[tempindx],rank);
1270         }
1271      }
1272 
1273      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1274      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1275 
1276      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1277      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1280      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1281      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1282      ierr = PetscFree(indx1);CHKERRQ(ierr);
1283      ierr = PetscFree(indx2);CHKERRQ(ierr);
1284   break;
1285 #endif
1286  case 3:
1287 #if defined(PETSC_USE_COMPLEX)
1288       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1289       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1290       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1291       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1292       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1293       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1294       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1295       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1297       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1298       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1299       break;
1300 #else
1301 
1302       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1303       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1304 
1305      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1306      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1307      printf("Val local_0_start = %ld\n",local_0_start);
1308 
1309      if (dim[2]%2==0)
1310       NM = dim[2]+2;
1311     else
1312       NM = dim[2]+1;
1313 
1314      for (i=0;i<local_n0;i++){
1315         for (j=0;j<dim[1];j++){
1316            for (k=0;k<dim[2];k++){
1317               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1318               tempindx1 = i*dim[1]*NM + j*NM + k;
1319               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1320               indx2[tempindx]=low+tempindx1;
1321            }
1322         //    printf("Val tempindx1 = %d\n",tempindx1);
1323         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1324         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1325         //    printf("-------------------------\n",indx2[tempindx],rank);
1326         }
1327      }
1328 
1329      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1330      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1331 
1332      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1333      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1336      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1337      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1338      ierr = PetscFree(indx1);CHKERRQ(ierr);
1339      ierr = PetscFree(indx2);CHKERRQ(ierr);
1340   break;
1341 #endif
1342   default:
1343 #if defined(PETSC_USE_COMPLEX)
1344       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1345       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1346       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1347       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1348       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1349       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1350       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1353       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1354       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1355 #else
1356      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1357      printf("The value of temp is %ld\n",temp);
1358      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1359      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1360      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1361      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1362 
1363      partial_dim = fftw->partial_dim;
1364      printf("The value of partial dim is %d\n",partial_dim);
1365 
1366      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1367      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1368      printf("Val local_0_start = %ld\n",local_0_start);
1369 
1370      if (dim[ndim-1]%2==0)
1371        NM = 2;
1372      else
1373        NM = 1;
1374 
1375      j = low;
1376      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1377         {
1378          indx1[i] = local_0_start*partial_dim + i;
1379          indx2[i] = j;
1380          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1381          if (k%dim[ndim-1]==0)
1382            { j+=NM;}
1383          j++;
1384         }
1385      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1386      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1387 
1388       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1389 
1390 
1391      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1392      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1393      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1394      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1395      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1396      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1397      ierr = PetscFree(indx1);CHKERRQ(ierr);
1398      ierr = PetscFree(indx2);CHKERRQ(ierr);
1399 
1400      break;
1401 #endif
1402  }
1403  }
1404  return 0;
1405 }
1406 EXTERN_C_END
1407 
1408 EXTERN_C_BEGIN
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatCreate_FFTW"
1411 /*
1412       MatCreate_FFTW - Creates a matrix object that provides FFT
1413   via the external package FFTW
1414   Options Database Keys:
1415 + -mat_fftw_plannerflags - set FFTW planner flags
1416 
1417    Level: intermediate
1418 
1419 */
1420 
1421 PetscErrorCode MatCreate_FFTW(Mat A)
1422 {
1423   PetscErrorCode ierr;
1424   MPI_Comm       comm=((PetscObject)A)->comm;
1425   Mat_FFT        *fft=(Mat_FFT*)A->data;
1426   Mat_FFTW       *fftw;
1427   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1428   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1429   PetscBool      flg;
1430   PetscInt       p_flag,partial_dim=1,ctr,N1;
1431   PetscMPIInt    size,rank;
1432   ptrdiff_t      *pdim, temp;
1433   ptrdiff_t      local_n1,local_1_start,local_1_end;
1434 
1435   PetscFunctionBegin;
1436   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1437   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1438   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1439 
1440   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1441   pdim[0] = dim[0];
1442   for(ctr=1;ctr<ndim;ctr++)
1443       {
1444           partial_dim *= dim[ctr];
1445           pdim[ctr] = dim[ctr];
1446       }
1447 
1448   if (size == 1) {
1449 #if defined(PETSC_USE_COMPLEX)
1450     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1451     n = N;
1452 #else
1453     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1454     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1455     n = tot_dim;
1456 #endif
1457 
1458   } else {
1459     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1460     switch (ndim){
1461     case 1:
1462 #if !defined(PETSC_USE_COMPLEX)
1463   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1464 #else
1465       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1466       n = (PetscInt)local_n0;
1467       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1468 #endif
1469 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1470       break;
1471     case 2:
1472 #if defined(PETSC_USE_COMPLEX)
1473       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1474       /*
1475        PetscMPIInt    rank;
1476        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1477        PetscSynchronizedFlush(comm);
1478        */
1479       n = (PetscInt)local_n0*dim[1];
1480       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1481 #else
1482       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1483       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1484       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1485 #endif
1486       break;
1487     case 3:
1488 //      printf("The value of alloc local is %d",alloc_local);
1489 #if defined(PETSC_USE_COMPLEX)
1490       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1491       n = (PetscInt)local_n0*dim[1]*dim[2];
1492       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1493 #else
1494       printf("The code comes here\n");
1495       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1496       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1497       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1498 #endif
1499       break;
1500     default:
1501 #if defined(PETSC_USE_COMPLEX)
1502       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1503 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1504 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1505       n = (PetscInt)local_n0*partial_dim;
1506 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1507       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1508 #else
1509       temp = pdim[ndim-1];
1510       pdim[ndim-1]= temp/2 + 1;
1511       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1512       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1513       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1514       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1515       pdim[ndim-1] = temp;
1516       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1517       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1518 #endif
1519       break;
1520     }
1521   }
1522   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1523   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1524   fft->data = (void*)fftw;
1525 
1526   fft->n           = n;
1527   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1528   fftw->partial_dim = partial_dim;
1529   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1530   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1531 
1532   fftw->p_forward  = 0;
1533   fftw->p_backward = 0;
1534   fftw->p_flag     = FFTW_ESTIMATE;
1535 
1536   if (size == 1){
1537     A->ops->mult          = MatMult_SeqFFTW;
1538     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1539   } else {
1540     A->ops->mult          = MatMult_MPIFFTW;
1541     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1542   }
1543   fft->matdestroy          = MatDestroy_FFTW;
1544 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1545   A->ops->getvecs       = MatGetVecs_FFTW;
1546   A->assembled          = PETSC_TRUE;
1547   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFT_C","MatGetVecsFFT_FFTW",MatGetVecsFFT_FFTW);
1548   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1549   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1550 
1551   /* get runtime options */
1552   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1553     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1554     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1555   PetscOptionsEnd();
1556   PetscFunctionReturn(0);
1557 }
1558 EXTERN_C_END
1559 
1560 
1561 
1562 
1563