xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 8ca4c034b1bc534a44cb6bf3488e1c5b0979d612)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatGetVecsFFTW"
306 /*@
307    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
308      parallel layout determined by FFTW
309 
310    Collective on Mat
311 
312    Input Parameter:
313 .  mat - the matrix
314 
315    Output Parameter:
316 +   fin - (optional) input vector of forward FFTW
317 -   fout - (optional) output vector of forward FFTW
318 
319   Level: advanced
320 
321 .seealso: MatCreateFFTW()
322 @*/
323 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
324 {
325   PetscErrorCode ierr;
326   PetscFunctionBegin;
327   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 EXTERN_C_BEGIN
332 #undef __FUNCT__
333 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
334 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
335 {
336   PetscErrorCode ierr;
337   PetscMPIInt    size,rank;
338   MPI_Comm       comm=((PetscObject)A)->comm;
339   Mat_FFT        *fft = (Mat_FFT*)A->data;
340   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
341   PetscInt       N=fft->N;
342   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
346   PetscValidType(A,1);
347 
348   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
349   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
350   if (size == 1){ /* sequential case */
351 #if defined(PETSC_USE_COMPLEX)
352     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
353     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
354     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
355 #else
356 //    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
357 //    if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
358 //    if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
359     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
360     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
361     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
362 #endif
363   } else {
364     ptrdiff_t      alloc_local,local_n0,local_0_start;
365     ptrdiff_t      local_n1;
366     fftw_complex   *data_fout;
367     ptrdiff_t      local_1_start;
368 #if defined(PETSC_USE_COMPLEX)
369     fftw_complex   *data_fin,*data_bout;
370 #else
371     double         *data_finr,*data_boutr;
372     PetscInt       n1,N1,vsize;
373     ptrdiff_t      temp;
374 #endif
375 
376     switch (ndim){
377           case 1:
378 #if !defined(PETSC_USE_COMPLEX)
379                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
380 #else
381                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
382                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
383                  if (fin) {
384                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
385                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
386                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
387                          }
388                  if (fout) {
389                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
390                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
391                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
392                          }
393                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
394                  if (bout) {
395                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
396                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
397                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
398                          }
399           break;
400 #endif
401           case 2:
402 #if !defined(PETSC_USE_COMPLEX)
403                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
404                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
405                  if (fin) {
406                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
407                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
408                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
409                           }
410                  if (bout) {
411                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
412                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
413                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
414                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
415                           }
416                  //n1 = 2*local_n1*dim[0];
417                  if (fout) {
418                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
419                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
420                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
421                            }
422 #else
423       /* Get local size */
424                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
425                  if (fin) {
426                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
427                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
428                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
429                           }
430                  if (fout) {
431                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
432                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
433                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
434                            }
435                  if (bout) {
436                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
437                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
438                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
439                           }
440 #endif
441           break;
442           case 3:
443 #if !defined(PETSC_USE_COMPLEX)
444                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
445                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
446                  if (fin) {
447                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
448                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
449                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
450                          }
451                  if (bout) {
452                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
453                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
454                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
455                           }
456                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
457                  if (fout) {
458                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
459                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
460                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
461                           }
462 #else
463                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
464                  if (fin) {
465                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
466                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
467                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
468                          }
469                  if (fout) {
470                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
471                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
472                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
473                          }
474                  if (bout) {
475                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
476                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
477                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
478                          }
479 #endif
480           break;
481           default:
482 #if !defined(PETSC_USE_COMPLEX)
483                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
484                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
485                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
486                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
487                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
488 
489                  if (fin) {
490                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
491                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
492                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
493                         }
494                  if (bout) {
495                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
496                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
497                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
498                         }
499                  //temp = fftw->partial_dim;
500                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
501                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
502                  if (fout) {
503                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
505                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
506                         }
507 #else
508                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
509                 if (fin) {
510                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
512                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
513                        }
514                 if (fout) {
515                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
516                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
517                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
518                        }
519                 if (bout) {
520                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
522                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
523                        }
524 #endif
525             break;
526           }
527   }
528   PetscFunctionReturn(0);
529 }
530 EXTERN_C_END
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatGetVecs_FFTW"
534 /*
535    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
536      parallel layout determined by FFTW
537 
538    Collective on Mat
539 
540    Input Parameter:
541 .  mat - the matrix
542 
543    Output Parameter:
544 +   fin - (optional) input vector of forward FFTW
545 -   fout - (optional) output vector of forward FFTW
546 
547   Level: advanced
548 
549 .seealso: MatCreateFFTW()
550 */
551 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
552 {
553   PetscErrorCode ierr;
554   PetscMPIInt    size,rank;
555   MPI_Comm       comm=((PetscObject)A)->comm;
556   Mat_FFT        *fft = (Mat_FFT*)A->data;
557   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
558   PetscInt       N=fft->N;
559   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
560 
561   PetscFunctionBegin;
562   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
563   PetscValidType(A,1);
564 
565   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
566   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
567   if (size == 1){ /* sequential case */
568 #if defined(PETSC_USE_COMPLEX)
569     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
570     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
571 #else
572     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
573     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
574 #endif
575   } else {        /* mpi case */
576     ptrdiff_t      alloc_local,local_n0,local_0_start;
577     ptrdiff_t      local_n1,local_1_end;
578     fftw_complex   *data_fin,*data_fout;
579 #if !defined(PETSC_USE_COMPLEX)
580     double         *data_finr ;
581     ptrdiff_t      local_1_start,temp;
582     PetscInt       vsize,n1,N1;
583 #endif
584 
585 //    PetscInt ctr;
586 //    ptrdiff_t      ndim1,*pdim;
587 //    ndim1=(ptrdiff_t) ndim;
588 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
589 
590 //    for(ctr=0;ctr<ndim;ctr++)
591 //        {k
592 //           pdim[ctr] = dim[ctr];
593 //       }
594 
595 
596 
597     switch (ndim){
598     case 1:
599       /* Get local size */
600       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
601 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
602       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
603       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
604       if (fin) {
605         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
606         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
607         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
608       }
609       if (fout) {
610         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
611         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
612         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
613       }
614       break;
615     case 2:
616 #if !defined(PETSC_USE_COMPLEX)
617       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
618       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
619       if (fin) {
620         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
621         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
622         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
623         //printf("The code comes here with vector size %d\n",vsize);
624         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
625       }
626       n1 = 2*local_n1*(dim[0]);
627       //n1 = 2*local_n1*dim[1];
628       printf("The values are %ld\n",local_n1);
629       if (fout) {
630         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
631         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
632         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
633       }
634       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
635 
636 #else
637       /* Get local size */
638      //printf("Hope this does not come here");
639       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
640       if (fin) {
641         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
642         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
643         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
644       }
645       if (fout) {
646         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
647         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
648         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
649       }
650      //printf("Hope this does not come here");
651 #endif
652       break;
653     case 3:
654       /* Get local size */
655 #if !defined(PETSC_USE_COMPLEX)
656       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
657       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
658       if (fin) {
659         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
660         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
661         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
662         //printf("The code comes here with vector size %d\n",vsize);
663         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
664       }
665       printf("The value is %ld",local_n1);
666       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
667       if (fout) {
668         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
669         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
670         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
671       }
672       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
673 
674 
675 #else
676       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
677 //      printf("The quantity n is %d",n);
678       if (fin) {
679         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
680         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
681         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
682       }
683       if (fout) {
684         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
685         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
686         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
687       }
688 #endif
689       break;
690     default:
691       /* Get local size */
692 #if !defined(PETSC_USE_COMPLEX)
693       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
694       printf("The value of temp is %ld\n",temp);
695       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
696       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
697       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
698       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
699       if (fin) {
700         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
701         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
702         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
703         //printf("The code comes here with vector size %d\n",vsize);
704         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
705       }
706       printf("The value is %ld. Global length is %d \n",local_n1,N1);
707       temp = fftw->partial_dim;
708       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
709 
710       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
711       if (fout) {
712         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
713         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
714         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
715       }
716 
717 #else
718       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
719 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
720 //      printf("The value of alloc local is %d",alloc_local);
721 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
722 //      for(i=0;i<ndim;i++)
723 //         {
724 //          pdim[i]=dim[i];printf("%d",pdim[i]);
725 //         }
726 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
727 //      printf("The quantity n is %d",n);
728       if (fin) {
729         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
730         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
731         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
732       }
733       if (fout) {
734         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
735         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
736         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
737       }
738 #endif
739       break;
740     }
741   }
742 //  if (fin){
743 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
744 //  }
745 //  if (fout){
746 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
747 //  }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "InputTransformFFT"
753 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
754 {
755   PetscErrorCode ierr;
756   PetscFunctionBegin;
757   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
761 /*
762       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
763       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
764       changing dimension.
765   Input A, x, y
766   A - FFTW matrix
767   x - user data
768   Options Database Keys:
769 + -mat_fftw_plannerflags - set FFTW planner flags
770 
771    Level: intermediate
772 
773 */
774 
775 EXTERN_C_BEGIN
776 #undef __FUNCT__
777 #define __FUNCT__ "InputTransformFFT_FTTW"
778 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
779 {
780   PetscErrorCode ierr;
781   MPI_Comm       comm=((PetscObject)A)->comm;
782   Mat_FFT        *fft = (Mat_FFT*)A->data;
783   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
784   PetscInt       N=fft->N;
785   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
786   PetscInt       low;
787   PetscInt       rank,size,vsize,vsize1;
788   ptrdiff_t      alloc_local,local_n0,local_0_start;
789   ptrdiff_t      local_n1,local_1_start;
790   VecScatter     vecscat;
791   IS             list1,list2;
792 #if !defined(PETSC_USE_COMPLEX)
793   PetscInt       i,j,k,partial_dim;
794   PetscInt       *indx1, *indx2, tempindx, tempindx1;
795   PetscInt       N1, n1 ,NM;
796   ptrdiff_t      temp;
797 #endif
798 
799   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
800   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
801   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
802 
803   if (size==1)
804     {
805 /*     switch (ndim){
806      case 1:
807           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
808           for (i=0;i<dim[0];i++)
809              {
810               indx1[i] = i;
811              }
812           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
813           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
814           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
815           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
817           ierr = ISDestroy(&list1);CHKERRQ(ierr);
818           ierr = PetscFree(indx1);CHKERRQ(ierr);
819      break;
820 
821      case 2:
822           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
823           for (i=0;i<dim[0];i++){
824              for (j=0;j<dim[1];j++){
825                 indx1[i*dim[1]+j] = i*dim[1] + j;
826              }
827           }
828           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
829           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
830           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
831           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
833           ierr = ISDestroy(&list1);CHKERRQ(ierr);
834           ierr = PetscFree(indx1);CHKERRQ(ierr);
835           break;
836      case 3:
837           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
838           for (i=0;i<dim[0];i++){
839              for (j=0;j<dim[1];j++){
840                 for (k=0;k<dim[2];k++){
841                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
842                 }
843              }
844           }
845           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
846           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
847           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
849           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
850           ierr = ISDestroy(&list1);CHKERRQ(ierr);
851           ierr = PetscFree(indx1);CHKERRQ(ierr);
852           break;
853      default:
854 */
855           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
856           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
857           printf("The values of sizes are %d and %d",vsize,vsize1);
858           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
859           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
860           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
863           ierr = ISDestroy(&list1);CHKERRQ(ierr);
864  //         break;
865   //    }
866     }
867 
868  else{
869 
870  switch (ndim){
871  case 1:
872 #if defined(PETSC_USE_COMPLEX)
873       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
874       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
875       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
876       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
877       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
880       ierr = ISDestroy(&list1);CHKERRQ(ierr);
881       ierr = ISDestroy(&list2);CHKERRQ(ierr);
882 #else
883       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
884 #endif
885  break;
886  case 2:
887 #if defined(PETSC_USE_COMPLEX)
888       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
889       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
890       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
891       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
892       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
895       ierr = ISDestroy(&list1);CHKERRQ(ierr);
896       ierr = ISDestroy(&list2);CHKERRQ(ierr);
897 #else
898       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
899       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
900       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
901       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
902       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
903       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
904 
905       if (dim[1]%2==0)
906         NM = dim[1]+2;
907       else
908         NM = dim[1]+1;
909 
910       for (i=0;i<local_n0;i++){
911          for (j=0;j<dim[1];j++){
912             tempindx = i*dim[1] + j;
913             tempindx1 = i*NM + j;
914             indx1[tempindx]=local_0_start*dim[1]+tempindx;
915             indx2[tempindx]=low+tempindx1;
916         }
917      }
918 
919       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
920       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
921 
922       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
923       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
926       ierr = ISDestroy(&list1);CHKERRQ(ierr);
927       ierr = ISDestroy(&list2);CHKERRQ(ierr);
928       ierr = PetscFree(indx1);CHKERRQ(ierr);
929       ierr = PetscFree(indx2);CHKERRQ(ierr);
930 #endif
931  break;
932 
933  case 3:
934 #if defined(PETSC_USE_COMPLEX)
935       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
936       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
937       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
938       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
939       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
940       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
941       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
944       ierr = ISDestroy(&list1);CHKERRQ(ierr);
945       ierr = ISDestroy(&list2);CHKERRQ(ierr);
946 #else
947       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
948       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
949 
950 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
951 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
952       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
953       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
954 
955       if (dim[2]%2==0)
956         NM = dim[2]+2;
957       else
958         NM = dim[2]+1;
959 
960       for (i=0;i<local_n0;i++){
961          for (j=0;j<dim[1];j++){
962             for (k=0;k<dim[2];k++){
963                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
964                tempindx1 = i*dim[1]*NM + j*NM + k;
965                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
966                indx2[tempindx]=low+tempindx1;
967             }
968          }
969       }
970 
971       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
972       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
973       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979       ierr = PetscFree(indx1);CHKERRQ(ierr);
980       ierr = PetscFree(indx2);CHKERRQ(ierr);
981 #endif
982  break;
983 
984  default:
985 #if defined(PETSC_USE_COMPLEX)
986       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
987       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
988       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
989       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
990       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995 #else
996       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
997       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
998       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
999       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1000       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1001 
1002       partial_dim = fftw->partial_dim;
1003 
1004       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1005       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1006 
1007       if (dim[ndim-1]%2==0)
1008         NM = 2;
1009       else
1010         NM = 1;
1011 
1012       j = low;
1013       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1014          {
1015           indx1[i] = local_0_start*partial_dim + i;
1016           indx2[i] = j;
1017           if (k%dim[ndim-1]==0)
1018             { j+=NM;}
1019           j++;
1020          }
1021       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1022       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1023       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1024       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1027       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1028       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1029       ierr = PetscFree(indx1);CHKERRQ(ierr);
1030       ierr = PetscFree(indx2);CHKERRQ(ierr);
1031 #endif
1032  break;
1033   }
1034  }
1035 
1036  return 0;
1037 }
1038 EXTERN_C_END
1039 
1040 #undef __FUNCT__
1041 #define __FUNCT__ "OutputTransformFFT"
1042 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1043 {
1044   PetscErrorCode ierr;
1045   PetscFunctionBegin;
1046   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 /*
1051       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1052   Input A, x, y
1053   A - FFTW matrix
1054   x - FFTW vector
1055   y - PETSc vector that user can read
1056   Options Database Keys:
1057 + -mat_fftw_plannerflags - set FFTW planner flags
1058 
1059    Level: intermediate
1060 
1061 */
1062 
1063 EXTERN_C_BEGIN
1064 #undef __FUNCT__
1065 #define __FUNCT__ "OutputTransformFFT_FTTW"
1066 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1067 {
1068   PetscErrorCode ierr;
1069   MPI_Comm       comm=((PetscObject)A)->comm;
1070   Mat_FFT        *fft = (Mat_FFT*)A->data;
1071   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1072   PetscInt       N=fft->N;
1073   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1074   PetscInt       low;
1075   PetscInt       rank,size;
1076   ptrdiff_t      alloc_local,local_n0,local_0_start;
1077   ptrdiff_t      local_n1,local_1_start;
1078   VecScatter     vecscat;
1079   IS             list1,list2;
1080 #if !defined(PETSC_USE_COMPLEX)
1081   PetscInt       i,j,k,partial_dim;
1082   PetscInt       *indx1, *indx2, tempindx, tempindx1;
1083   PetscInt       N1, n1 ,NM;
1084   ptrdiff_t      temp;
1085 #endif
1086 
1087 
1088   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1089   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1090   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
1091 
1092   if (size==1){
1093 /*
1094     switch (ndim){
1095     case 1:
1096            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1097           for (i=0;i<dim[0];i++)
1098              {
1099               indx1[i] = i;
1100              }
1101           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1102           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1103           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1104           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1105           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1106           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1107           ierr = PetscFree(indx1);CHKERRQ(ierr);
1108           break;
1109 
1110     case 2:
1111          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1112           for (i=0;i<dim[0];i++){
1113              for (j=0;j<dim[1];j++){
1114                 indx1[i*dim[1]+j] = i*dim[1] + j;
1115              }
1116           }
1117          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1118          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1119          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1122          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1123          ierr = PetscFree(indx1);CHKERRQ(ierr);
1124          break;
1125     case 3:
1126          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1127          for (i=0;i<dim[0];i++){
1128             for (j=0;j<dim[1];j++){
1129                for (k=0;k<dim[2];k++){
1130                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1131                }
1132             }
1133          }
1134          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1135          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1136          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1137          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1139          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1140          ierr = PetscFree(indx1);CHKERRQ(ierr);
1141          break;
1142     default:
1143 */
1144     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
1145     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1146     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1147     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1148     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1150     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1151   //       break;
1152    // }
1153   }
1154   else{
1155 
1156  switch (ndim){
1157  case 1:
1158 #if defined(PETSC_USE_COMPLEX)
1159       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1160       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
1161       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
1162       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1163       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1164       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1165       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1168       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1169       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1170 #else
1171       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1172 #endif
1173  break;
1174  case 2:
1175 #if defined(PETSC_USE_COMPLEX)
1176       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1177       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1178       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1179       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1180       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1181       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1183       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1184       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1185 #else
1186       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1187       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1188 
1189       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1190       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
1191 
1192       if (dim[1]%2==0)
1193         NM = dim[1]+2;
1194       else
1195         NM = dim[1]+1;
1196 
1197       for (i=0;i<local_n0;i++){
1198          for (j=0;j<dim[1];j++){
1199             tempindx = i*dim[1] + j;
1200             tempindx1 = i*NM + j;
1201             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1202             indx2[tempindx]=low+tempindx1;
1203          }
1204        }
1205 
1206        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1207        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1208 
1209        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1210        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1211        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1212        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1213        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1214        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1215        ierr = PetscFree(indx1);CHKERRQ(ierr);
1216        ierr = PetscFree(indx2);CHKERRQ(ierr);
1217 #endif
1218  break;
1219  case 3:
1220 #if defined(PETSC_USE_COMPLEX)
1221        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1222        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1223        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1224        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1225        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1226        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1227        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1228        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1229        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1230 #else
1231 
1232        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1233        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1234 
1235 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1236 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1237        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1238        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1239 
1240        if (dim[2]%2==0)
1241         NM = dim[2]+2;
1242        else
1243         NM = dim[2]+1;
1244 
1245        for (i=0;i<local_n0;i++){
1246           for (j=0;j<dim[1];j++){
1247              for (k=0;k<dim[2];k++){
1248                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1249                 tempindx1 = i*dim[1]*NM + j*NM + k;
1250                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1251                 indx2[tempindx]=low+tempindx1;
1252              }
1253         //    printf("Val tempindx1 = %d\n",tempindx1);
1254         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1255         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1256         //    printf("-------------------------\n",indx2[tempindx],rank);
1257           }
1258        }
1259 
1260        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1261        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1262 
1263        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1264        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1267        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1268        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1269        ierr = PetscFree(indx1);CHKERRQ(ierr);
1270        ierr = PetscFree(indx2);CHKERRQ(ierr);
1271 #endif
1272  break;
1273  default:
1274 #if defined(PETSC_USE_COMPLEX)
1275        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1276        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1277        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1278       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1279       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1280        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1281        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1284        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1285        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1286 #else
1287        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1288        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1289        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1290        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1291        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1292 
1293        partial_dim = fftw->partial_dim;
1294 
1295        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1296        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1297 
1298        if (dim[ndim-1]%2==0)
1299          NM = 2;
1300        else
1301          NM = 1;
1302 
1303        j = low;
1304        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1305           {
1306            indx1[i] = local_0_start*partial_dim + i;
1307            indx2[i] = j;
1308            if (k%dim[ndim-1]==0)
1309              { j+=NM;}
1310            j++;
1311           }
1312        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1313        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1314 
1315        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1316        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1319        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1320        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1321        ierr = PetscFree(indx1);CHKERRQ(ierr);
1322        ierr = PetscFree(indx2);CHKERRQ(ierr);
1323 #endif
1324  break;
1325  }
1326  }
1327  return 0;
1328 }
1329 EXTERN_C_END
1330 
1331 EXTERN_C_BEGIN
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatCreate_FFTW"
1334 /*
1335       MatCreate_FFTW - Creates a matrix object that provides FFT
1336   via the external package FFTW
1337   Options Database Keys:
1338 + -mat_fftw_plannerflags - set FFTW planner flags
1339 
1340    Level: intermediate
1341 
1342 */
1343 
1344 PetscErrorCode MatCreate_FFTW(Mat A)
1345 {
1346   PetscErrorCode ierr;
1347   MPI_Comm       comm=((PetscObject)A)->comm;
1348   Mat_FFT        *fft=(Mat_FFT*)A->data;
1349   Mat_FFTW       *fftw;
1350   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1351   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1352   PetscBool      flg;
1353   PetscInt       p_flag,partial_dim=1,ctr;
1354   PetscMPIInt    size,rank;
1355   ptrdiff_t      *pdim;
1356   ptrdiff_t      local_n1,local_1_start;
1357 #if !defined(PETSC_USE_COMPLEX)
1358    ptrdiff_t     temp;
1359    PetscInt      N1,tot_dim;
1360 #else
1361    PetscInt n1;
1362 #endif
1363 
1364 
1365   PetscFunctionBegin;
1366   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1367   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1368   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1369 
1370   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1371   pdim[0] = dim[0];
1372 #if !defined(PETSC_USE_COMPLEX)
1373   tot_dim = 2*dim[0];
1374 #endif
1375   for (ctr=1;ctr<ndim;ctr++)
1376      {
1377           partial_dim *= dim[ctr];
1378           pdim[ctr] = dim[ctr];
1379 #if !defined(PETSC_USE_COMPLEX)
1380           if (ctr==ndim-1)
1381             tot_dim *= (dim[ctr]/2+1);
1382           else
1383             tot_dim *= dim[ctr];
1384 #endif
1385      }
1386 
1387   if (size == 1) {
1388 #if defined(PETSC_USE_COMPLEX)
1389     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1390     n = N;
1391 #else
1392     //printf("The value of N is %d\n",N);
1393     //tot_dim = 2*N*(dim[ndim-1]/2+1);
1394     //tot_dim = ((long int)tot_dim)/dim[ndim-1];
1395     //printf("The value of is %ld and %d\n",tot_dim,dim[ndim-1]);
1396     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1397     n = tot_dim;
1398 #endif
1399 
1400   } else {
1401     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1402     switch (ndim){
1403     case 1:
1404 #if !defined(PETSC_USE_COMPLEX)
1405    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1406 #else
1407       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1408       n = (PetscInt)local_n0;
1409       n1 = (PetscInt) local_n1;
1410       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1411 #endif
1412       break;
1413     case 2:
1414 #if defined(PETSC_USE_COMPLEX)
1415       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1416       /*
1417        PetscMPIInt    rank;
1418        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1419        PetscSynchronizedFlush(comm);
1420        */
1421       n = (PetscInt)local_n0*dim[1];
1422       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1423 #else
1424       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1425       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1426 //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1427 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1428       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1429 #endif
1430       break;
1431     case 3:
1432 #if defined(PETSC_USE_COMPLEX)
1433       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1434       n = (PetscInt)local_n0*dim[1]*dim[2];
1435       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1436 #else
1437       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1438       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1439 //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1440 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1441       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1442 #endif
1443       break;
1444     default:
1445 #if defined(PETSC_USE_COMPLEX)
1446       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1447       n = (PetscInt)local_n0*partial_dim;
1448       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1449 #else
1450       temp = pdim[ndim-1];
1451       pdim[ndim-1] = temp/2 + 1;
1452       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1453       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1454       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1455       pdim[ndim-1] = temp;
1456 //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1457 //      n1 = 2*local_n1*temp;
1458 //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1459       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1460 #endif
1461       break;
1462     }
1463   }
1464   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1465   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1466   fft->data = (void*)fftw;
1467 
1468   fft->n           = n;
1469   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1470   fftw->partial_dim = partial_dim;
1471   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1472   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1473 
1474   fftw->p_forward  = 0;
1475   fftw->p_backward = 0;
1476   fftw->p_flag     = FFTW_ESTIMATE;
1477 
1478   if (size == 1){
1479     A->ops->mult          = MatMult_SeqFFTW;
1480     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1481   } else {
1482     A->ops->mult          = MatMult_MPIFFTW;
1483     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1484   }
1485   fft->matdestroy          = MatDestroy_FFTW;
1486   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1487   //A->ops->getvecs       = MatGetVecs_FFTW;
1488   A->assembled          = PETSC_TRUE;
1489   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1490   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1491   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1492 
1493   /* get runtime options */
1494   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1495     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1496     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1497   PetscOptionsEnd();
1498   PetscFunctionReturn(0);
1499 }
1500 EXTERN_C_END
1501 
1502 
1503 
1504 
1505