xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 1abc60207cdaa74fcbe9e62dcf6e5da233a93d51)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatGetVecsFFTW"
306 /*@
307    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
308      parallel layout determined by FFTW
309 
310    Collective on Mat
311 
312    Input Parameter:
313 .  mat - the matrix
314 
315    Output Parameter:
316 +   fin - (optional) input vector of forward FFTW
317 -   fout - (optional) output vector of forward FFTW
318 
319   Level: advanced
320 
321 .seealso: MatCreateFFTW()
322 @*/
323 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
324 {
325   PetscErrorCode ierr;
326   PetscFunctionBegin;
327   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
328   PetscFunctionReturn(0);
329 }
330 
331 EXTERN_C_BEGIN
332 #undef __FUNCT__
333 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
334 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
335 {
336   PetscErrorCode ierr;
337   PetscMPIInt    size,rank;
338   MPI_Comm       comm=((PetscObject)A)->comm;
339   Mat_FFT        *fft = (Mat_FFT*)A->data;
340   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
341   PetscInt       N=fft->N;
342   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
343 
344   PetscFunctionBegin;
345   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
346   PetscValidType(A,1);
347 
348   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
349   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
350   if (size == 1){ /* sequential case */
351 #if defined(PETSC_USE_COMPLEX)
352     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
353     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
354     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
355 #else
356 //    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
357 //    if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
358 //    if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
359     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
360     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
361     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
362 #endif
363   } else {
364     ptrdiff_t      alloc_local,local_n0,local_0_start;
365     ptrdiff_t      local_n1;
366     fftw_complex   *data_fout;
367     ptrdiff_t      local_1_start;
368 #if defined(PETSC_USE_COMPLEX)
369     fftw_complex   *data_fin,*data_bout;
370 #else
371     double         *data_finr,*data_boutr;
372     PetscInt       n1,N1,vsize;
373     ptrdiff_t      temp;
374 #endif
375 
376     switch (ndim){
377           case 1:
378 #if !defined(PETSC_USE_COMPLEX)
379                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
380 #else
381                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
382                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
383                  if (fin) {
384                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
385                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
386                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
387                          }
388                  if (fout) {
389                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
390                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
391                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
392                          }
393                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
394                  if (bout) {
395                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
396                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
397                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
398                          }
399           break;
400 #endif
401           case 2:
402 #if !defined(PETSC_USE_COMPLEX)
403                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
404                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
405                  if (fin) {
406                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
407                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
408                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
409                           }
410                  if (bout) {
411                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
412                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
413                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
414                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
415                           }
416                  //n1 = 2*local_n1*dim[0];
417                  if (fout) {
418                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
419                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
420                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
421                            }
422 #else
423       /* Get local size */
424                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
425                  if (fin) {
426                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
427                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
428                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
429                           }
430                  if (fout) {
431                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
432                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
433                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
434                            }
435                  if (bout) {
436                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
437                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
438                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
439                           }
440 #endif
441           break;
442           case 3:
443 #if !defined(PETSC_USE_COMPLEX)
444                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
445                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
446                  if (fin) {
447                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
448                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
449                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
450                          }
451                  if (bout) {
452                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
453                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
454                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
455                           }
456                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
457                  if (fout) {
458                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
459                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
460                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
461                           }
462 #else
463                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
464                  if (fin) {
465                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
466                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
467                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
468                          }
469                  if (fout) {
470                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
471                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
472                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
473                          }
474                  if (bout) {
475                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
476                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
477                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
478                          }
479 #endif
480           break;
481           default:
482 #if !defined(PETSC_USE_COMPLEX)
483                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
484                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
485                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
486                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
487                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
488 
489                  if (fin) {
490                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
491                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
492                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
493                         }
494                  if (bout) {
495                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
496                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
497                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
498                         }
499                  //temp = fftw->partial_dim;
500                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
501                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
502                  if (fout) {
503                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
504                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
505                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
506                         }
507 #else
508                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
509                 if (fin) {
510                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
511                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
512                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
513                        }
514                 if (fout) {
515                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
516                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
517                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
518                        }
519                 if (bout) {
520                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
521                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
522                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
523                        }
524 #endif
525             break;
526           }
527   }
528   PetscFunctionReturn(0);
529 }
530 EXTERN_C_END
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatGetVecs_FFTW"
534 /*
535    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
536      parallel layout determined by FFTW
537 
538    Collective on Mat
539 
540    Input Parameter:
541 .  mat - the matrix
542 
543    Output Parameter:
544 +   fin - (optional) input vector of forward FFTW
545 -   fout - (optional) output vector of forward FFTW
546 
547   Level: advanced
548 
549 .seealso: MatCreateFFTW()
550 */
551 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
552 {
553   PetscErrorCode ierr;
554   PetscMPIInt    size,rank;
555   MPI_Comm       comm=((PetscObject)A)->comm;
556   Mat_FFT        *fft = (Mat_FFT*)A->data;
557   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
558   PetscInt       N=fft->N;
559   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
560 
561   PetscFunctionBegin;
562   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
563   PetscValidType(A,1);
564 
565   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
566   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
567   if (size == 1){ /* sequential case */
568 #if defined(PETSC_USE_COMPLEX)
569     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
570     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
571 #else
572     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
573     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
574 #endif
575   } else {        /* mpi case */
576     ptrdiff_t      alloc_local,local_n0,local_0_start;
577     ptrdiff_t      local_n1,local_1_end;
578     fftw_complex   *data_fin,*data_fout;
579 #if !defined(PETSC_USE_COMPLEX)
580     double         *data_finr ;
581     ptrdiff_t      local_1_start,temp;
582     PetscInt       vsize,n1,N1;
583 #endif
584 
585 //    PetscInt ctr;
586 //    ptrdiff_t      ndim1,*pdim;
587 //    ndim1=(ptrdiff_t) ndim;
588 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
589 
590 //    for(ctr=0;ctr<ndim;ctr++)
591 //        {k
592 //           pdim[ctr] = dim[ctr];
593 //       }
594 
595 
596 
597     switch (ndim){
598     case 1:
599       /* Get local size */
600       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
601 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
602       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
603       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
604       if (fin) {
605         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
606         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
607         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
608       }
609       if (fout) {
610         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
611         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
612         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
613       }
614       break;
615     case 2:
616 #if !defined(PETSC_USE_COMPLEX)
617       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
618       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
619       if (fin) {
620         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
621         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
622         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
623         //printf("The code comes here with vector size %d\n",vsize);
624         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
625       }
626       n1 = 2*local_n1*(dim[0]);
627       //n1 = 2*local_n1*dim[1];
628       printf("The values are %ld\n",local_n1);
629       if (fout) {
630         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
631         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
632         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
633       }
634       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
635 
636 #else
637       /* Get local size */
638      //printf("Hope this does not come here");
639       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
640       if (fin) {
641         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
642         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
643         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
644       }
645       if (fout) {
646         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
647         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
648         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
649       }
650      //printf("Hope this does not come here");
651 #endif
652       break;
653     case 3:
654       /* Get local size */
655 #if !defined(PETSC_USE_COMPLEX)
656       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
657       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
658       if (fin) {
659         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
660         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
661         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
662         //printf("The code comes here with vector size %d\n",vsize);
663         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
664       }
665       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
666       if (fout) {
667         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
668         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
669         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
670       }
671 
672 
673 #else
674       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
675 //      printf("The quantity n is %d",n);
676       if (fin) {
677         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
678         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
679         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
680       }
681       if (fout) {
682         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
683         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
684         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
685       }
686 #endif
687       break;
688     default:
689       /* Get local size */
690 #if !defined(PETSC_USE_COMPLEX)
691       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
692       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
693       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
694       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
695       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
696       if (fin) {
697         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
698         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
699         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
700         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
701       }
702       temp = fftw->partial_dim;
703       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
704 
705       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
706       if (fout) {
707         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
708         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
709         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
710       }
711 
712 #else
713       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
714       if (fin) {
715         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
716         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
717         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
718       }
719       if (fout) {
720         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
721         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
722         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
723       }
724 #endif
725       break;
726     }
727   }
728 //  if (fin){
729 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
730 //  }
731 //  if (fout){
732 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
733 //  }
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "InputTransformFFT"
739 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
740 {
741   PetscErrorCode ierr;
742   PetscFunctionBegin;
743   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
744   PetscFunctionReturn(0);
745 }
746 
747 /*
748       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
749       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
750       changing dimension.
751   Input A, x, y
752   A - FFTW matrix
753   x - user data
754   Options Database Keys:
755 + -mat_fftw_plannerflags - set FFTW planner flags
756 
757    Level: intermediate
758 
759 */
760 
761 EXTERN_C_BEGIN
762 #undef __FUNCT__
763 #define __FUNCT__ "InputTransformFFT_FTTW"
764 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
765 {
766   PetscErrorCode ierr;
767   MPI_Comm       comm=((PetscObject)A)->comm;
768   Mat_FFT        *fft = (Mat_FFT*)A->data;
769   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
770   PetscInt       N=fft->N;
771   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
772   PetscInt       low;
773   PetscInt       rank,size,vsize,vsize1;
774   ptrdiff_t      alloc_local,local_n0,local_0_start;
775   ptrdiff_t      local_n1,local_1_start;
776   VecScatter     vecscat;
777   IS             list1,list2;
778 #if !defined(PETSC_USE_COMPLEX)
779   PetscInt       i,j,k,partial_dim;
780   PetscInt       *indx1, *indx2, tempindx, tempindx1;
781   PetscInt       N1, n1 ,NM;
782   ptrdiff_t      temp;
783 #endif
784 
785   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
786   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
787   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
788 
789   if (size==1)
790     {
791           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
792           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
793           printf("The values of sizes are %d and %d",vsize,vsize1);
794           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
795           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
796           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
797           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
799           ierr = ISDestroy(&list1);CHKERRQ(ierr);
800     }
801 
802  else{
803 
804  switch (ndim){
805  case 1:
806 #if defined(PETSC_USE_COMPLEX)
807       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
808       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
809       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
810       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
811       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
812       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
813       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
814       ierr = ISDestroy(&list1);CHKERRQ(ierr);
815       ierr = ISDestroy(&list2);CHKERRQ(ierr);
816 #else
817       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
818 #endif
819  break;
820  case 2:
821 #if defined(PETSC_USE_COMPLEX)
822       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
823       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
824       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
825       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
826       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
829       ierr = ISDestroy(&list1);CHKERRQ(ierr);
830       ierr = ISDestroy(&list2);CHKERRQ(ierr);
831 #else
832       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
833       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
834       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
835       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
836       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
837       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
838 
839       if (dim[1]%2==0)
840         NM = dim[1]+2;
841       else
842         NM = dim[1]+1;
843 
844       for (i=0;i<local_n0;i++){
845          for (j=0;j<dim[1];j++){
846             tempindx = i*dim[1] + j;
847             tempindx1 = i*NM + j;
848             indx1[tempindx]=local_0_start*dim[1]+tempindx;
849             indx2[tempindx]=low+tempindx1;
850         }
851      }
852 
853       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
854       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
855 
856       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
857       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
860       ierr = ISDestroy(&list1);CHKERRQ(ierr);
861       ierr = ISDestroy(&list2);CHKERRQ(ierr);
862       ierr = PetscFree(indx1);CHKERRQ(ierr);
863       ierr = PetscFree(indx2);CHKERRQ(ierr);
864 #endif
865  break;
866 
867  case 3:
868 #if defined(PETSC_USE_COMPLEX)
869       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
870       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
871       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
872       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
873       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
875       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
876       ierr = ISDestroy(&list1);CHKERRQ(ierr);
877       ierr = ISDestroy(&list2);CHKERRQ(ierr);
878 #else
879       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
880       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
881 
882       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
883       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
884 
885       if (dim[2]%2==0)
886         NM = dim[2]+2;
887       else
888         NM = dim[2]+1;
889 
890       for (i=0;i<local_n0;i++){
891          for (j=0;j<dim[1];j++){
892             for (k=0;k<dim[2];k++){
893                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
894                tempindx1 = i*dim[1]*NM + j*NM + k;
895                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
896                indx2[tempindx]=low+tempindx1;
897             }
898          }
899       }
900 
901       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
902       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
903       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
904       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
905       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
906       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
907       ierr = ISDestroy(&list1);CHKERRQ(ierr);
908       ierr = ISDestroy(&list2);CHKERRQ(ierr);
909       ierr = PetscFree(indx1);CHKERRQ(ierr);
910       ierr = PetscFree(indx2);CHKERRQ(ierr);
911 #endif
912  break;
913 
914  default:
915 #if defined(PETSC_USE_COMPLEX)
916       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
917       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
918       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
919       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
920       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
923       ierr = ISDestroy(&list1);CHKERRQ(ierr);
924       ierr = ISDestroy(&list2);CHKERRQ(ierr);
925 #else
926       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
927       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
928       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
929       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
930       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
931 
932       partial_dim = fftw->partial_dim;
933 
934       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
935       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
936 
937       if (dim[ndim-1]%2==0)
938         NM = 2;
939       else
940         NM = 1;
941 
942       j = low;
943       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
944          {
945           indx1[i] = local_0_start*partial_dim + i;
946           indx2[i] = j;
947           if (k%dim[ndim-1]==0)
948             { j+=NM;}
949           j++;
950          }
951       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
952       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
953       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
954       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959       ierr = PetscFree(indx1);CHKERRQ(ierr);
960       ierr = PetscFree(indx2);CHKERRQ(ierr);
961 #endif
962  break;
963   }
964  }
965  return(0);
966 }
967 EXTERN_C_END
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "OutputTransformFFT"
971 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
972 {
973   PetscErrorCode ierr;
974   PetscFunctionBegin;
975   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 /*
980       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
981   Input A, x, y
982   A - FFTW matrix
983   x - FFTW vector
984   y - PETSc vector that user can read
985   Options Database Keys:
986 + -mat_fftw_plannerflags - set FFTW planner flags
987 
988    Level: intermediate
989 
990 */
991 
992 EXTERN_C_BEGIN
993 #undef __FUNCT__
994 #define __FUNCT__ "OutputTransformFFT_FTTW"
995 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
996 {
997   PetscErrorCode ierr;
998   MPI_Comm       comm=((PetscObject)A)->comm;
999   Mat_FFT        *fft = (Mat_FFT*)A->data;
1000   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1001   PetscInt       N=fft->N;
1002   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1003   PetscInt       low;
1004   PetscInt       rank,size;
1005   ptrdiff_t      alloc_local,local_n0,local_0_start;
1006   ptrdiff_t      local_n1,local_1_start;
1007   VecScatter     vecscat;
1008   IS             list1,list2;
1009 #if !defined(PETSC_USE_COMPLEX)
1010   PetscInt       i,j,k,partial_dim;
1011   PetscInt       *indx1, *indx2, tempindx, tempindx1;
1012   PetscInt       N1, n1 ,NM;
1013   ptrdiff_t      temp;
1014 #endif
1015 
1016 
1017   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1018   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1019   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
1020 
1021   if (size==1){
1022     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
1023     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1024     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1026     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1027     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1028   }
1029   else{
1030 
1031   switch (ndim){
1032   case 1:
1033 #if defined(PETSC_USE_COMPLEX)
1034       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1035       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
1036       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
1037       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1038       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1039       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1041       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1042       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1043 #else
1044       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1045 #endif
1046   break;
1047   case 2:
1048 #if defined(PETSC_USE_COMPLEX)
1049       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1050       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1051       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1052       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1053       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1055       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1056       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1057       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1058 #else
1059       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1060       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1061 
1062       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1063       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
1064 
1065       if (dim[1]%2==0)
1066         NM = dim[1]+2;
1067       else
1068         NM = dim[1]+1;
1069 
1070       for (i=0;i<local_n0;i++){
1071          for (j=0;j<dim[1];j++){
1072             tempindx = i*dim[1] + j;
1073             tempindx1 = i*NM + j;
1074             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1075             indx2[tempindx]=low+tempindx1;
1076          }
1077        }
1078 
1079        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1080        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1081 
1082        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1083        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1084        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1086        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1087        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1088        ierr = PetscFree(indx1);CHKERRQ(ierr);
1089        ierr = PetscFree(indx2);CHKERRQ(ierr);
1090 #endif
1091   break;
1092   case 3:
1093 #if defined(PETSC_USE_COMPLEX)
1094        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1095        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1096        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1097        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1098        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1099        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1100        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1101        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1102        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1103 #else
1104 
1105        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1106        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1107 
1108 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1109 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1110        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1111        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1112 
1113        if (dim[2]%2==0)
1114         NM = dim[2]+2;
1115        else
1116         NM = dim[2]+1;
1117 
1118        for (i=0;i<local_n0;i++){
1119           for (j=0;j<dim[1];j++){
1120              for (k=0;k<dim[2];k++){
1121                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1122                 tempindx1 = i*dim[1]*NM + j*NM + k;
1123                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1124                 indx2[tempindx]=low+tempindx1;
1125              }
1126           }
1127        }
1128 
1129        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1130        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1131 
1132        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1133        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1134        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1135        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1136        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1137        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1138        ierr = PetscFree(indx1);CHKERRQ(ierr);
1139        ierr = PetscFree(indx2);CHKERRQ(ierr);
1140 #endif
1141   break;
1142   default:
1143 #if defined(PETSC_USE_COMPLEX)
1144        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1145        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1146        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1147        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1148        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1150        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1151        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1152        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1153 #else
1154        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1155        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1156        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1157        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1158        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1159 
1160        partial_dim = fftw->partial_dim;
1161 
1162        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1163        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1164 
1165        if (dim[ndim-1]%2==0)
1166          NM = 2;
1167        else
1168          NM = 1;
1169 
1170        j = low;
1171        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1172           {
1173            indx1[i] = local_0_start*partial_dim + i;
1174            indx2[i] = j;
1175            if (k%dim[ndim-1]==0)
1176              { j+=NM;}
1177            j++;
1178           }
1179        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1180        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1181 
1182        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1183        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1184        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1185        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1186        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1187        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1188        ierr = PetscFree(indx1);CHKERRQ(ierr);
1189        ierr = PetscFree(indx2);CHKERRQ(ierr);
1190 #endif
1191   break;
1192   }
1193   }
1194   return 0;
1195 }
1196 EXTERN_C_END
1197 
1198 EXTERN_C_BEGIN
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatCreate_FFTW"
1201 /*
1202       MatCreate_FFTW - Creates a matrix object that provides FFT
1203   via the external package FFTW
1204   Options Database Keys:
1205 + -mat_fftw_plannerflags - set FFTW planner flags
1206 
1207    Level: intermediate
1208 
1209 */
1210 
1211 PetscErrorCode MatCreate_FFTW(Mat A)
1212 {
1213   PetscErrorCode ierr;
1214   MPI_Comm       comm=((PetscObject)A)->comm;
1215   Mat_FFT        *fft=(Mat_FFT*)A->data;
1216   Mat_FFTW       *fftw;
1217   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1218   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1219   PetscBool      flg;
1220   PetscInt       p_flag,partial_dim=1,ctr;
1221   PetscMPIInt    size,rank;
1222   ptrdiff_t      *pdim;
1223   ptrdiff_t      local_n1,local_1_start;
1224 #if !defined(PETSC_USE_COMPLEX)
1225    ptrdiff_t     temp;
1226    PetscInt      N1,tot_dim;
1227 #else
1228    PetscInt n1;
1229 #endif
1230 
1231 
1232   PetscFunctionBegin;
1233   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1234   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1235   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1236 
1237   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1238   pdim[0] = dim[0];
1239 #if !defined(PETSC_USE_COMPLEX)
1240   tot_dim = 2*dim[0];
1241 #endif
1242   for (ctr=1;ctr<ndim;ctr++)
1243      {
1244           partial_dim *= dim[ctr];
1245           pdim[ctr] = dim[ctr];
1246 #if !defined(PETSC_USE_COMPLEX)
1247           if (ctr==ndim-1)
1248             tot_dim *= (dim[ctr]/2+1);
1249           else
1250             tot_dim *= dim[ctr];
1251 #endif
1252      }
1253 
1254   if (size == 1) {
1255 #if defined(PETSC_USE_COMPLEX)
1256     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1257     n = N;
1258 #else
1259     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1260     n = tot_dim;
1261 #endif
1262 
1263   } else {
1264     ptrdiff_t alloc_local,local_n0,local_0_start;
1265     switch (ndim){
1266     case 1:
1267 #if !defined(PETSC_USE_COMPLEX)
1268    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1269 #else
1270       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1271       n = (PetscInt)local_n0;
1272       n1 = (PetscInt) local_n1;
1273       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1274 #endif
1275       break;
1276     case 2:
1277 #if defined(PETSC_USE_COMPLEX)
1278       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1279       /*
1280        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1281        PetscSynchronizedFlush(comm);
1282        */
1283       n = (PetscInt)local_n0*dim[1];
1284       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1285 #else
1286       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1287       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1288       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1289 #endif
1290       break;
1291     case 3:
1292 #if defined(PETSC_USE_COMPLEX)
1293       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1294       n = (PetscInt)local_n0*dim[1]*dim[2];
1295       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1296 #else
1297       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1298       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1299       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1300 #endif
1301       break;
1302     default:
1303 #if defined(PETSC_USE_COMPLEX)
1304       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1305       n = (PetscInt)local_n0*partial_dim;
1306       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1307 #else
1308       temp = pdim[ndim-1];
1309       pdim[ndim-1] = temp/2 + 1;
1310       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1311       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1312       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1313       pdim[ndim-1] = temp;
1314       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1315 #endif
1316       break;
1317     }
1318   }
1319   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1320   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1321   fft->data = (void*)fftw;
1322 
1323   fft->n           = n;
1324   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1325   fftw->partial_dim = partial_dim;
1326   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1327   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1328 
1329   fftw->p_forward  = 0;
1330   fftw->p_backward = 0;
1331   fftw->p_flag     = FFTW_ESTIMATE;
1332 
1333   if (size == 1){
1334     A->ops->mult          = MatMult_SeqFFTW;
1335     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1336   } else {
1337     A->ops->mult          = MatMult_MPIFFTW;
1338     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1339   }
1340   fft->matdestroy          = MatDestroy_FFTW;
1341   //A->ops->getvecs       = MatGetVecs_FFTW;
1342   A->assembled          = PETSC_TRUE;
1343   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1344   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1345   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1346 
1347   /* get runtime options */
1348   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1349     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1350     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1351   PetscOptionsEnd();
1352   PetscFunctionReturn(0);
1353 }
1354 EXTERN_C_END
1355 
1356 
1357 
1358 
1359