xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision f76f76bef110e2b36bd545dce2f3b7cb7b1640a3)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatMult_SeqFFTW"
30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31 {
32   PetscErrorCode ierr;
33   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35   PetscScalar    *x_array,*y_array;
36   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37 
38   PetscFunctionBegin;
39 #if !defined(PETSC_USE_COMPLEX)
40 
41   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42 #endif
43   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45   if (!fftw->p_forward){ /* create a plan, then excute it */
46     switch (ndim){
47     case 1:
48       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49       break;
50     case 2:
51       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52       break;
53     case 3:
54       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55       break;
56     default:
57       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58       break;
59     }
60     fftw->finarray  = x_array;
61     fftw->foutarray = y_array;
62     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64     fftw_execute(fftw->p_forward);
65   } else { /* use existing plan */
66     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68     } else {
69       fftw_execute(fftw->p_forward);
70     }
71   }
72   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80 {
81   PetscErrorCode ierr;
82   Mat_FFT        *fft = (Mat_FFT*)A->data;
83   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84   PetscScalar    *x_array,*y_array;
85   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86 
87   PetscFunctionBegin;
88 #if !defined(PETSC_USE_COMPLEX)
89   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90 #endif
91   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93   if (!fftw->p_backward){ /* create a plan, then excute it */
94     switch (ndim){
95     case 1:
96       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97       break;
98     case 2:
99       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100       break;
101     case 3:
102       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103       break;
104     default:
105       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106       break;
107     }
108     fftw->binarray  = x_array;
109     fftw->boutarray = y_array;
110     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111   } else { /* use existing plan */
112     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114     } else {
115       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116     }
117   }
118   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatMult_MPIFFTW"
125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126 {
127   PetscErrorCode ierr;
128   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130   PetscScalar    *x_array,*y_array;
131   PetscInt       ndim=fft->ndim,*dim=fft->dim;
132   MPI_Comm       comm=((PetscObject)A)->comm;
133 // PetscInt ctr;
134 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
135 //  ndim1=(ptrdiff_t) ndim;
136 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137 
138 //  for(ctr=0;ctr<ndim;ctr++)
139 //     {
140 //      pdim[ctr] = dim[ctr];
141 //     }
142 
143   PetscFunctionBegin;
144 #if !defined(PETSC_USE_COMPLEX)
145   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
146 #endif
147 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
148 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
149 
150   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
151   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
152   if (!fftw->p_forward){ /* create a plan, then excute it */
153     switch (ndim){
154     case 1:
155 #if defined(PETSC_USE_COMPLEX)
156       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
157 #endif
158       break;
159     case 2:
160 #if defined(PETSC_USE_COMPLEX)
161       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
162 #else
163       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
164 #endif
165       break;
166     case 3:
167 #if defined(PETSC_USE_COMPLEX)
168       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
169 #else
170       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[3],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
171 #endif
172       break;
173     default:
174 #if defined(PETSC_USE_COMPLEX)
175       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
176 #else
177       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
178 #endif
179  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
180       break;
181     }
182     fftw->finarray  = x_array;
183     fftw->foutarray = y_array;
184     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
185                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
186     fftw_execute(fftw->p_forward);
187   } else { /* use existing plan */
188     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
189       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
190     } else {
191       fftw_execute(fftw->p_forward);
192     }
193   }
194   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
195   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
196   PetscFunctionReturn(0);
197 }
198 
199 #undef __FUNCT__
200 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
201 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
202 {
203   PetscErrorCode ierr;
204   Mat_FFT        *fft  = (Mat_FFT*)A->data;
205   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
206   PetscScalar    *x_array,*y_array;
207   PetscInt       ndim=fft->ndim,*dim=fft->dim;
208   MPI_Comm       comm=((PetscObject)A)->comm;
209 //  PetscInt       ctr;
210 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
211 //  ndim1=(ptrdiff_t) ndim;
212 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
213 
214 //  for(ctr=0;ctr<ndim;ctr++)
215 //     {
216 //      pdim[ctr] = dim[ctr];
217 //     }
218 
219   PetscFunctionBegin;
220 //#if !defined(PETSC_USE_COMPLEX)
221 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
222 //#endif
223 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
224 // should pdim be a member of Mat_FFTW?
225 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
226 
227   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
228   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
229   if (!fftw->p_backward){ /* create a plan, then excute it */
230     switch (ndim){
231     case 1:
232 #if defined(PETSC_USE_COMPLEX)
233       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
234 #endif
235       break;
236     case 2:
237 #if defined(PETSC_USE_COMPLEX)
238       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
239 #else
240       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
241 #endif
242       break;
243     case 3:
244 #if defined(PETSC_USE_COMPLEX)
245       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
246 #else
247       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
248 #endif
249       break;
250     default:
251 #if defined(PETSC_USE_COMPLEX)
252       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
253 #else
254       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
255 #endif
256 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
257       break;
258     }
259     fftw->binarray  = x_array;
260     fftw->boutarray = y_array;
261     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
262   } else { /* use existing plan */
263     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
264       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
265     } else {
266       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
267     }
268   }
269   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
270   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatDestroy_FFTW"
276 PetscErrorCode MatDestroy_FFTW(Mat A)
277 {
278   Mat_FFT        *fft = (Mat_FFT*)A->data;
279   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
280   PetscErrorCode ierr;
281 
282   PetscFunctionBegin;
283 #if !defined(PETSC_USE_COMPLEX)
284   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
285 #endif
286   fftw_destroy_plan(fftw->p_forward);
287   fftw_destroy_plan(fftw->p_backward);
288   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
289   ierr = PetscFree(fft->data);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
294 #undef __FUNCT__
295 #define __FUNCT__ "VecDestroy_MPIFFTW"
296 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
297 {
298   PetscErrorCode  ierr;
299   PetscScalar     *array;
300 
301   PetscFunctionBegin;
302 #if !defined(PETSC_USE_COMPLEX)
303   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
304 #endif
305   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
306   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
307   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
308   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatGetVecs1DC_FFTW"
314 /*
315    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
316      parallel layout determined by FFTW-1D
317 
318 */
319 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
320 {
321   PetscErrorCode ierr;
322   PetscMPIInt    size,rank;
323   MPI_Comm       comm=((PetscObject)A)->comm;
324   Mat_FFT        *fft = (Mat_FFT*)A->data;
325 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
326   PetscInt       N=fft->N;
327   PetscInt       ndim=fft->ndim,*dim=fft->dim;
328   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
329   ptrdiff_t      f_local_n1,f_local_1_end;
330   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
331   ptrdiff_t      b_local_n1,b_local_1_end;
332   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
333 
334   PetscFunctionBegin;
335 #if !defined(PETSC_USE_COMPLEX)
336   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
337 #endif
338   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
339   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
340   if (size == 1){
341     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
342   }
343   else {
344       if (ndim>1){
345         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
346       else {
347           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
348           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
349           if (fin) {
350             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
351             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
352             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
353           }
354           if (fout) {
355             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
356             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
357             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
358           }
359           if (bin) {
360             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
361             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
362             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
363           }
364           if (bout) {
365             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
366             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
367             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
368           }
369   }
370   if (fin){
371     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
372   }
373   if (fout){
374     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
375   }
376   if (bin){
377     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
378   }
379   if (bout){
380     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
381   }
382   PetscFunctionReturn(0);
383 }
384 
385 
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatGetVecs_FFTW"
390 /*
391    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
392      parallel layout determined by FFTW
393 
394    Collective on Mat
395 
396    Input Parameter:
397 .  mat - the matrix
398 
399    Output Parameter:
400 +   fin - (optional) input vector of forward FFTW
401 -   fout - (optional) output vector of forward FFTW
402 
403   Level: advanced
404 
405 .seealso: MatCreateFFTW()
406 */
407 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
408 {
409   PetscErrorCode ierr;
410   PetscMPIInt    size,rank;
411   MPI_Comm       comm=((PetscObject)A)->comm;
412   Mat_FFT        *fft = (Mat_FFT*)A->data;
413   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
414   PetscInt       N=fft->N, N1, n1,vsize;
415 
416   PetscFunctionBegin;
417 //#if !defined(PETSC_USE_COMPLEX)
418 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
419 //#endif
420   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
421   PetscValidType(A,1);
422 
423   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
424   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
425   if (size == 1){ /* sequential case */
426     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
427     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
428     printf("The code successfully comes at the end of the routine with one processor\n");
429   } else {        /* mpi case */
430     ptrdiff_t      alloc_local,local_n0,local_0_start;
431     ptrdiff_t      local_n1,local_1_end;
432     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
433     fftw_complex   *data_fin,*data_fout;
434     double         *data_finr, *data_foutr;
435     ptrdiff_t local_1_start;
436 //    PetscInt ctr;
437 //    ptrdiff_t      ndim1,*pdim;
438 //    ndim1=(ptrdiff_t) ndim;
439 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
440 
441 //    for(ctr=0;ctr<ndim;ctr++)
442 //        {k
443 //           pdim[ctr] = dim[ctr];
444 //       }
445 
446 
447 
448     switch (ndim){
449     case 1:
450       /* Get local size */
451       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
452 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
453       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
454       if (fin) {
455         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
456         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
457         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
458       }
459       if (fout) {
460         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
461         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
462         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
463       }
464       break;
465     case 2:
466 #if !defined(PETSC_USE_COMPLEX)
467       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
468       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
469       if (fin) {
470         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
471         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
472         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
473         //printf("The code comes here with vector size %d\n",vsize);
474         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
475       }
476       if (fout) {
477         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
478         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
479         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
480       }
481       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
482 
483 #else
484       /* Get local size */
485      printf("Hope this does not come here");
486       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
487       if (fin) {
488         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
489         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
490         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
491       }
492       if (fout) {
493         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
494         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
495         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
496       }
497      printf("Hope this does not come here");
498 #endif
499       break;
500     case 3:
501       /* Get local size */
502 #if !defined(PETSC_USE_COMPLEX)
503   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
504 #else
505       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
506 //      printf("The quantity n is %d",n);
507       if (fin) {
508         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
509         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
510         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
511       }
512       if (fout) {
513         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
514         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
515         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
516       }
517 #endif
518       break;
519     default:
520       /* Get local size */
521 #if !defined(PETSC_USE_COMPLEX)
522   SETERRQ(comm,PETSC_ERR_SUP,"Not done yet");
523 #else
524       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
525 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
526 //      printf("The value of alloc local is %d",alloc_local);
527 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
528 //      for(i=0;i<ndim;i++)
529 //         {
530 //          pdim[i]=dim[i];printf("%d",pdim[i]);
531 //         }
532 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
533 //      printf("The quantity n is %d",n);
534       if (fin) {
535         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
536         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
537         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
538       }
539       if (fout) {
540         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
541         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
542         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
543       }
544 #endif
545       break;
546     }
547   }
548 //  if (fin){
549 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
550 //  }
551 //  if (fout){
552 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
553 //  }
554   PetscFunctionReturn(0);
555 }
556 
557 //EXTERN_C_BEGIN - Do we need this?
558 #undef __FUNCT__
559 #define __FUNCT__ "InputTransformFFT"
560 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
561 {
562   PetscErrorCode ierr;
563   PetscFunctionBegin;
564   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
565   PetscFunctionReturn(0);
566 }
567 //EXTERN_C_END - Do we need this?
568 /*
569       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
570   Input A, x, y
571   A - FFTW matrix
572   x - user data
573   Options Database Keys:
574 + -mat_fftw_plannerflags - set FFTW planner flags
575 
576    Level: intermediate
577 
578 */
579 
580 EXTERN_C_BEGIN
581 #undef __FUNCT__
582 #define __FUNCT__ "InputTransformFFT_FTTW"
583 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
584 {
585   PetscErrorCode ierr;
586   MPI_Comm       comm=((PetscObject)A)->comm;
587   Mat_FFT        *fft = (Mat_FFT*)A->data;
588   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
589   PetscInt       N=fft->N, N1, n1 ,NM;
590   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
591   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
592   PetscInt       i,j,rank,size;
593   ptrdiff_t      alloc_local,local_n0,local_0_start;
594   ptrdiff_t      local_n1,local_1_start;
595   VecScatter     vecscat;
596   IS             list1,list2;
597 
598   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
599   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
600   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
601   printf("Local ownership starts at %d\n",low);
602 
603  switch (ndim){
604  case 1:
605   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
606   break;
607  case 2:
608       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
609       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
610 
611      ierr = PetscMalloc(sizeof(PetscInt)*((int)local_n0)*N1,&indx1);CHKERRQ(ierr);
612      ierr = PetscMalloc(sizeof(PetscInt)*((int)local_n0)*N1,&indx2);CHKERRQ(ierr);
613      printf("Val local_0_start = %d",local_0_start);
614 
615      if (dim[1]%2==0)
616       NM = dim[1]+2;
617     else
618       NM = dim[1]+1;
619 
620      for (i=0;i<local_n0;i++){
621         for (j=0;j<dim[1];j++){
622             tempindx = i*dim[1] + j;
623             tempindx1 = i*NM + j;
624             indx1[tempindx]=local_0_start*N1+tempindx;
625             indx2[tempindx]=low+tempindx1;
626             printf("Val tempindx1 = %d",tempindx1);
627             printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
628             printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
629         }
630      }
631 
632      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
633      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
634 
635      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
636      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
637      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
638      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
639      break;
640 
641  case 3:
642   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
643   break;
644 
645  default:
646   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
647   break;
648  }
649 
650  return 0;
651 }
652 EXTERN_C_END
653 
654 /*
655 //EXTERN_C_BEGIN - Do we need this?
656 #undef __FUNCT__
657 #define __FUNCT__ "OutputTransformFFT"
658 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
659 {
660   PetscErrorCode ierr;
661   PetscFunctionBegin;
662   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 //EXTERN_C_END - Do we need this?
666 */
667 
668 EXTERN_C_BEGIN
669 #undef __FUNCT__
670 #define __FUNCT__ "MatCreate_FFTW"
671 /*
672       MatCreate_FFTW - Creates a matrix object that provides FFT
673   via the external package FFTW
674   Options Database Keys:
675 + -mat_fftw_plannerflags - set FFTW planner flags
676 
677    Level: intermediate
678 
679 */
680 
681 PetscErrorCode MatCreate_FFTW(Mat A)
682 {
683   PetscErrorCode ierr;
684   MPI_Comm       comm=((PetscObject)A)->comm;
685   Mat_FFT        *fft=(Mat_FFT*)A->data;
686   Mat_FFTW       *fftw;
687   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
688   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
689   PetscBool      flg;
690   PetscInt       p_flag,partial_dim=1,ctr;
691   PetscMPIInt    size,rank;
692   ptrdiff_t      *pdim;
693 
694   PetscFunctionBegin;
695 //#if !defined(PETSC_USE_COMPLEX)
696 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
697 //#endif
698 
699   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
700   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
701       printf("The code is coming here\n");
702   ierr = MPI_Barrier(PETSC_COMM_WORLD);
703 
704   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
705   pdim[0] = dim[0];
706   for(ctr=1;ctr<ndim;ctr++)
707       {
708           partial_dim *= dim[ctr];
709           pdim[ctr] = dim[ctr];
710       }
711 //#if !defined(PETSC_USE_COMPLEX)
712 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
713 //#endif
714 
715 //  printf("partial dimension is %d",partial_dim);
716   if (size == 1) {
717     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
718     n = N;
719   } else {
720     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
721     switch (ndim){
722     case 1:
723 #if !defined(PETSC_USE_COMPLEX)
724       printf("The code is coming here\n");
725   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
726 #endif
727       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
728       n = (PetscInt)local_n0;
729       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
730 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
731       break;
732     case 2:
733       printf("The code is coming here\n");
734       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
735       /*
736        PetscMPIInt    rank;
737        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
738        PetscSynchronizedFlush(comm);
739        */
740       n = (PetscInt)local_n0*dim[1];
741       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
742       break;
743     case 3:
744 //      printf("The value of alloc local is %d",alloc_local);
745       n = (PetscInt)local_n0*dim[1]*dim[2];
746       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
747       break;
748     default:
749       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
750 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
751 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
752       n = (PetscInt)local_n0*partial_dim;
753 //      printf("New partial dimension is %d %d %d",n,N,ndim);
754       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
755       break;
756     }
757   }
758   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
759 
760   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
761   fft->data = (void*)fftw;
762 
763   fft->n           = n;
764   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
765   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
766   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
767 
768   fftw->p_forward  = 0;
769   fftw->p_backward = 0;
770   fftw->p_flag     = FFTW_ESTIMATE;
771 
772   if (size == 1){
773     A->ops->mult          = MatMult_SeqFFTW;
774     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
775   } else {
776     A->ops->mult          = MatMult_MPIFFTW;
777     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
778   }
779   fft->matdestroy          = MatDestroy_FFTW;
780   A->ops->getvecs       = MatGetVecs_FFTW;
781   A->assembled          = PETSC_TRUE;
782   printf("The code is coming here\n");
783 #if !defined(PETSC_USE_COMPLEX)
784   printf("The code is coming here\n");
785   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
786 //  PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
787 #endif
788 
789   /* get runtime options */
790   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
791     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
792     if (flg) {fftw->p_flag = (unsigned)p_flag;}
793   PetscOptionsEnd();
794   PetscFunctionReturn(0);
795 }
796 EXTERN_C_END
797 
798 
799 
800 
801