xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision b90f4e8e2d1ae90880ffa0fd3113e4b748be9ce0)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatMult_SeqFFTW"
30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31 {
32   PetscErrorCode ierr;
33   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35   PetscScalar    *x_array,*y_array;
36   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37 
38   PetscFunctionBegin;
39 #if !defined(PETSC_USE_COMPLEX)
40 
41   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42 #endif
43   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45   if (!fftw->p_forward){ /* create a plan, then excute it */
46     switch (ndim){
47     case 1:
48       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49       break;
50     case 2:
51       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52       break;
53     case 3:
54       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55       break;
56     default:
57       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58       break;
59     }
60     fftw->finarray  = x_array;
61     fftw->foutarray = y_array;
62     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64     fftw_execute(fftw->p_forward);
65   } else { /* use existing plan */
66     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68     } else {
69       fftw_execute(fftw->p_forward);
70     }
71   }
72   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80 {
81   PetscErrorCode ierr;
82   Mat_FFT        *fft = (Mat_FFT*)A->data;
83   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84   PetscScalar    *x_array,*y_array;
85   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86 
87   PetscFunctionBegin;
88 #if !defined(PETSC_USE_COMPLEX)
89   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90 #endif
91   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93   if (!fftw->p_backward){ /* create a plan, then excute it */
94     switch (ndim){
95     case 1:
96       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97       break;
98     case 2:
99       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100       break;
101     case 3:
102       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103       break;
104     default:
105       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106       break;
107     }
108     fftw->binarray  = x_array;
109     fftw->boutarray = y_array;
110     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111   } else { /* use existing plan */
112     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114     } else {
115       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116     }
117   }
118   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatMult_MPIFFTW"
125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126 {
127   PetscErrorCode ierr;
128   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130   PetscScalar    *x_array,*y_array;
131   PetscInt       ndim=fft->ndim,*dim=fft->dim;
132   MPI_Comm       comm=((PetscObject)A)->comm;
133 // PetscInt ctr;
134 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
135 //  ndim1=(ptrdiff_t) ndim;
136 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
137 
138 //  for(ctr=0;ctr<ndim;ctr++)
139 //     {
140 //      pdim[ctr] = dim[ctr];
141 //     }
142 
143   PetscFunctionBegin;
144 //#if !defined(PETSC_USE_COMPLEX)
145 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
146 //#endif
147 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
148 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
149 
150   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
151   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
152   if (!fftw->p_forward){ /* create a plan, then excute it */
153     switch (ndim){
154     case 1:
155 #if defined(PETSC_USE_COMPLEX)
156       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
157 #endif
158       break;
159     case 2:
160 #if defined(PETSC_USE_COMPLEX)
161       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
162 #else
163       printf("The code comes here \n");
164       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
165 #endif
166       break;
167     case 3:
168 #if defined(PETSC_USE_COMPLEX)
169       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
170 #else
171       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
172 #endif
173       break;
174     default:
175 #if defined(PETSC_USE_COMPLEX)
176       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
177 #else
178       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
179 #endif
180  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
181       break;
182     }
183     fftw->finarray  = x_array;
184     fftw->foutarray = y_array;
185     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
186                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
187     fftw_execute(fftw->p_forward);
188   } else { /* use existing plan */
189     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
190       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
191     } else {
192       fftw_execute(fftw->p_forward);
193     }
194   }
195   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
196   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
200 #undef __FUNCT__
201 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
202 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
203 {
204   PetscErrorCode ierr;
205   Mat_FFT        *fft  = (Mat_FFT*)A->data;
206   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
207   PetscScalar    *x_array,*y_array;
208   PetscInt       ndim=fft->ndim,*dim=fft->dim;
209   MPI_Comm       comm=((PetscObject)A)->comm;
210 //  PetscInt       ctr;
211 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
212 //  ndim1=(ptrdiff_t) ndim;
213 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
214 
215 //  for(ctr=0;ctr<ndim;ctr++)
216 //     {
217 //      pdim[ctr] = dim[ctr];
218 //     }
219 
220   PetscFunctionBegin;
221 //#if !defined(PETSC_USE_COMPLEX)
222 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
223 //#endif
224 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
225 // should pdim be a member of Mat_FFTW?
226 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
227 
228   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
229   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
230   if (!fftw->p_backward){ /* create a plan, then excute it */
231     switch (ndim){
232     case 1:
233 #if defined(PETSC_USE_COMPLEX)
234       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
235 #endif
236       break;
237     case 2:
238 #if defined(PETSC_USE_COMPLEX)
239       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
240 #else
241       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
242 #endif
243       break;
244     case 3:
245 #if defined(PETSC_USE_COMPLEX)
246       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
247 #else
248       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
249 #endif
250       break;
251     default:
252 #if defined(PETSC_USE_COMPLEX)
253       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
254 #else
255       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
256 #endif
257 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
258       break;
259     }
260     fftw->binarray  = x_array;
261     fftw->boutarray = y_array;
262     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
263   } else { /* use existing plan */
264     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
265       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
266     } else {
267       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
268     }
269   }
270   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
271   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatDestroy_FFTW"
277 PetscErrorCode MatDestroy_FFTW(Mat A)
278 {
279   Mat_FFT        *fft = (Mat_FFT*)A->data;
280   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
281   PetscErrorCode ierr;
282 
283   PetscFunctionBegin;
284 #if !defined(PETSC_USE_COMPLEX)
285   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
286 #endif
287   fftw_destroy_plan(fftw->p_forward);
288   fftw_destroy_plan(fftw->p_backward);
289   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
290   ierr = PetscFree(fft->data);CHKERRQ(ierr);
291   PetscFunctionReturn(0);
292 }
293 
294 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
295 #undef __FUNCT__
296 #define __FUNCT__ "VecDestroy_MPIFFTW"
297 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
298 {
299   PetscErrorCode  ierr;
300   PetscScalar     *array;
301 
302   PetscFunctionBegin;
303 #if !defined(PETSC_USE_COMPLEX)
304   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
305 #endif
306   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
307   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
308   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
309   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNCT__
314 #define __FUNCT__ "MatGetVecs1DC_FFTW"
315 /*
316    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
317      parallel layout determined by FFTW-1D
318 
319 */
320 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout)
321 {
322   PetscErrorCode ierr;
323   PetscMPIInt    size,rank;
324   MPI_Comm       comm=((PetscObject)A)->comm;
325   Mat_FFT        *fft = (Mat_FFT*)A->data;
326 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
327   PetscInt       N=fft->N;
328   PetscInt       ndim=fft->ndim,*dim=fft->dim;
329   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
330   ptrdiff_t      f_local_n1,f_local_1_end;
331   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
332   ptrdiff_t      b_local_n1,b_local_1_end;
333   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
334 
335   PetscFunctionBegin;
336 #if !defined(PETSC_USE_COMPLEX)
337   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
338 #endif
339   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
340   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
341   if (size == 1){
342     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
343   }
344   else {
345       if (ndim>1){
346         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
347       else {
348           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
349           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
350           if (fin) {
351             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
352             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
353             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
354           }
355           if (fout) {
356             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
357             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
358             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
359           }
360           if (bin) {
361             data_bin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
362             ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr);
363             (*bin)->ops->destroy   = VecDestroy_MPIFFTW;
364           }
365           if (bout) {
366             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
367             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
368             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
369           }
370   }
371   if (fin){
372     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
373   }
374   if (fout){
375     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
376   }
377   if (bin){
378     ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr);
379   }
380   if (bout){
381     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
382   }
383   PetscFunctionReturn(0);
384 }
385 
386 
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatGetVecs_FFTW"
391 /*
392    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
393      parallel layout determined by FFTW
394 
395    Collective on Mat
396 
397    Input Parameter:
398 .  mat - the matrix
399 
400    Output Parameter:
401 +   fin - (optional) input vector of forward FFTW
402 -   fout - (optional) output vector of forward FFTW
403 
404   Level: advanced
405 
406 .seealso: MatCreateFFTW()
407 */
408 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
409 {
410   PetscErrorCode ierr;
411   PetscMPIInt    size,rank;
412   MPI_Comm       comm=((PetscObject)A)->comm;
413   Mat_FFT        *fft = (Mat_FFT*)A->data;
414   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
415   PetscInt       N=fft->N, N1, n1,vsize;
416 
417   PetscFunctionBegin;
418 //#if !defined(PETSC_USE_COMPLEX)
419 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
420 //#endif
421   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
422   PetscValidType(A,1);
423 
424   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
425   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
426   if (size == 1){ /* sequential case */
427     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
428     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
429     printf("The code successfully comes at the end of the routine with one processor\n");
430   } else {        /* mpi case */
431     ptrdiff_t      alloc_local,local_n0,local_0_start;
432     ptrdiff_t      local_n1,local_1_end;
433     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
434     fftw_complex   *data_fin,*data_fout;
435     double         *data_finr ;
436     ptrdiff_t      local_1_start,temp;
437 //    PetscInt ctr;
438 //    ptrdiff_t      ndim1,*pdim;
439 //    ndim1=(ptrdiff_t) ndim;
440 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
441 
442 //    for(ctr=0;ctr<ndim;ctr++)
443 //        {k
444 //           pdim[ctr] = dim[ctr];
445 //       }
446 
447 
448 
449     switch (ndim){
450     case 1:
451       /* Get local size */
452       /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */
453 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
454       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
455       if (fin) {
456         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
457         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
458         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
459       }
460       if (fout) {
461         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
462         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
463         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
464       }
465       break;
466     case 2:
467 #if !defined(PETSC_USE_COMPLEX)
468       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
469       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
470       if (fin) {
471         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
472         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
473         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
474         //printf("The code comes here with vector size %d\n",vsize);
475         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
476       }
477       if (fout) {
478         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
479         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
480         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
481       }
482       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
483 
484 #else
485       /* Get local size */
486      printf("Hope this does not come here");
487       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
488       if (fin) {
489         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
490         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
491         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
492       }
493       if (fout) {
494         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
495         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
496         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
497       }
498      printf("Hope this does not come here");
499 #endif
500       break;
501     case 3:
502       /* Get local size */
503 #if !defined(PETSC_USE_COMPLEX)
504       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
505       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
506       if (fin) {
507         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
508         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
509         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
510         //printf("The code comes here with vector size %d\n",vsize);
511         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
512       }
513       if (fout) {
514         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
515         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
516         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
517       }
518       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
519 
520 
521 #else
522       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
523 //      printf("The quantity n is %d",n);
524       if (fin) {
525         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
526         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
527         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
528       }
529       if (fout) {
530         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
532         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
533       }
534 #endif
535       break;
536     default:
537       /* Get local size */
538 #if !defined(PETSC_USE_COMPLEX)
539       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
540       printf("The value of temp is %ld\n",temp);
541       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
542       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
543       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
544       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
545       if (fin) {
546         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
547         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
548         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
549         //printf("The code comes here with vector size %d\n",vsize);
550         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
551       }
552       if (fout) {
553         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
554         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
555         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
556       }
557 
558 #else
559       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
560 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
561 //      printf("The value of alloc local is %d",alloc_local);
562 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
563 //      for(i=0;i<ndim;i++)
564 //         {
565 //          pdim[i]=dim[i];printf("%d",pdim[i]);
566 //         }
567 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
568 //      printf("The quantity n is %d",n);
569       if (fin) {
570         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
571         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
572         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
573       }
574       if (fout) {
575         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
576         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
577         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
578       }
579 #endif
580       break;
581     }
582   }
583 //  if (fin){
584 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
585 //  }
586 //  if (fout){
587 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
588 //  }
589   PetscFunctionReturn(0);
590 }
591 
592 #undef __FUNCT__
593 #define __FUNCT__ "InputTransformFFT"
594 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
595 {
596   PetscErrorCode ierr;
597   PetscFunctionBegin;
598   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
599   PetscFunctionReturn(0);
600 }
601 
602 /*
603       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
604   Input A, x, y
605   A - FFTW matrix
606   x - user data
607   Options Database Keys:
608 + -mat_fftw_plannerflags - set FFTW planner flags
609 
610    Level: intermediate
611 
612 */
613 
614 EXTERN_C_BEGIN
615 #undef __FUNCT__
616 #define __FUNCT__ "InputTransformFFT_FTTW"
617 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
618 {
619   PetscErrorCode ierr;
620   MPI_Comm       comm=((PetscObject)A)->comm;
621   Mat_FFT        *fft = (Mat_FFT*)A->data;
622   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
623   PetscInt       N=fft->N, N1, n1 ,NM;
624   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
625   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
626   PetscInt       i,j,k,rank,size;
627   ptrdiff_t      alloc_local,local_n0,local_0_start;
628   ptrdiff_t      local_n1,local_1_start;
629   VecScatter     vecscat;
630   IS             list1,list2;
631 
632   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
633   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
634   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
635   printf("Local ownership starts at %d\n",low);
636 
637  switch (ndim){
638  case 1:
639   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
640   break;
641  case 2:
642       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
643       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
644 
645       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
646       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
647       printf("Val local_0_start = %ld\n",local_0_start);
648 
649       if (dim[1]%2==0)
650         NM = dim[1]+2;
651       else
652         NM = dim[1]+1;
653 
654       for (i=0;i<local_n0;i++){
655          for (j=0;j<dim[1];j++){
656             tempindx = i*dim[1] + j;
657             tempindx1 = i*NM + j;
658             indx1[tempindx]=local_0_start*dim[1]+tempindx;
659             indx2[tempindx]=low+tempindx1;
660            // printf("Val tempindx1 = %d\n",tempindx1);
661   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
662   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
663   //          printf("-------------------------\n",indx2[tempindx],rank);
664         }
665      }
666 
667       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
668       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
669 
670       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
671       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
672       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
673       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
674       break;
675 
676  case 3:
677       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
678       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
679 
680       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
681       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
682       printf("Val local_0_start = %ld\n",local_0_start);
683 
684       if (dim[2]%2==0)
685         NM = dim[2]+2;
686       else
687         NM = dim[2]+1;
688 
689       for (i=0;i<local_n0;i++){
690          for (j=0;j<dim[1];j++){
691             for (k=0;k<dim[2];k++){
692                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
693                tempindx1 = i*dim[1]*NM + j*NM + k;
694                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
695                indx2[tempindx]=low+tempindx1;
696             }
697            // printf("Val tempindx1 = %d\n",tempindx1);
698            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
699            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
700            // printf("-------------------------\n",indx2[tempindx],rank);
701         }
702      }
703 
704       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
705       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
706 
707       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
708       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
709       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
710       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
711       break;
712 
713  default:
714   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
715   break;
716  }
717 
718  return 0;
719 }
720 EXTERN_C_END
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "OutputTransformFFT"
724 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
725 {
726   PetscErrorCode ierr;
727   PetscFunctionBegin;
728   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 /*
733       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
734   Input A, x, y
735   A - FFTW matrix
736   x - FFTW vector
737   y - PETSc vector that user can read
738   Options Database Keys:
739 + -mat_fftw_plannerflags - set FFTW planner flags
740 
741    Level: intermediate
742 
743 */
744 
745 EXTERN_C_BEGIN
746 #undef __FUNCT__
747 #define __FUNCT__ "OutputTransformFFT_FTTW"
748 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
749 {
750   PetscErrorCode ierr;
751   MPI_Comm       comm=((PetscObject)A)->comm;
752   Mat_FFT        *fft = (Mat_FFT*)A->data;
753   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
754   PetscInt       N=fft->N, N1, n1 ,NM;
755   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
756   PetscInt       low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4;
757   PetscInt       i,j,k,rank,size;
758   ptrdiff_t      alloc_local,local_n0,local_0_start;
759   ptrdiff_t      local_n1,local_1_start;
760   VecScatter     vecscat;
761   IS             list1,list2;
762 
763   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
764   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
765   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
766   printf("Local ownership starts at %d\n",low);
767 
768  switch (ndim){
769  case 1:
770   SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW");
771   break;
772  case 2:
773       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
774       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
775 
776      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
777      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
778      printf("Val local_0_start = %ld\n",local_0_start);
779 
780      if (dim[1]%2==0)
781       NM = dim[1]+2;
782     else
783       NM = dim[1]+1;
784 
785 
786 
787      for (i=0;i<local_n0;i++){
788         for (j=0;j<dim[1];j++){
789             tempindx = i*dim[1] + j;
790             tempindx1 = i*NM + j;
791             indx1[tempindx]=local_0_start*dim[1]+tempindx;
792             indx2[tempindx]=low+tempindx1;
793        //     printf("Val tempindx1 = %d\n",tempindx1);
794        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
795        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
796        //     printf("-------------------------\n",indx2[tempindx],rank);
797         }
798      }
799 
800      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
801      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
802 
803      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
804      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
807   break;
808 
809  case 3:
810       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
811       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
812 
813      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
814      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
815      printf("Val local_0_start = %ld\n",local_0_start);
816 
817      if (dim[2]%2==0)
818       NM = dim[2]+2;
819     else
820       NM = dim[2]+1;
821 
822      for (i=0;i<local_n0;i++){
823         for (j=0;j<dim[1];j++){
824            for (k=0;k<dim[2];k++){
825               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
826               tempindx1 = i*dim[1]*NM + j*NM + k;
827               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
828               indx2[tempindx]=low+tempindx1;
829            }
830         //    printf("Val tempindx1 = %d\n",tempindx1);
831         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
832         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
833         //    printf("-------------------------\n",indx2[tempindx],rank);
834         }
835      }
836 
837      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
838      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
839 
840      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
841      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
843      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
844   break;
845 
846  default:
847   SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet");
848   break;
849  }
850  return 0;
851 }
852 EXTERN_C_END
853 
854 EXTERN_C_BEGIN
855 #undef __FUNCT__
856 #define __FUNCT__ "MatCreate_FFTW"
857 /*
858       MatCreate_FFTW - Creates a matrix object that provides FFT
859   via the external package FFTW
860   Options Database Keys:
861 + -mat_fftw_plannerflags - set FFTW planner flags
862 
863    Level: intermediate
864 
865 */
866 
867 PetscErrorCode MatCreate_FFTW(Mat A)
868 {
869   PetscErrorCode ierr;
870   MPI_Comm       comm=((PetscObject)A)->comm;
871   Mat_FFT        *fft=(Mat_FFT*)A->data;
872   Mat_FFTW       *fftw;
873   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
874   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
875   PetscBool      flg;
876   PetscInt       p_flag,partial_dim=1,ctr,N1;
877   PetscMPIInt    size,rank;
878   ptrdiff_t      *pdim, temp;
879   ptrdiff_t      local_n1,local_1_start;
880 
881   PetscFunctionBegin;
882 //#if !defined(PETSC_USE_COMPLEX)
883 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
884 //#endif
885 
886   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
887   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
888   ierr = MPI_Barrier(PETSC_COMM_WORLD);
889 
890   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
891   pdim[0] = dim[0];
892   for(ctr=1;ctr<ndim;ctr++)
893       {
894           partial_dim *= dim[ctr];
895           pdim[ctr] = dim[ctr];
896       }
897 //#if !defined(PETSC_USE_COMPLEX)
898 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
899 //#endif
900 
901 //  printf("partial dimension is %d",partial_dim);
902   if (size == 1) {
903     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
904     n = N;
905   } else {
906     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
907     switch (ndim){
908     case 1:
909 #if !defined(PETSC_USE_COMPLEX)
910   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
911 #endif
912       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
913       n = (PetscInt)local_n0;
914       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
915 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
916       break;
917     case 2:
918 #if defined(PETSC_USE_COMPLEX)
919       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
920       /*
921        PetscMPIInt    rank;
922        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
923        PetscSynchronizedFlush(comm);
924        */
925       n = (PetscInt)local_n0*dim[1];
926       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
927 #else
928       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
929       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
930       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
931 #endif
932       break;
933     case 3:
934 //      printf("The value of alloc local is %d",alloc_local);
935 #if defined(PETSC_USE_COMPLEX)
936       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
937       n = (PetscInt)local_n0*dim[1]*dim[2];
938       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
939 #else
940       printf("The code comes here\n");
941       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
942       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
943       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
944 #endif
945       break;
946     default:
947 #if defined(PETSC_USE_COMPLEX)
948       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
949 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
950 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
951       n = (PetscInt)local_n0*partial_dim;
952 //      printf("New partial dimension is %d %d %d",n,N,ndim);
953       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
954 #else
955       temp = pdim[ndim-1];
956       pdim[ndim-1]= temp/2 + 1;
957       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
958       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
959       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
960       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
961       pdim[ndim-1] = temp;
962       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
963       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
964 #endif
965       break;
966     }
967   }
968   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
969   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
970   fft->data = (void*)fftw;
971 
972   fft->n           = n;
973   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
974   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
975   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
976 
977   fftw->p_forward  = 0;
978   fftw->p_backward = 0;
979   fftw->p_flag     = FFTW_ESTIMATE;
980 
981   if (size == 1){
982     A->ops->mult          = MatMult_SeqFFTW;
983     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
984   } else {
985     A->ops->mult          = MatMult_MPIFFTW;
986     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
987   }
988   fft->matdestroy          = MatDestroy_FFTW;
989   A->ops->getvecs       = MatGetVecs_FFTW;
990   A->assembled          = PETSC_TRUE;
991 #if !defined(PETSC_USE_COMPLEX)
992   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
993   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
994 #endif
995 
996   /* get runtime options */
997   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
998     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
999     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1000   PetscOptionsEnd();
1001   PetscFunctionReturn(0);
1002 }
1003 EXTERN_C_END
1004 
1005 
1006 
1007 
1008