xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 0da9a4f0a449d38ef004cd40281e9c355707fca0)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40 //#if !defined(PETSC_USE_COMPLEX)
41 
42 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
43 //#endif
44   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
45   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
46   if (!fftw->p_forward){ /* create a plan, then excute it */
47     switch (ndim){
48     case 1:
49 #if defined(PETSC_USE_COMPLEX)
50       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
51 #else
52       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
53 #endif
54       break;
55     case 2:
56 #if defined(PETSC_USE_COMPLEX)
57       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58 #else
59       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
60 #endif
61       break;
62     case 3:
63 #if defined(PETSC_USE_COMPLEX)
64       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
65 #else
66       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
67 #endif
68       break;
69     default:
70 #if defined(PETSC_USE_COMPLEX)
71       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
72 #else
73       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
74 #endif
75       break;
76     }
77     fftw->finarray  = x_array;
78     fftw->foutarray = y_array;
79     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
80                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
81     fftw_execute(fftw->p_forward);
82   } else { /* use existing plan */
83     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
84       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
85     } else {
86       fftw_execute(fftw->p_forward);
87     }
88   }
89   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
90   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
91   PetscFunctionReturn(0);
92 }
93 
94 #undef __FUNCT__
95 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
96 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
97 {
98   PetscErrorCode ierr;
99   Mat_FFT        *fft = (Mat_FFT*)A->data;
100   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
101   PetscScalar    *x_array,*y_array;
102   PetscInt       ndim=fft->ndim,*dim=fft->dim;
103 
104   PetscFunctionBegin;
105 //#if !defined(PETSC_USE_COMPLEX)
106 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
107 //#endif
108   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
109   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
110   if (!fftw->p_backward){ /* create a plan, then excute it */
111     switch (ndim){
112     case 1:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 2:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     case 3:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     default:
134 #if defined(PETSC_USE_COMPLEX)
135       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
136 #else
137       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
138 #endif
139       break;
140     }
141     fftw->binarray  = x_array;
142     fftw->boutarray = y_array;
143     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
144   } else { /* use existing plan */
145     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
146       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
147     } else {
148       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
149     }
150   }
151   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
152   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
153   PetscFunctionReturn(0);
154 }
155 
156 #undef __FUNCT__
157 #define __FUNCT__ "MatMult_MPIFFTW"
158 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
159 {
160   PetscErrorCode ierr;
161   Mat_FFT        *fft  = (Mat_FFT*)A->data;
162   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
163   PetscScalar    *x_array,*y_array;
164   PetscInt       ndim=fft->ndim,*dim=fft->dim;
165   MPI_Comm       comm=((PetscObject)A)->comm;
166 // PetscInt ctr;
167 //  ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
168 //  ndim1=(ptrdiff_t) ndim;
169 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
170 
171 //  for(ctr=0;ctr<ndim;ctr++)
172 //     {
173 //      pdim[ctr] = dim[ctr];
174 //     }
175 
176   PetscFunctionBegin;
177 //#if !defined(PETSC_USE_COMPLEX)
178 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
179 //#endif
180 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
181 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
182 
183   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
184   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
185   if (!fftw->p_forward){ /* create a plan, then excute it */
186     switch (ndim){
187     case 1:
188 #if defined(PETSC_USE_COMPLEX)
189       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
190 #endif
191       break;
192     case 2:
193 #if defined(PETSC_USE_COMPLEX)
194       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
195 #else
196       printf("The code comes here \n");
197       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
198 #endif
199       break;
200     case 3:
201 #if defined(PETSC_USE_COMPLEX)
202       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
203 #else
204       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
205 #endif
206       break;
207     default:
208 #if defined(PETSC_USE_COMPLEX)
209       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
210 #else
211       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
212 #endif
213  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
214       break;
215     }
216     fftw->finarray  = x_array;
217     fftw->foutarray = y_array;
218     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
219                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
220     fftw_execute(fftw->p_forward);
221   } else { /* use existing plan */
222     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
223       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
224     } else {
225       fftw_execute(fftw->p_forward);
226     }
227   }
228   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
229   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
235 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
236 {
237   PetscErrorCode ierr;
238   Mat_FFT        *fft  = (Mat_FFT*)A->data;
239   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
240   PetscScalar    *x_array,*y_array;
241   PetscInt       ndim=fft->ndim,*dim=fft->dim;
242   MPI_Comm       comm=((PetscObject)A)->comm;
243 //  PetscInt       ctr;
244 //  ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
245 //  ndim1=(ptrdiff_t) ndim;
246 //  pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
247 
248 //  for(ctr=0;ctr<ndim;ctr++)
249 //     {
250 //      pdim[ctr] = dim[ctr];
251 //     }
252 
253   PetscFunctionBegin;
254 //#if !defined(PETSC_USE_COMPLEX)
255 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
256 //#endif
257 //  ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr);
258 // should pdim be a member of Mat_FFTW?
259 //  for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
260 
261   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
262   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
263   if (!fftw->p_backward){ /* create a plan, then excute it */
264     switch (ndim){
265     case 1:
266 #if defined(PETSC_USE_COMPLEX)
267       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
268 #endif
269       break;
270     case 2:
271 #if defined(PETSC_USE_COMPLEX)
272       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
273 #else
274       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
275 #endif
276       break;
277     case 3:
278 #if defined(PETSC_USE_COMPLEX)
279       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
280 #else
281       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
282 #endif
283       break;
284     default:
285 #if defined(PETSC_USE_COMPLEX)
286       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
287 #else
288       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
289 #endif
290 //      fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
291       break;
292     }
293     fftw->binarray  = x_array;
294     fftw->boutarray = y_array;
295     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
296   } else { /* use existing plan */
297     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
298       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
299     } else {
300       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
301     }
302   }
303   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
304   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
305   PetscFunctionReturn(0);
306 }
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "MatDestroy_FFTW"
310 PetscErrorCode MatDestroy_FFTW(Mat A)
311 {
312   Mat_FFT        *fft = (Mat_FFT*)A->data;
313   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
314   PetscErrorCode ierr;
315 
316   PetscFunctionBegin;
317 //#if !defined(PETSC_USE_COMPLEX)
318 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
319 //#endif
320   fftw_destroy_plan(fftw->p_forward);
321   fftw_destroy_plan(fftw->p_backward);
322   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
323   ierr = PetscFree(fft->data);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
328 #undef __FUNCT__
329 #define __FUNCT__ "VecDestroy_MPIFFTW"
330 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
331 {
332   PetscErrorCode  ierr;
333   PetscScalar     *array;
334 
335   PetscFunctionBegin;
336 //#if !defined(PETSC_USE_COMPLEX)
337 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
338 //#endif
339   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
340   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
341   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
342   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "MatGetVecs1DC_FFTW"
348 /*
349    MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the
350      parallel layout determined by FFTW-1D
351 
352 */
353 PetscErrorCode  MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout)
354 {
355   PetscErrorCode ierr;
356   PetscMPIInt    size,rank;
357   MPI_Comm       comm=((PetscObject)A)->comm;
358   Mat_FFT        *fft = (Mat_FFT*)A->data;
359 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
360   PetscInt       N=fft->N;
361   PetscInt       ndim=fft->ndim,*dim=fft->dim;
362   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
363   ptrdiff_t      f_local_n1,f_local_1_end;
364   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
365   ptrdiff_t      b_local_n1,b_local_1_end;
366   fftw_complex   *data_fin,*data_fout,*data_bin,*data_bout;
367 
368   PetscFunctionBegin;
369 #if !defined(PETSC_USE_COMPLEX)
370   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
371 #endif
372   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
373   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
374   if (size == 1){
375     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
376   }
377   else {
378       if (ndim>1){
379         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
380       else {
381           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
382           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
383           if (fin) {
384             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
385             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
386             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
387           }
388           if (fout) {
389             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
390             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
391             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
392           }
393           if (bout) {
394             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
395             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
396             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
397           }
398   }
399   if (fin){
400     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
401   }
402   if (fout){
403     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
404   }
405   if (bout){
406     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
407   }
408   PetscFunctionReturn(0);
409 }
410 
411 
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatGetVecs_FFTW"
416 /*
417    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
418      parallel layout determined by FFTW
419 
420    Collective on Mat
421 
422    Input Parameter:
423 .  mat - the matrix
424 
425    Output Parameter:
426 +   fin - (optional) input vector of forward FFTW
427 -   fout - (optional) output vector of forward FFTW
428 
429   Level: advanced
430 
431 .seealso: MatCreateFFTW()
432 */
433 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
434 {
435   PetscErrorCode ierr;
436   PetscMPIInt    size,rank;
437   MPI_Comm       comm=((PetscObject)A)->comm;
438   Mat_FFT        *fft = (Mat_FFT*)A->data;
439   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
440   PetscInt       N=fft->N, N1, n1,vsize;
441   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
442 
443   PetscFunctionBegin;
444 //#if !defined(PETSC_USE_COMPLEX)
445 //  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
446 //#endif
447   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
448   PetscValidType(A,1);
449 
450   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
451   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
452   if (size == 1){ /* sequential case */
453 #if defined(PETSC_USE_COMPLEX)
454     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
455     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
456 #else
457     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
458     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
459     printf("The code successfully comes at the end of the routine with one processor\n");
460 #endif
461   } else {        /* mpi case */
462     ptrdiff_t      alloc_local,local_n0,local_0_start;
463     ptrdiff_t      local_n1,local_1_end;
464     fftw_complex   *data_fin,*data_fout;
465     double         *data_finr ;
466     ptrdiff_t      local_1_start,temp;
467 //    PetscInt ctr;
468 //    ptrdiff_t      ndim1,*pdim;
469 //    ndim1=(ptrdiff_t) ndim;
470 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
471 
472 //    for(ctr=0;ctr<ndim;ctr++)
473 //        {k
474 //           pdim[ctr] = dim[ctr];
475 //       }
476 
477 
478 
479     switch (ndim){
480     case 1:
481       /* Get local size */
482       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
483 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
484       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
485       if (fin) {
486         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
487         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
488         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
489       }
490       if (fout) {
491         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
493         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
494       }
495       break;
496     case 2:
497 #if !defined(PETSC_USE_COMPLEX)
498       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
499       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
500       if (fin) {
501         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
502         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
503         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
504         //printf("The code comes here with vector size %d\n",vsize);
505         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
506       }
507       if (fout) {
508         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
509         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
510         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
511       }
512       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
513 
514 #else
515       /* Get local size */
516      printf("Hope this does not come here");
517       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
518       if (fin) {
519         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
520         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
521         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
522       }
523       if (fout) {
524         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
525         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
526         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
527       }
528      printf("Hope this does not come here");
529 #endif
530       break;
531     case 3:
532       /* Get local size */
533 #if !defined(PETSC_USE_COMPLEX)
534       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
535       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
536       if (fin) {
537         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
538         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
539         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
540         //printf("The code comes here with vector size %d\n",vsize);
541         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
542       }
543       if (fout) {
544         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
545         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
546         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
547       }
548       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
549 
550 
551 #else
552       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
553 //      printf("The quantity n is %d",n);
554       if (fin) {
555         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
556         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
557         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
558       }
559       if (fout) {
560         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
561         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
562         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
563       }
564 #endif
565       break;
566     default:
567       /* Get local size */
568 #if !defined(PETSC_USE_COMPLEX)
569       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
570       printf("The value of temp is %ld\n",temp);
571       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
572       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
573       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
574       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
575       if (fin) {
576         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
577         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
578         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
579         //printf("The code comes here with vector size %d\n",vsize);
580         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
581       }
582       if (fout) {
583         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
584         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
585         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
586       }
587 
588 #else
589       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
590 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
591 //      printf("The value of alloc local is %d",alloc_local);
592 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
593 //      for(i=0;i<ndim;i++)
594 //         {
595 //          pdim[i]=dim[i];printf("%d",pdim[i]);
596 //         }
597 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
598 //      printf("The quantity n is %d",n);
599       if (fin) {
600         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
601         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
602         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
603       }
604       if (fout) {
605         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
606         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
607         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
608       }
609 #endif
610       break;
611     }
612   }
613 //  if (fin){
614 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
615 //  }
616 //  if (fout){
617 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
618 //  }
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "InputTransformFFT"
624 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
625 {
626   PetscErrorCode ierr;
627   PetscFunctionBegin;
628   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 /*
633       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block
634   Input A, x, y
635   A - FFTW matrix
636   x - user data
637   Options Database Keys:
638 + -mat_fftw_plannerflags - set FFTW planner flags
639 
640    Level: intermediate
641 
642 */
643 
644 EXTERN_C_BEGIN
645 #undef __FUNCT__
646 #define __FUNCT__ "InputTransformFFT_FTTW"
647 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
648 {
649   PetscErrorCode ierr;
650   MPI_Comm       comm=((PetscObject)A)->comm;
651   Mat_FFT        *fft = (Mat_FFT*)A->data;
652   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
653   PetscInt       N=fft->N, N1, n1 ,NM;
654   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
655   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
656   PetscInt       i,j,k,rank,size,partial_dim;
657   ptrdiff_t      alloc_local,local_n0,local_0_start;
658   ptrdiff_t      local_n1,local_1_start,temp;
659   VecScatter     vecscat;
660   IS             list1,list2;
661 
662   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
663   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
664   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
665   printf("Local ownership starts at %d\n",low);
666 
667   if (size==1)
668     {
669      switch (ndim){
670      case 1:
671           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
672           for (i=0;i<dim[0];i++)
673              {
674               indx1[i] = i;
675              }
676           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
677           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
678           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
679           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
680           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
681           ierr = ISDestroy(&list1);CHKERRQ(ierr);
682           ierr = PetscFree(indx1);CHKERRQ(ierr);
683      break;
684 
685      case 2:
686           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
687           for (i=0;i<dim[0];i++){
688              for (j=0;j<dim[1];j++){
689                 indx1[i*dim[1]+j] = i*dim[1] + j;
690              }
691           }
692           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
693           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
694           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
695           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
696           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
697           ierr = ISDestroy(&list1);CHKERRQ(ierr);
698           ierr = PetscFree(indx1);CHKERRQ(ierr);
699           break;
700      case 3:
701           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
702           for (i=0;i<dim[0];i++){
703              for (j=0;j<dim[1];j++){
704                 for (k=0;k<dim[2];k++){
705                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
706                 }
707              }
708           }
709           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
710           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
711           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
712           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
713           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
714           ierr = ISDestroy(&list1);CHKERRQ(ierr);
715           ierr = PetscFree(indx1);CHKERRQ(ierr);
716           break;
717      default:
718           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);
719           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
720           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
721           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
723           ierr = ISDestroy(&list1);CHKERRQ(ierr);
724           //ierr = ISDestroy(list1);CHKERRQ(ierr);
725           break;
726       }
727     }
728 
729  else{
730 
731  switch (ndim){
732  case 1:
733   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
734   break;
735  case 2:
736       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
737       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
738 
739       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
740       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
741       printf("Val local_0_start = %ld\n",local_0_start);
742 
743       if (dim[1]%2==0)
744         NM = dim[1]+2;
745       else
746         NM = dim[1]+1;
747 
748       for (i=0;i<local_n0;i++){
749          for (j=0;j<dim[1];j++){
750             tempindx = i*dim[1] + j;
751             tempindx1 = i*NM + j;
752             indx1[tempindx]=local_0_start*dim[1]+tempindx;
753             indx2[tempindx]=low+tempindx1;
754            // printf("Val tempindx1 = %d\n",tempindx1);
755   //          printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
756   //          printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
757   //          printf("-------------------------\n",indx2[tempindx],rank);
758         }
759      }
760 
761       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
762       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
763 
764       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
765       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
766       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
767       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
768       ierr = ISDestroy(&list1);CHKERRQ(ierr);
769       ierr = ISDestroy(&list2);CHKERRQ(ierr);
770       ierr = PetscFree(indx1);CHKERRQ(ierr);
771       ierr = PetscFree(indx2);CHKERRQ(ierr);
772       break;
773 
774  case 3:
775       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
776       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
777 
778       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
779       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
780       printf("Val local_0_start = %ld\n",local_0_start);
781 
782       if (dim[2]%2==0)
783         NM = dim[2]+2;
784       else
785         NM = dim[2]+1;
786 
787       for (i=0;i<local_n0;i++){
788          for (j=0;j<dim[1];j++){
789             for (k=0;k<dim[2];k++){
790                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
791                tempindx1 = i*dim[1]*NM + j*NM + k;
792                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
793                indx2[tempindx]=low+tempindx1;
794             }
795            // printf("Val tempindx1 = %d\n",tempindx1);
796            // printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
797            // printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
798            // printf("-------------------------\n",indx2[tempindx],rank);
799         }
800      }
801 
802       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
803       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
804 
805       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
806       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
807       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
808       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
809       ierr = ISDestroy(&list1);CHKERRQ(ierr);
810       ierr = ISDestroy(&list2);CHKERRQ(ierr);
811       ierr = PetscFree(indx1);CHKERRQ(ierr);
812       ierr = PetscFree(indx2);CHKERRQ(ierr);
813       break;
814 
815  default:
816       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
817       printf("The value of temp is %ld\n",temp);
818       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
819       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
820       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
821       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
822 
823       partial_dim = fftw->partial_dim;
824       printf("The value of partial dim is %d\n",partial_dim);
825 
826       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
827       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
828       printf("Val local_0_start = %ld\n",local_0_start);
829 
830       if (dim[ndim-1]%2==0)
831         NM = 2;
832       else
833         NM = 1;
834 
835       j = low;
836       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
837          {
838           indx1[i] = local_0_start*partial_dim + i;
839           indx2[i] = j;
840           //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
841           if (k%dim[ndim-1]==0)
842             { j+=NM;}
843           j++;
844          }
845       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
846       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
847       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
848       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
849       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
850       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
851       ierr = ISDestroy(&list1);CHKERRQ(ierr);
852       ierr = ISDestroy(&list2);CHKERRQ(ierr);
853       ierr = PetscFree(indx1);CHKERRQ(ierr);
854       ierr = PetscFree(indx2);CHKERRQ(ierr);
855       break;
856   }
857  }
858 
859  return 0;
860 }
861 EXTERN_C_END
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "OutputTransformFFT"
865 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
866 {
867   PetscErrorCode ierr;
868   PetscFunctionBegin;
869   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 /*
874       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
875   Input A, x, y
876   A - FFTW matrix
877   x - FFTW vector
878   y - PETSc vector that user can read
879   Options Database Keys:
880 + -mat_fftw_plannerflags - set FFTW planner flags
881 
882    Level: intermediate
883 
884 */
885 
886 EXTERN_C_BEGIN
887 #undef __FUNCT__
888 #define __FUNCT__ "OutputTransformFFT_FTTW"
889 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
890 {
891   PetscErrorCode ierr;
892   MPI_Comm       comm=((PetscObject)A)->comm;
893   Mat_FFT        *fft = (Mat_FFT*)A->data;
894   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
895   PetscInt       N=fft->N, N1, n1 ,NM;
896   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
897   PetscInt       low, *indx1, *indx2, tempindx, tempindx1;
898   PetscInt       i,j,k,rank,size,partial_dim;
899   ptrdiff_t      alloc_local,local_n0,local_0_start;
900   ptrdiff_t      local_n1,local_1_start,temp;
901   VecScatter     vecscat;
902   IS             list1,list2;
903 
904   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
905   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
906   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);
907   printf("Local ownership starts at %d\n",low);
908 
909   if (size==1){
910     switch (ndim){
911     case 1:
912            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
913           for (i=0;i<dim[0];i++)
914              {
915               indx1[i] = i;
916              }
917           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
918           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
919           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
922           ierr = ISDestroy(&list1);CHKERRQ(ierr);
923           ierr = PetscFree(indx1);CHKERRQ(ierr);
924           break;
925 
926     case 2:
927          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
928           for (i=0;i<dim[0];i++){
929              for (j=0;j<dim[1];j++){
930                 indx1[i*dim[1]+j] = i*dim[1] + j;
931              }
932           }
933          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
934          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
935          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
938          ierr = ISDestroy(&list1);CHKERRQ(ierr);
939          ierr = PetscFree(indx1);CHKERRQ(ierr);
940          break;
941     case 3:
942          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
943          for (i=0;i<dim[0];i++){
944             for (j=0;j<dim[1];j++){
945                for (k=0;k<dim[2];k++){
946                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
947                }
948             }
949          }
950          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
951          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
952          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
953          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
955          ierr = ISDestroy(&list1);CHKERRQ(ierr);
956          ierr = PetscFree(indx1);CHKERRQ(ierr);
957          break;
958     default:
959          ierr = ISCreateStride(comm,N,0,1,&list1);
960          //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
961          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
962          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
965          ierr = ISDestroy(&list1);CHKERRQ(ierr);
966          break;
967     }
968   }
969   else{
970 
971  switch (ndim){
972  case 1:
973   SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
974   break;
975  case 2:
976       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
977       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
978 
979      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
980      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
981      printf("Val local_0_start = %ld\n",local_0_start);
982 
983      if (dim[1]%2==0)
984       NM = dim[1]+2;
985     else
986       NM = dim[1]+1;
987 
988 
989 
990      for (i=0;i<local_n0;i++){
991         for (j=0;j<dim[1];j++){
992             tempindx = i*dim[1] + j;
993             tempindx1 = i*NM + j;
994             indx1[tempindx]=local_0_start*dim[1]+tempindx;
995             indx2[tempindx]=low+tempindx1;
996        //     printf("Val tempindx1 = %d\n",tempindx1);
997        //     printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
998        //     printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
999        //     printf("-------------------------\n",indx2[tempindx],rank);
1000         }
1001      }
1002 
1003      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1004      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1005 
1006      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1007      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1008      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1010      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1011      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1012      ierr = PetscFree(indx1);CHKERRQ(ierr);
1013      ierr = PetscFree(indx2);CHKERRQ(ierr);
1014   break;
1015 
1016  case 3:
1017       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1018       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1019 
1020      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1021      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1022      printf("Val local_0_start = %ld\n",local_0_start);
1023 
1024      if (dim[2]%2==0)
1025       NM = dim[2]+2;
1026     else
1027       NM = dim[2]+1;
1028 
1029      for (i=0;i<local_n0;i++){
1030         for (j=0;j<dim[1];j++){
1031            for (k=0;k<dim[2];k++){
1032               tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1033               tempindx1 = i*dim[1]*NM + j*NM + k;
1034               indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1035               indx2[tempindx]=low+tempindx1;
1036            }
1037         //    printf("Val tempindx1 = %d\n",tempindx1);
1038         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1039         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1040         //    printf("-------------------------\n",indx2[tempindx],rank);
1041         }
1042      }
1043 
1044      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1045      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1046 
1047      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1048      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1049      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1050      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1051      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1052      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1053      ierr = PetscFree(indx1);CHKERRQ(ierr);
1054      ierr = PetscFree(indx2);CHKERRQ(ierr);
1055   break;
1056 
1057   default:
1058      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1059      printf("The value of temp is %ld\n",temp);
1060      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1061      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1062      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1063      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1064 
1065      partial_dim = fftw->partial_dim;
1066      printf("The value of partial dim is %d\n",partial_dim);
1067 
1068      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1069      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1070      printf("Val local_0_start = %ld\n",local_0_start);
1071 
1072      if (dim[ndim-1]%2==0)
1073        NM = 2;
1074      else
1075        NM = 1;
1076 
1077      j = low;
1078      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1079         {
1080          indx1[i] = local_0_start*partial_dim + i;
1081          indx2[i] = j;
1082          //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank);
1083          if (k%dim[ndim-1]==0)
1084            { j+=NM;}
1085          j++;
1086         }
1087      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1088      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1089 
1090       //ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1091 
1092 
1093      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1094      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1095      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1096      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1097      ierr = ISDestroy(&list1);CHKERRQ(ierr);
1098      ierr = ISDestroy(&list2);CHKERRQ(ierr);
1099      ierr = PetscFree(indx1);CHKERRQ(ierr);
1100      ierr = PetscFree(indx2);CHKERRQ(ierr);
1101 
1102   break;
1103  }
1104  }
1105  return 0;
1106 }
1107 EXTERN_C_END
1108 
1109 EXTERN_C_BEGIN
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatCreate_FFTW"
1112 /*
1113       MatCreate_FFTW - Creates a matrix object that provides FFT
1114   via the external package FFTW
1115   Options Database Keys:
1116 + -mat_fftw_plannerflags - set FFTW planner flags
1117 
1118    Level: intermediate
1119 
1120 */
1121 
1122 PetscErrorCode MatCreate_FFTW(Mat A)
1123 {
1124   PetscErrorCode ierr;
1125   MPI_Comm       comm=((PetscObject)A)->comm;
1126   Mat_FFT        *fft=(Mat_FFT*)A->data;
1127   Mat_FFTW       *fftw;
1128   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1129   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1130   PetscBool      flg;
1131   PetscInt       p_flag,partial_dim=1,ctr,N1;
1132   PetscMPIInt    size,rank;
1133   ptrdiff_t      *pdim, temp;
1134   ptrdiff_t      local_n1,local_1_start;
1135 
1136   PetscFunctionBegin;
1137 //#if !defined(PETSC_USE_COMPLEX)
1138 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
1139 //#endif
1140 
1141   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1142   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1143   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1144 
1145   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1146   pdim[0] = dim[0];
1147   for(ctr=1;ctr<ndim;ctr++)
1148       {
1149           partial_dim *= dim[ctr];
1150           pdim[ctr] = dim[ctr];
1151       }
1152 //#if !defined(PETSC_USE_COMPLEX)
1153 //  SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
1154 //#endif
1155 
1156 //  printf("partial dimension is %d",partial_dim);
1157   if (size == 1) {
1158 #if defined(PETSC_USE_COMPLEX)
1159     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1160     n = N;
1161 #else
1162     int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1163     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1164     n = tot_dim;
1165 #endif
1166 
1167   } else {
1168     ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end;
1169     switch (ndim){
1170     case 1:
1171 #if !defined(PETSC_USE_COMPLEX)
1172   SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1173 #else
1174       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
1175       n = (PetscInt)local_n0;
1176       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1177 #endif
1178 //      PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW);
1179       break;
1180     case 2:
1181 #if defined(PETSC_USE_COMPLEX)
1182       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1183       /*
1184        PetscMPIInt    rank;
1185        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1186        PetscSynchronizedFlush(comm);
1187        */
1188       n = (PetscInt)local_n0*dim[1];
1189       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1190 #else
1191       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1192       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1193       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1194 #endif
1195       break;
1196     case 3:
1197 //      printf("The value of alloc local is %d",alloc_local);
1198 #if defined(PETSC_USE_COMPLEX)
1199       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1200       n = (PetscInt)local_n0*dim[1]*dim[2];
1201       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1202 #else
1203       printf("The code comes here\n");
1204       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1205       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1206       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1207 #endif
1208       break;
1209     default:
1210 #if defined(PETSC_USE_COMPLEX)
1211       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1212 //      printf("The value of alloc local is %ld from process %d\n",alloc_local,rank);
1213 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
1214       n = (PetscInt)local_n0*partial_dim;
1215 //      printf("New partial dimension is %d %d %d",n,N,ndim);
1216       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1217 #else
1218       temp = pdim[ndim-1];
1219       pdim[ndim-1]= temp/2 + 1;
1220       printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]);
1221       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1222       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1223       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1224       pdim[ndim-1] = temp;
1225       printf("For Multi dim case n = %d, N1  = %d\n",n,N1);
1226       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1227 #endif
1228       break;
1229     }
1230   }
1231   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1232   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1233   fft->data = (void*)fftw;
1234 
1235   fft->n           = n;
1236   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1237   fftw->partial_dim = partial_dim;
1238   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1239   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1240 
1241   fftw->p_forward  = 0;
1242   fftw->p_backward = 0;
1243   fftw->p_flag     = FFTW_ESTIMATE;
1244 
1245   if (size == 1){
1246     A->ops->mult          = MatMult_SeqFFTW;
1247     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1248   } else {
1249     A->ops->mult          = MatMult_MPIFFTW;
1250     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1251   }
1252   fft->matdestroy          = MatDestroy_FFTW;
1253 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1254   A->ops->getvecs       = MatGetVecs_FFTW;
1255   A->assembled          = PETSC_TRUE;
1256 #if !defined(PETSC_USE_COMPLEX)
1257   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1258   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1259 #endif
1260 
1261   /* get runtime options */
1262   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1263     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1264     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1265   PetscOptionsEnd();
1266   PetscFunctionReturn(0);
1267 }
1268 EXTERN_C_END
1269 
1270 
1271 
1272 
1273