xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision fd4b34e9bf1ddd70e4f28b8731e6a5d01ccda0e5)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatGetVecsFFTW_1DC"
305 /*
306     - Get Vectors(s) compatible with matrix, i.e. with the
307      parallel layout determined by FFTW-1D
308 
309 */
310 PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311 {
312   PetscErrorCode ierr;
313   PetscMPIInt    size,rank;
314   MPI_Comm       comm=((PetscObject)A)->comm;
315   Mat_FFT        *fft = (Mat_FFT*)A->data;
316 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317   PetscInt       N=fft->N;
318   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320   ptrdiff_t      f_local_n1,f_local_1_end;
321   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322   ptrdiff_t      b_local_n1,b_local_1_end;
323   fftw_complex   *data_fin,*data_fout,*data_bout;
324 
325   PetscFunctionBegin;
326 #if !defined(PETSC_USE_COMPLEX)
327   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328 #endif
329   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331   if (size == 1){
332     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333   }
334   else {
335       if (ndim>1){
336         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337       else {
338           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340           if (fin) {
341             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344           }
345           if (fout) {
346             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349           }
350           if (bout) {
351             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354           }
355   }
356   if (fin){
357     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358   }
359   if (fout){
360     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361   }
362   if (bout){
363     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatGetVecsFFTW"
373 /*@
374    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
375      parallel layout determined by FFTW
376 
377    Collective on Mat
378 
379    Input Parameter:
380 .  mat - the matrix
381 
382    Output Parameter:
383 +   fin - (optional) input vector of forward FFTW
384 -   fout - (optional) output vector of forward FFTW
385 
386   Level: advanced
387 
388 .seealso: MatCreateFFTW()
389 @*/
390 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
391 {
392   PetscErrorCode ierr;
393   PetscFunctionBegin;
394   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
401 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
402 {
403   PetscErrorCode ierr;
404   PetscMPIInt    size,rank;
405   MPI_Comm       comm=((PetscObject)A)->comm;
406   Mat_FFT        *fft = (Mat_FFT*)A->data;
407   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
408   PetscInt       N=fft->N;
409   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
410 
411   PetscFunctionBegin;
412   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
413   PetscValidType(A,1);
414 
415   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
416   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
417   if (size == 1){ /* sequential case */
418 #if defined(PETSC_USE_COMPLEX)
419     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
420     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
421     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
422 #else
423     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
424     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
425     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
426 #endif
427   } else {
428     ptrdiff_t      alloc_local,local_n0,local_0_start;
429     ptrdiff_t      local_n1;
430     fftw_complex   *data_fout;
431     ptrdiff_t      local_1_start;
432 #if defined(PETSC_USE_COMPLEX)
433     fftw_complex   *data_fin,*data_bout;
434 #else
435     double         *data_finr,*data_boutr;
436     PetscInt       n1,N1,vsize;
437     ptrdiff_t      temp;
438 #endif
439 
440     switch (ndim){
441           case 1:
442 #if !defined(PETSC_USE_COMPLEX)
443                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
444 #else
445                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
446                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
447                  if (fin) {
448                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
449                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
450                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
451                          }
452                  if (fout) {
453                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
454                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
455                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
456                          }
457                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
458                  if (bout) {
459                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
460                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
461                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
462                          }
463           break;
464 #endif
465           case 2:
466 #if !defined(PETSC_USE_COMPLEX)
467                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
468                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
469                  if (fin) {
470                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
471                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
472                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
473                           }
474                  if (bout) {
475                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
476                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
477                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
478                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
479                           }
480                  //n1 = 2*local_n1*dim[0];
481                  if (fout) {
482                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
484                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
485                            }
486 #else
487       /* Get local size */
488                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
489                  if (fin) {
490                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
491                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
492                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
493                           }
494                  if (fout) {
495                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
496                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
497                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
498                            }
499                  if (bout) {
500                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
501                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
502                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
503                           }
504 #endif
505           break;
506           case 3:
507 #if !defined(PETSC_USE_COMPLEX)
508                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
509                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
510                  if (fin) {
511                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
512                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
513                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
514                          }
515                  if (bout) {
516                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
517                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
518                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
519                           }
520                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
521                  if (fout) {
522                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
523                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
524                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
525                           }
526 #else
527                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
528                  if (fin) {
529                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
530                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
531                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
532                          }
533                  if (fout) {
534                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
535                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
536                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
537                          }
538                  if (bout) {
539                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
540                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
541                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
542                          }
543 #endif
544           break;
545           default:
546 #if !defined(PETSC_USE_COMPLEX)
547                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
548                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
549                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
550                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
551                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
552 
553                  if (fin) {
554                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
555                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
556                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
557                         }
558                  if (bout) {
559                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
560                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
561                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
562                         }
563                  //temp = fftw->partial_dim;
564                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
565                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
566                  if (fout) {
567                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
568                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
569                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
570                         }
571 #else
572                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
573                 if (fin) {
574                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
575                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
576                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
577                        }
578                 if (fout) {
579                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
580                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
581                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
582                        }
583                 if (bout) {
584                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
585                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
586                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
587                        }
588 #endif
589             break;
590           }
591   }
592   PetscFunctionReturn(0);
593 }
594 EXTERN_C_END
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatGetVecs_FFTW"
598 /*
599    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
600      parallel layout determined by FFTW
601 
602    Collective on Mat
603 
604    Input Parameter:
605 .  mat - the matrix
606 
607    Output Parameter:
608 +   fin - (optional) input vector of forward FFTW
609 -   fout - (optional) output vector of forward FFTW
610 
611   Level: advanced
612 
613 .seealso: MatCreateFFTW()
614 */
615 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
616 {
617   PetscErrorCode ierr;
618   PetscMPIInt    size,rank;
619   MPI_Comm       comm=((PetscObject)A)->comm;
620   Mat_FFT        *fft = (Mat_FFT*)A->data;
621   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
622   PetscInt       N=fft->N;
623   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
624 
625   PetscFunctionBegin;
626   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
627   PetscValidType(A,1);
628 
629   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
630   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
631   if (size == 1){ /* sequential case */
632 #if defined(PETSC_USE_COMPLEX)
633     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
634     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
635 #else
636     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
637     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
638 #endif
639   } else {        /* mpi case */
640     ptrdiff_t      alloc_local,local_n0,local_0_start;
641     ptrdiff_t      local_n1,local_1_end;
642     fftw_complex   *data_fin,*data_fout;
643 #if !defined(PETSC_USE_COMPLEX)
644     double         *data_finr ;
645     ptrdiff_t      local_1_start,temp;
646     PetscInt       vsize,n1,N1;
647 #endif
648 
649 //    PetscInt ctr;
650 //    ptrdiff_t      ndim1,*pdim;
651 //    ndim1=(ptrdiff_t) ndim;
652 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
653 
654 //    for(ctr=0;ctr<ndim;ctr++)
655 //        {k
656 //           pdim[ctr] = dim[ctr];
657 //       }
658 
659 
660 
661     switch (ndim){
662     case 1:
663       /* Get local size */
664       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
665 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
666       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
667       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
668       if (fin) {
669         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
670         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
671         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
672       }
673       if (fout) {
674         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
675         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
676         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
677       }
678       break;
679     case 2:
680 #if !defined(PETSC_USE_COMPLEX)
681       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
682       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
683       if (fin) {
684         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
685         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
686         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
687         //printf("The code comes here with vector size %d\n",vsize);
688         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
689       }
690       n1 = 2*local_n1*(dim[0]);
691       //n1 = 2*local_n1*dim[1];
692       printf("The values are %ld\n",local_n1);
693       if (fout) {
694         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
695         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
696         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
697       }
698       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
699 
700 #else
701       /* Get local size */
702      //printf("Hope this does not come here");
703       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
704       if (fin) {
705         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
706         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
707         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
708       }
709       if (fout) {
710         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
711         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
712         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
713       }
714      //printf("Hope this does not come here");
715 #endif
716       break;
717     case 3:
718       /* Get local size */
719 #if !defined(PETSC_USE_COMPLEX)
720       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
721       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
722       if (fin) {
723         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
724         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
725         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
726         //printf("The code comes here with vector size %d\n",vsize);
727         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
728       }
729       printf("The value is %ld",local_n1);
730       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
731       if (fout) {
732         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
733         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
734         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
735       }
736       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
737 
738 
739 #else
740       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
741 //      printf("The quantity n is %d",n);
742       if (fin) {
743         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
744         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
745         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
746       }
747       if (fout) {
748         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
749         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
750         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
751       }
752 #endif
753       break;
754     default:
755       /* Get local size */
756 #if !defined(PETSC_USE_COMPLEX)
757       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
758       printf("The value of temp is %ld\n",temp);
759       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
760       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
761       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
762       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
763       if (fin) {
764         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
765         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
766         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
767         //printf("The code comes here with vector size %d\n",vsize);
768         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
769       }
770       printf("The value is %ld. Global length is %d \n",local_n1,N1);
771       temp = fftw->partial_dim;
772       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
773 
774       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
775       if (fout) {
776         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
777         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
778         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
779       }
780 
781 #else
782       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
783 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
784 //      printf("The value of alloc local is %d",alloc_local);
785 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
786 //      for(i=0;i<ndim;i++)
787 //         {
788 //          pdim[i]=dim[i];printf("%d",pdim[i]);
789 //         }
790 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
791 //      printf("The quantity n is %d",n);
792       if (fin) {
793         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
794         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
795         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
796       }
797       if (fout) {
798         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
799         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
800         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
801       }
802 #endif
803       break;
804     }
805   }
806 //  if (fin){
807 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
808 //  }
809 //  if (fout){
810 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
811 //  }
812   PetscFunctionReturn(0);
813 }
814 
815 #undef __FUNCT__
816 #define __FUNCT__ "InputTransformFFT"
817 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
818 {
819   PetscErrorCode ierr;
820   PetscFunctionBegin;
821   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
822   PetscFunctionReturn(0);
823 }
824 
825 /*
826       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
827       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
828       changing dimension.
829   Input A, x, y
830   A - FFTW matrix
831   x - user data
832   Options Database Keys:
833 + -mat_fftw_plannerflags - set FFTW planner flags
834 
835    Level: intermediate
836 
837 */
838 
839 EXTERN_C_BEGIN
840 #undef __FUNCT__
841 #define __FUNCT__ "InputTransformFFT_FTTW"
842 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
843 {
844   PetscErrorCode ierr;
845   MPI_Comm       comm=((PetscObject)A)->comm;
846   Mat_FFT        *fft = (Mat_FFT*)A->data;
847   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
848   PetscInt       N=fft->N;
849   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
850   PetscInt       low;
851   PetscInt       rank,size;
852   ptrdiff_t      alloc_local,local_n0,local_0_start;
853   ptrdiff_t      local_n1,local_1_start;
854   VecScatter     vecscat;
855   IS             list1,list2;
856 #if !defined(PETSC_USE_COMPLEX)
857   PetscInt       i,j,k,partial_dim;
858   PetscInt       *indx1, *indx2, tempindx, tempindx1;
859   PetscInt       N1, n1 ,NM;
860   ptrdiff_t      temp;
861 #endif
862 
863   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
864   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
865   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
866 
867   if (size==1)
868     {
869 /*     switch (ndim){
870      case 1:
871           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
872           for (i=0;i<dim[0];i++)
873              {
874               indx1[i] = i;
875              }
876           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
877           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
878           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
881           ierr = ISDestroy(&list1);CHKERRQ(ierr);
882           ierr = PetscFree(indx1);CHKERRQ(ierr);
883      break;
884 
885      case 2:
886           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
887           for (i=0;i<dim[0];i++){
888              for (j=0;j<dim[1];j++){
889                 indx1[i*dim[1]+j] = i*dim[1] + j;
890              }
891           }
892           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
893           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
894           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
897           ierr = ISDestroy(&list1);CHKERRQ(ierr);
898           ierr = PetscFree(indx1);CHKERRQ(ierr);
899           break;
900      case 3:
901           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
902           for (i=0;i<dim[0];i++){
903              for (j=0;j<dim[1];j++){
904                 for (k=0;k<dim[2];k++){
905                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
906                 }
907              }
908           }
909           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
910           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
911           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
912           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
914           ierr = ISDestroy(&list1);CHKERRQ(ierr);
915           ierr = PetscFree(indx1);CHKERRQ(ierr);
916           break;
917      default:
918 */
919           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
920           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
921           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
924           ierr = ISDestroy(&list1);CHKERRQ(ierr);
925  //         break;
926   //    }
927     }
928 
929  else{
930 
931  switch (ndim){
932  case 1:
933 #if defined(PETSC_USE_COMPLEX)
934       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
935       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
936       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
937       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
938       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
941       ierr = ISDestroy(&list1);CHKERRQ(ierr);
942       ierr = ISDestroy(&list2);CHKERRQ(ierr);
943 #else
944       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
945 #endif
946  break;
947  case 2:
948 #if defined(PETSC_USE_COMPLEX)
949       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
950       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
951       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
952       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
953       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
956       ierr = ISDestroy(&list1);CHKERRQ(ierr);
957       ierr = ISDestroy(&list2);CHKERRQ(ierr);
958 #else
959       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
960       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
961       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
962       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
963       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
964       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
965 
966       if (dim[1]%2==0)
967         NM = dim[1]+2;
968       else
969         NM = dim[1]+1;
970 
971       for (i=0;i<local_n0;i++){
972          for (j=0;j<dim[1];j++){
973             tempindx = i*dim[1] + j;
974             tempindx1 = i*NM + j;
975             indx1[tempindx]=local_0_start*dim[1]+tempindx;
976             indx2[tempindx]=low+tempindx1;
977         }
978      }
979 
980       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
981       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
982 
983       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
984       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
987       ierr = ISDestroy(&list1);CHKERRQ(ierr);
988       ierr = ISDestroy(&list2);CHKERRQ(ierr);
989       ierr = PetscFree(indx1);CHKERRQ(ierr);
990       ierr = PetscFree(indx2);CHKERRQ(ierr);
991 #endif
992  break;
993 
994  case 3:
995 #if defined(PETSC_USE_COMPLEX)
996       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
997       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
998       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
999       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1000       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1001       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1002       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1005       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1006       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1007 #else
1008       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1009       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1010 
1011 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1012 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1013       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1014       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1015 
1016       if (dim[2]%2==0)
1017         NM = dim[2]+2;
1018       else
1019         NM = dim[2]+1;
1020 
1021       for (i=0;i<local_n0;i++){
1022          for (j=0;j<dim[1];j++){
1023             for (k=0;k<dim[2];k++){
1024                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1025                tempindx1 = i*dim[1]*NM + j*NM + k;
1026                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1027                indx2[tempindx]=low+tempindx1;
1028             }
1029          }
1030       }
1031 
1032       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1033       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1034       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1035       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1038       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1039       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1040       ierr = PetscFree(indx1);CHKERRQ(ierr);
1041       ierr = PetscFree(indx2);CHKERRQ(ierr);
1042 #endif
1043  break;
1044 
1045  default:
1046 #if defined(PETSC_USE_COMPLEX)
1047       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1048       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1049       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1050       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1051       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1054       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1055       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1056 #else
1057       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1058       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1059       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1060       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1061       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1062 
1063       partial_dim = fftw->partial_dim;
1064 
1065       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1066       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1067 
1068       if (dim[ndim-1]%2==0)
1069         NM = 2;
1070       else
1071         NM = 1;
1072 
1073       j = low;
1074       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1075          {
1076           indx1[i] = local_0_start*partial_dim + i;
1077           indx2[i] = j;
1078           if (k%dim[ndim-1]==0)
1079             { j+=NM;}
1080           j++;
1081          }
1082       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1083       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1084       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1085       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1086       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1088       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1089       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1090       ierr = PetscFree(indx1);CHKERRQ(ierr);
1091       ierr = PetscFree(indx2);CHKERRQ(ierr);
1092 #endif
1093  break;
1094   }
1095  }
1096 
1097  return 0;
1098 }
1099 EXTERN_C_END
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "OutputTransformFFT"
1103 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1104 {
1105   PetscErrorCode ierr;
1106   PetscFunctionBegin;
1107   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 /*
1112       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1113   Input A, x, y
1114   A - FFTW matrix
1115   x - FFTW vector
1116   y - PETSc vector that user can read
1117   Options Database Keys:
1118 + -mat_fftw_plannerflags - set FFTW planner flags
1119 
1120    Level: intermediate
1121 
1122 */
1123 
1124 EXTERN_C_BEGIN
1125 #undef __FUNCT__
1126 #define __FUNCT__ "OutputTransformFFT_FTTW"
1127 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1128 {
1129   PetscErrorCode ierr;
1130   MPI_Comm       comm=((PetscObject)A)->comm;
1131   Mat_FFT        *fft = (Mat_FFT*)A->data;
1132   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1133   PetscInt       N=fft->N;
1134   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1135   PetscInt       low;
1136   PetscInt       rank,size;
1137   ptrdiff_t      alloc_local,local_n0,local_0_start;
1138   ptrdiff_t      local_n1,local_1_start;
1139   VecScatter     vecscat;
1140   IS             list1,list2;
1141 #if !defined(PETSC_USE_COMPLEX)
1142   PetscInt       i,j,k,partial_dim;
1143   PetscInt       *indx1, *indx2, tempindx, tempindx1;
1144   PetscInt       N1, n1 ,NM;
1145   ptrdiff_t      temp;
1146 #endif
1147 
1148 
1149   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1150   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1151   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
1152 
1153   if (size==1){
1154 /*
1155     switch (ndim){
1156     case 1:
1157            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1158           for (i=0;i<dim[0];i++)
1159              {
1160               indx1[i] = i;
1161              }
1162           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1163           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1164           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1165           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1167           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1168           ierr = PetscFree(indx1);CHKERRQ(ierr);
1169           break;
1170 
1171     case 2:
1172          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1173           for (i=0;i<dim[0];i++){
1174              for (j=0;j<dim[1];j++){
1175                 indx1[i*dim[1]+j] = i*dim[1] + j;
1176              }
1177           }
1178          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1179          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1180          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1181          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1183          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1184          ierr = PetscFree(indx1);CHKERRQ(ierr);
1185          break;
1186     case 3:
1187          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1188          for (i=0;i<dim[0];i++){
1189             for (j=0;j<dim[1];j++){
1190                for (k=0;k<dim[2];k++){
1191                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1192                }
1193             }
1194          }
1195          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1196          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1197          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1198          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1200          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1201          ierr = PetscFree(indx1);CHKERRQ(ierr);
1202          break;
1203     default:
1204 */
1205     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
1206     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1207     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1208     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1209     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1210     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1211     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1212   //       break;
1213    // }
1214   }
1215   else{
1216 
1217  switch (ndim){
1218  case 1:
1219 #if defined(PETSC_USE_COMPLEX)
1220       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1221       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
1222       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
1223       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1224       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1225       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1226       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1227       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1229       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1230       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1231 #else
1232       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1233 #endif
1234  break;
1235  case 2:
1236 #if defined(PETSC_USE_COMPLEX)
1237       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1238       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1239       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1240       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1241       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1242       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1244       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1245       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1246 #else
1247       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1248       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1249 
1250       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1251       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
1252 
1253       if (dim[1]%2==0)
1254         NM = dim[1]+2;
1255       else
1256         NM = dim[1]+1;
1257 
1258       for (i=0;i<local_n0;i++){
1259          for (j=0;j<dim[1];j++){
1260             tempindx = i*dim[1] + j;
1261             tempindx1 = i*NM + j;
1262             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1263             indx2[tempindx]=low+tempindx1;
1264          }
1265        }
1266 
1267        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1268        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1269 
1270        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1271        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1274        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1275        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1276        ierr = PetscFree(indx1);CHKERRQ(ierr);
1277        ierr = PetscFree(indx2);CHKERRQ(ierr);
1278 #endif
1279  break;
1280  case 3:
1281 #if defined(PETSC_USE_COMPLEX)
1282        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1283        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1284        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1285        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1286        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1287        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1288        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1289        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1290        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1291 #else
1292 
1293        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1294        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1295 
1296 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1297 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1298        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1299        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1300 
1301        if (dim[2]%2==0)
1302         NM = dim[2]+2;
1303        else
1304         NM = dim[2]+1;
1305 
1306        for (i=0;i<local_n0;i++){
1307           for (j=0;j<dim[1];j++){
1308              for (k=0;k<dim[2];k++){
1309                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1310                 tempindx1 = i*dim[1]*NM + j*NM + k;
1311                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1312                 indx2[tempindx]=low+tempindx1;
1313              }
1314         //    printf("Val tempindx1 = %d\n",tempindx1);
1315         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1316         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1317         //    printf("-------------------------\n",indx2[tempindx],rank);
1318           }
1319        }
1320 
1321        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1322        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1323 
1324        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1325        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1328        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1329        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1330        ierr = PetscFree(indx1);CHKERRQ(ierr);
1331        ierr = PetscFree(indx2);CHKERRQ(ierr);
1332 #endif
1333  break;
1334  default:
1335 #if defined(PETSC_USE_COMPLEX)
1336        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1337        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1338        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1339       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1340       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1341        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1342        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1343        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1344        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1345        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1346        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1347 #else
1348        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1349        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1350        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1351        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1352        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1353 
1354        partial_dim = fftw->partial_dim;
1355 
1356        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1357        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1358 
1359        if (dim[ndim-1]%2==0)
1360          NM = 2;
1361        else
1362          NM = 1;
1363 
1364        j = low;
1365        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1366           {
1367            indx1[i] = local_0_start*partial_dim + i;
1368            indx2[i] = j;
1369            if (k%dim[ndim-1]==0)
1370              { j+=NM;}
1371            j++;
1372           }
1373        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1374        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1375 
1376        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1377        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1378        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1380        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1381        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1382        ierr = PetscFree(indx1);CHKERRQ(ierr);
1383        ierr = PetscFree(indx2);CHKERRQ(ierr);
1384 #endif
1385  break;
1386  }
1387  }
1388  return 0;
1389 }
1390 EXTERN_C_END
1391 
1392 EXTERN_C_BEGIN
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatCreate_FFTW"
1395 /*
1396       MatCreate_FFTW - Creates a matrix object that provides FFT
1397   via the external package FFTW
1398   Options Database Keys:
1399 + -mat_fftw_plannerflags - set FFTW planner flags
1400 
1401    Level: intermediate
1402 
1403 */
1404 
1405 PetscErrorCode MatCreate_FFTW(Mat A)
1406 {
1407   PetscErrorCode ierr;
1408   MPI_Comm       comm=((PetscObject)A)->comm;
1409   Mat_FFT        *fft=(Mat_FFT*)A->data;
1410   Mat_FFTW       *fftw;
1411   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1412   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1413   PetscBool      flg;
1414   PetscInt       p_flag,partial_dim=1,ctr;
1415   PetscMPIInt    size,rank;
1416   ptrdiff_t      *pdim;
1417   ptrdiff_t      local_n1,local_1_start;
1418 #if !defined(PETSC_USE_COMPLEX)
1419    ptrdiff_t     temp;
1420    PetscInt      N1;
1421 #else
1422    PetscInt n1;
1423 #endif
1424 
1425 
1426   PetscFunctionBegin;
1427   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1428   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1429   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1430 
1431   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1432   pdim[0] = dim[0];
1433   for (ctr=1;ctr<ndim;ctr++)
1434      {
1435           partial_dim *= dim[ctr];
1436           pdim[ctr] = dim[ctr];
1437      }
1438 
1439   if (size == 1) {
1440 #if defined(PETSC_USE_COMPLEX)
1441     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1442     n = N;
1443 #else
1444     PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1445     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1446     n = tot_dim;
1447 #endif
1448 
1449   } else {
1450     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1451     switch (ndim){
1452     case 1:
1453 #if !defined(PETSC_USE_COMPLEX)
1454    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1455 #else
1456       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1457       n = (PetscInt)local_n0;
1458       n1 = (PetscInt) local_n1;
1459       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1460 #endif
1461       break;
1462     case 2:
1463 #if defined(PETSC_USE_COMPLEX)
1464       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1465       /*
1466        PetscMPIInt    rank;
1467        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1468        PetscSynchronizedFlush(comm);
1469        */
1470       n = (PetscInt)local_n0*dim[1];
1471       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1472 #else
1473       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1474       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1475 //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1476 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1477       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1478 #endif
1479       break;
1480     case 3:
1481 #if defined(PETSC_USE_COMPLEX)
1482       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1483       n = (PetscInt)local_n0*dim[1]*dim[2];
1484       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1485 #else
1486       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1487       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1488 //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1489 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1490       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1491 #endif
1492       break;
1493     default:
1494 #if defined(PETSC_USE_COMPLEX)
1495       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1496       n = (PetscInt)local_n0*partial_dim;
1497       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1498 #else
1499       temp = pdim[ndim-1];
1500       pdim[ndim-1] = temp/2 + 1;
1501       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1502       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1503       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1504       pdim[ndim-1] = temp;
1505 //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1506 //      n1 = 2*local_n1*temp;
1507 //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1508       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1509 #endif
1510       break;
1511     }
1512   }
1513   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1514   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1515   fft->data = (void*)fftw;
1516 
1517   fft->n           = n;
1518   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1519   fftw->partial_dim = partial_dim;
1520   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1521   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1522 
1523   fftw->p_forward  = 0;
1524   fftw->p_backward = 0;
1525   fftw->p_flag     = FFTW_ESTIMATE;
1526 
1527   if (size == 1){
1528     A->ops->mult          = MatMult_SeqFFTW;
1529     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1530   } else {
1531     A->ops->mult          = MatMult_MPIFFTW;
1532     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1533   }
1534   fft->matdestroy          = MatDestroy_FFTW;
1535   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1536   //A->ops->getvecs       = MatGetVecs_FFTW;
1537   A->assembled          = PETSC_TRUE;
1538   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1539   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1540   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1541 
1542   /* get runtime options */
1543   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1544     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1545     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1546   PetscOptionsEnd();
1547   PetscFunctionReturn(0);
1548 }
1549 EXTERN_C_END
1550 
1551 
1552 
1553 
1554