xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision c8a8a4f0d4d51c20aa00b6c6fd97dc1f439daf51)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt   partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed!
28 
29 #undef __FUNCT__
30 #define __FUNCT__ "MatMult_SeqFFTW"
31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
32 {
33   PetscErrorCode ierr;
34   Mat_FFT        *fft  = (Mat_FFT*)A->data;
35   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
36   PetscScalar    *x_array,*y_array;
37   PetscInt       ndim=fft->ndim,*dim=fft->dim;
38 
39   PetscFunctionBegin;
40   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
41   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
42   if (!fftw->p_forward){ /* create a plan, then excute it */
43     switch (ndim){
44     case 1:
45 #if defined(PETSC_USE_COMPLEX)
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47 #else
48       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
49 #endif
50       break;
51     case 2:
52 #if defined(PETSC_USE_COMPLEX)
53       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
54 #else
55       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
56 #endif
57       break;
58     case 3:
59 #if defined(PETSC_USE_COMPLEX)
60       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
61 #else
62       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
63 #endif
64       break;
65     default:
66 #if defined(PETSC_USE_COMPLEX)
67       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
68 #else
69       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
70 #endif
71       break;
72     }
73     fftw->finarray  = x_array;
74     fftw->foutarray = y_array;
75     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
76                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
77     fftw_execute(fftw->p_forward);
78   } else { /* use existing plan */
79     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
80       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
81     } else {
82       fftw_execute(fftw->p_forward);
83     }
84   }
85   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
86   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
87   PetscFunctionReturn(0);
88 }
89 
90 #undef __FUNCT__
91 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
93 {
94   PetscErrorCode ierr;
95   Mat_FFT        *fft = (Mat_FFT*)A->data;
96   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
97   PetscScalar    *x_array,*y_array;
98   PetscInt       ndim=fft->ndim,*dim=fft->dim;
99 
100   PetscFunctionBegin;
101   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
102   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
103   if (!fftw->p_backward){ /* create a plan, then excute it */
104     switch (ndim){
105     case 1:
106 #if defined(PETSC_USE_COMPLEX)
107       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
108 #else
109       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
110 #endif
111       break;
112     case 2:
113 #if defined(PETSC_USE_COMPLEX)
114       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
115 #else
116       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
117 #endif
118       break;
119     case 3:
120 #if defined(PETSC_USE_COMPLEX)
121       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
122 #else
123       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
124 #endif
125       break;
126     default:
127 #if defined(PETSC_USE_COMPLEX)
128       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
129 #else
130       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
131 #endif
132       break;
133     }
134     fftw->binarray  = x_array;
135     fftw->boutarray = y_array;
136     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
137   } else { /* use existing plan */
138     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
139       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
140     } else {
141       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
142     }
143   }
144   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
145   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatMult_MPIFFTW"
151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
152 {
153   PetscErrorCode ierr;
154   Mat_FFT        *fft  = (Mat_FFT*)A->data;
155   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
156   PetscScalar    *x_array,*y_array;
157   PetscInt       ndim=fft->ndim,*dim=fft->dim;
158   MPI_Comm       comm=((PetscObject)A)->comm;
159 
160   PetscFunctionBegin;
161   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
162   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
163   if (!fftw->p_forward){ /* create a plan, then excute it */
164     switch (ndim){
165     case 1:
166 #if defined(PETSC_USE_COMPLEX)
167       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
168 #else
169       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
170 #endif
171       break;
172     case 2:
173 #if defined(PETSC_USE_COMPLEX)
174       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
175 #else
176       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
177 #endif
178       break;
179     case 3:
180 #if defined(PETSC_USE_COMPLEX)
181       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
182 #else
183       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
184 #endif
185       break;
186     default:
187 #if defined(PETSC_USE_COMPLEX)
188       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
189 #else
190       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
191 #endif
192       break;
193     }
194     fftw->finarray  = x_array;
195     fftw->foutarray = y_array;
196     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
197                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
198     fftw_execute(fftw->p_forward);
199   } else { /* use existing plan */
200     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
201       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
202     } else {
203       fftw_execute(fftw->p_forward);
204     }
205   }
206   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
207   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
214 {
215   PetscErrorCode ierr;
216   Mat_FFT        *fft  = (Mat_FFT*)A->data;
217   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
218   PetscScalar    *x_array,*y_array;
219   PetscInt       ndim=fft->ndim,*dim=fft->dim;
220   MPI_Comm       comm=((PetscObject)A)->comm;
221 
222   PetscFunctionBegin;
223   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
224   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
225   if (!fftw->p_backward){ /* create a plan, then excute it */
226     switch (ndim){
227     case 1:
228 #if defined(PETSC_USE_COMPLEX)
229       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
230 #else
231       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
232 #endif
233       break;
234     case 2:
235 #if defined(PETSC_USE_COMPLEX)
236       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
237 #else
238       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
239 #endif
240       break;
241     case 3:
242 #if defined(PETSC_USE_COMPLEX)
243       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
244 #else
245       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
246 #endif
247       break;
248     default:
249 #if defined(PETSC_USE_COMPLEX)
250       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
251 #else
252       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
253 #endif
254       break;
255     }
256     fftw->binarray  = x_array;
257     fftw->boutarray = y_array;
258     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
259   } else { /* use existing plan */
260     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
261       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
262     } else {
263       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
264     }
265   }
266   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
267   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatDestroy_FFTW"
273 PetscErrorCode MatDestroy_FFTW(Mat A)
274 {
275   Mat_FFT        *fft = (Mat_FFT*)A->data;
276   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   fftw_destroy_plan(fftw->p_forward);
281   fftw_destroy_plan(fftw->p_backward);
282   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
283   ierr = PetscFree(fft->data);CHKERRQ(ierr);
284   PetscFunctionReturn(0);
285 }
286 
287 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
288 #undef __FUNCT__
289 #define __FUNCT__ "VecDestroy_MPIFFTW"
290 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
291 {
292   PetscErrorCode  ierr;
293   PetscScalar     *array;
294 
295   PetscFunctionBegin;
296   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
297   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
298   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
299   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatGetVecsFFTW_1DC"
305 /*
306     - Get Vectors(s) compatible with matrix, i.e. with the
307      parallel layout determined by FFTW-1D
308 
309 */
310 PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout)
311 {
312   PetscErrorCode ierr;
313   PetscMPIInt    size,rank;
314   MPI_Comm       comm=((PetscObject)A)->comm;
315   Mat_FFT        *fft = (Mat_FFT*)A->data;
316 //  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
317   PetscInt       N=fft->N;
318   PetscInt       ndim=fft->ndim,*dim=fft->dim;
319   ptrdiff_t      f_alloc_local,f_local_n0,f_local_0_start;
320   ptrdiff_t      f_local_n1,f_local_1_end;
321   ptrdiff_t      b_alloc_local,b_local_n0,b_local_0_start;
322   ptrdiff_t      b_local_n1,b_local_1_end;
323   fftw_complex   *data_fin,*data_fout,*data_bout;
324 
325   PetscFunctionBegin;
326 #if !defined(PETSC_USE_COMPLEX)
327   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
328 #endif
329   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
330   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
331   if (size == 1){
332     SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");
333   }
334   else {
335       if (ndim>1){
336         SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");}
337       else {
338           f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end);
339           b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end);
340           if (fin) {
341             data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
342             ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
343             (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
344           }
345           if (fout) {
346             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local);
347             ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
348             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
349           }
350           if (bout) {
351             data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local);
352             ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
353             (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
354           }
355   }
356   if (fin){
357     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
358   }
359   if (fout){
360     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
361   }
362   if (bout){
363     ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr);
364   }
365   PetscFunctionReturn(0);
366 }
367 
368 
369 }
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatGetVecsFFTW"
373 /*@
374    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
375      parallel layout determined by FFTW
376 
377    Collective on Mat
378 
379    Input Parameter:
380 .  mat - the matrix
381 
382    Output Parameter:
383 +   fin - (optional) input vector of forward FFTW
384 -   fout - (optional) output vector of forward FFTW
385 
386   Level: advanced
387 
388 .seealso: MatCreateFFTW()
389 @*/
390 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
391 {
392   PetscErrorCode ierr;
393   PetscFunctionBegin;
394   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
401 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
402 {
403   PetscErrorCode ierr;
404   PetscMPIInt    size,rank;
405   MPI_Comm       comm=((PetscObject)A)->comm;
406   Mat_FFT        *fft = (Mat_FFT*)A->data;
407   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
408   PetscInt       N=fft->N;
409   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
410 
411   PetscFunctionBegin;
412   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
413   PetscValidType(A,1);
414 
415   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
416   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
417   if (size == 1){ /* sequential case */
418   printf("Routine is getting called\n");
419 #if defined(PETSC_USE_COMPLEX)
420     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
421     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
422     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
423 #else
424     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
425     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
426     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);}
427 #endif
428   } else {
429     ptrdiff_t      alloc_local,local_n0,local_0_start;
430     ptrdiff_t      local_n1;
431     fftw_complex   *data_fout;
432     ptrdiff_t      local_1_start;
433 #if defined(PETSC_USE_COMPLEX)
434     fftw_complex   *data_fin,*data_bout;
435 #else
436     double         *data_finr,*data_boutr;
437     PetscInt       n1,N1,vsize;
438     ptrdiff_t      temp;
439 #endif
440 
441     switch (ndim){
442           case 1:
443 #if !defined(PETSC_USE_COMPLEX)
444                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
445 #else
446                  //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet");
447                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
448                  if (fin) {
449                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
450                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
451                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
452                          }
453                  if (fout) {
454                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
455                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
456                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
457                          }
458                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
459                  if (bout) {
460                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
461                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
462                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
463                          }
464           break;
465 #endif
466           case 2:
467 #if !defined(PETSC_USE_COMPLEX)
468                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
469                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
470                  if (fin) {
471                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
472                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
473                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
474                           }
475                  if (bout) {
476                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
477                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
478                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
479                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
480                           }
481                  //n1 = 2*local_n1*dim[0];
482                  if (fout) {
483                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
484                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
485                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
486                            }
487 #else
488       /* Get local size */
489                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
490                  if (fin) {
491                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
492                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
493                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
494                           }
495                  if (fout) {
496                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
497                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
498                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
499                            }
500                  if (bout) {
501                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
502                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
503                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
504                           }
505 #endif
506           break;
507           case 3:
508 #if !defined(PETSC_USE_COMPLEX)
509                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
510                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
511                  if (fin) {
512                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
513                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
514                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
515                          }
516                  if (bout) {
517                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
518                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
519                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
520                           }
521                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
522                  if (fout) {
523                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
524                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
525                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
526                           }
527 #else
528                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
529                  if (fin) {
530                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
531                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
532                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
533                          }
534                  if (fout) {
535                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
536                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
537                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
538                          }
539                  if (bout) {
540                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
541                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
542                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
543                          }
544 #endif
545           break;
546           default:
547 #if !defined(PETSC_USE_COMPLEX)
548                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
549                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
550                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
551                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
552                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
553 
554                  if (fin) {
555                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
556                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
557                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
558                         }
559                  if (bout) {
560                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
561                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
562                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
563                         }
564                  //temp = fftw->partial_dim;
565                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
566                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
567                  if (fout) {
568                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
569                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
570                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
571                         }
572 #else
573                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
574                 if (fin) {
575                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
576                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
577                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
578                        }
579                 if (fout) {
580                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
581                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
582                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
583                        }
584                 if (bout) {
585                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
586                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
587                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
588                        }
589 #endif
590             break;
591           }
592   }
593   PetscFunctionReturn(0);
594 }
595 EXTERN_C_END
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatGetVecs_FFTW"
599 /*
600    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
601      parallel layout determined by FFTW
602 
603    Collective on Mat
604 
605    Input Parameter:
606 .  mat - the matrix
607 
608    Output Parameter:
609 +   fin - (optional) input vector of forward FFTW
610 -   fout - (optional) output vector of forward FFTW
611 
612   Level: advanced
613 
614 .seealso: MatCreateFFTW()
615 */
616 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
617 {
618   PetscErrorCode ierr;
619   PetscMPIInt    size,rank;
620   MPI_Comm       comm=((PetscObject)A)->comm;
621   Mat_FFT        *fft = (Mat_FFT*)A->data;
622   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
623   PetscInt       N=fft->N;
624   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
625 
626   PetscFunctionBegin;
627   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
628   PetscValidType(A,1);
629 
630   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
631   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
632   if (size == 1){ /* sequential case */
633 #if defined(PETSC_USE_COMPLEX)
634     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
635     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
636 #else
637     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);}
638     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);}
639 #endif
640   } else {        /* mpi case */
641     ptrdiff_t      alloc_local,local_n0,local_0_start;
642     ptrdiff_t      local_n1,local_1_end;
643     fftw_complex   *data_fin,*data_fout;
644 #if !defined(PETSC_USE_COMPLEX)
645     double         *data_finr ;
646     ptrdiff_t      local_1_start,temp;
647     PetscInt       vsize,n1,N1;
648 #endif
649 
650 //    PetscInt ctr;
651 //    ptrdiff_t      ndim1,*pdim;
652 //    ndim1=(ptrdiff_t) ndim;
653 //    pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
654 
655 //    for(ctr=0;ctr<ndim;ctr++)
656 //        {k
657 //           pdim[ctr] = dim[ctr];
658 //       }
659 
660 
661 
662     switch (ndim){
663     case 1:
664       /* Get local size */
665       /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */
666 //      SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine");
667       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
668       printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1);
669       if (fin) {
670         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
671         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
672         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
673       }
674       if (fout) {
675         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
676         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
677         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
678       }
679       break;
680     case 2:
681 #if !defined(PETSC_USE_COMPLEX)
682       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
683       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
684       if (fin) {
685         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
686         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
687         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
688         //printf("The code comes here with vector size %d\n",vsize);
689         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
690       }
691       n1 = 2*local_n1*(dim[0]);
692       //n1 = 2*local_n1*dim[1];
693       printf("The values are %ld\n",local_n1);
694       if (fout) {
695         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
696         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
697         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
698       }
699       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
700 
701 #else
702       /* Get local size */
703      //printf("Hope this does not come here");
704       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
705       if (fin) {
706         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
707         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
708         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
709       }
710       if (fout) {
711         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
712         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
713         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
714       }
715      //printf("Hope this does not come here");
716 #endif
717       break;
718     case 3:
719       /* Get local size */
720 #if !defined(PETSC_USE_COMPLEX)
721       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
722       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
723       if (fin) {
724         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
725         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
726         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
727         //printf("The code comes here with vector size %d\n",vsize);
728         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
729       }
730       printf("The value is %ld",local_n1);
731       n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
732       if (fout) {
733         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
734         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
735         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
736       }
737       printf("Vector size from fftw.c is  given by %d, %d\n",n1,N1);
738 
739 
740 #else
741       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
742 //      printf("The quantity n is %d",n);
743       if (fin) {
744         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
745         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
746         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
747       }
748       if (fout) {
749         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
750         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
751         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
752       }
753 #endif
754       break;
755     default:
756       /* Get local size */
757 #if !defined(PETSC_USE_COMPLEX)
758       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
759       printf("The value of temp is %ld\n",temp);
760       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
761       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
762       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
763       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
764       if (fin) {
765         data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
766         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
767         ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr);
768         //printf("The code comes here with vector size %d\n",vsize);
769         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
770       }
771       printf("The value is %ld. Global length is %d \n",local_n1,N1);
772       temp = fftw->partial_dim;
773       fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
774 
775       n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
776       if (fout) {
777         data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
778         ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
779         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
780       }
781 
782 #else
783       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
784 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
785 //      printf("The value of alloc local is %d",alloc_local);
786 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
787 //      for(i=0;i<ndim;i++)
788 //         {
789 //          pdim[i]=dim[i];printf("%d",pdim[i]);
790 //         }
791 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
792 //      printf("The quantity n is %d",n);
793       if (fin) {
794         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
795         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
796         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
797       }
798       if (fout) {
799         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
800         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
801         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
802       }
803 #endif
804       break;
805     }
806   }
807 //  if (fin){
808 //    ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
809 //  }
810 //  if (fout){
811 //    ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
812 //  }
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "InputTransformFFT"
818 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y)
819 {
820   PetscErrorCode ierr;
821   PetscFunctionBegin;
822   ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 
826 /*
827       InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real
828       parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst
829       changing dimension.
830   Input A, x, y
831   A - FFTW matrix
832   x - user data
833   Options Database Keys:
834 + -mat_fftw_plannerflags - set FFTW planner flags
835 
836    Level: intermediate
837 
838 */
839 
840 EXTERN_C_BEGIN
841 #undef __FUNCT__
842 #define __FUNCT__ "InputTransformFFT_FTTW"
843 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y)
844 {
845   PetscErrorCode ierr;
846   MPI_Comm       comm=((PetscObject)A)->comm;
847   Mat_FFT        *fft = (Mat_FFT*)A->data;
848   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
849   PetscInt       N=fft->N;
850   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
851   PetscInt       low;
852   PetscInt       rank,size;
853   ptrdiff_t      alloc_local,local_n0,local_0_start;
854   ptrdiff_t      local_n1,local_1_start;
855   VecScatter     vecscat;
856   IS             list1,list2;
857 #if !defined(PETSC_USE_COMPLEX)
858   PetscInt       i,j,k,partial_dim;
859   PetscInt       *indx1, *indx2, tempindx, tempindx1;
860   PetscInt       N1, n1 ,NM;
861   ptrdiff_t      temp;
862 #endif
863 
864   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
865   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
866   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
867 
868   if (size==1)
869     {
870 /*     switch (ndim){
871      case 1:
872           ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
873           for (i=0;i<dim[0];i++)
874              {
875               indx1[i] = i;
876              }
877           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
878           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
879           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
882           ierr = ISDestroy(&list1);CHKERRQ(ierr);
883           ierr = PetscFree(indx1);CHKERRQ(ierr);
884      break;
885 
886      case 2:
887           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
888           for (i=0;i<dim[0];i++){
889              for (j=0;j<dim[1];j++){
890                 indx1[i*dim[1]+j] = i*dim[1] + j;
891              }
892           }
893           ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
894           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
895           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
898           ierr = ISDestroy(&list1);CHKERRQ(ierr);
899           ierr = PetscFree(indx1);CHKERRQ(ierr);
900           break;
901      case 3:
902           ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
903           for (i=0;i<dim[0];i++){
904              for (j=0;j<dim[1];j++){
905                 for (k=0;k<dim[2];k++){
906                    indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
907                 }
908              }
909           }
910           ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
911           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
912           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
913           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
915           ierr = ISDestroy(&list1);CHKERRQ(ierr);
916           ierr = PetscFree(indx1);CHKERRQ(ierr);
917           break;
918      default:
919 */
920           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
921           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
922           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
925           ierr = ISDestroy(&list1);CHKERRQ(ierr);
926  //         break;
927   //    }
928     }
929 
930  else{
931 
932  switch (ndim){
933  case 1:
934 #if defined(PETSC_USE_COMPLEX)
935       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
936       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
937       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
938       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
939       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
942       ierr = ISDestroy(&list1);CHKERRQ(ierr);
943       ierr = ISDestroy(&list2);CHKERRQ(ierr);
944 #else
945       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
946 #endif
947  break;
948  case 2:
949 #if defined(PETSC_USE_COMPLEX)
950       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
951       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
952       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
953       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
954       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
955       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
957       ierr = ISDestroy(&list1);CHKERRQ(ierr);
958       ierr = ISDestroy(&list2);CHKERRQ(ierr);
959 #else
960       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
961       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
962       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
963       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
964       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
965       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
966 
967       if (dim[1]%2==0)
968         NM = dim[1]+2;
969       else
970         NM = dim[1]+1;
971 
972       for (i=0;i<local_n0;i++){
973          for (j=0;j<dim[1];j++){
974             tempindx = i*dim[1] + j;
975             tempindx1 = i*NM + j;
976             indx1[tempindx]=local_0_start*dim[1]+tempindx;
977             indx2[tempindx]=low+tempindx1;
978         }
979      }
980 
981       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
982       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
983 
984       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
985       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
988       ierr = ISDestroy(&list1);CHKERRQ(ierr);
989       ierr = ISDestroy(&list2);CHKERRQ(ierr);
990       ierr = PetscFree(indx1);CHKERRQ(ierr);
991       ierr = PetscFree(indx2);CHKERRQ(ierr);
992 #endif
993  break;
994 
995  case 3:
996 #if defined(PETSC_USE_COMPLEX)
997       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
998       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
999       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1000       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1001       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1002       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1003       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1006       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1007       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1008 #else
1009       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1010       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1011 
1012 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1013 //      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1014       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1015       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1016 
1017       if (dim[2]%2==0)
1018         NM = dim[2]+2;
1019       else
1020         NM = dim[2]+1;
1021 
1022       for (i=0;i<local_n0;i++){
1023          for (j=0;j<dim[1];j++){
1024             for (k=0;k<dim[2];k++){
1025                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1026                tempindx1 = i*dim[1]*NM + j*NM + k;
1027                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1028                indx2[tempindx]=low+tempindx1;
1029             }
1030          }
1031       }
1032 
1033       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1034       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1035       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1036       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1038       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1039       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1040       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1041       ierr = PetscFree(indx1);CHKERRQ(ierr);
1042       ierr = PetscFree(indx2);CHKERRQ(ierr);
1043 #endif
1044  break;
1045 
1046  default:
1047 #if defined(PETSC_USE_COMPLEX)
1048       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1049       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1050       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1051       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1052       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1054       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1055       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1056       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1057 #else
1058       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1059       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1060       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1061       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1062       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1063 
1064       partial_dim = fftw->partial_dim;
1065 
1066       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1067       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1068 
1069       if (dim[ndim-1]%2==0)
1070         NM = 2;
1071       else
1072         NM = 1;
1073 
1074       j = low;
1075       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1076          {
1077           indx1[i] = local_0_start*partial_dim + i;
1078           indx2[i] = j;
1079           if (k%dim[ndim-1]==0)
1080             { j+=NM;}
1081           j++;
1082          }
1083       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1084       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1085       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1086       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1088       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1089       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1090       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1091       ierr = PetscFree(indx1);CHKERRQ(ierr);
1092       ierr = PetscFree(indx2);CHKERRQ(ierr);
1093 #endif
1094  break;
1095   }
1096  }
1097 
1098  return 0;
1099 }
1100 EXTERN_C_END
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "OutputTransformFFT"
1104 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y)
1105 {
1106   PetscErrorCode ierr;
1107   PetscFunctionBegin;
1108   ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 /*
1113       OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use
1114   Input A, x, y
1115   A - FFTW matrix
1116   x - FFTW vector
1117   y - PETSc vector that user can read
1118   Options Database Keys:
1119 + -mat_fftw_plannerflags - set FFTW planner flags
1120 
1121    Level: intermediate
1122 
1123 */
1124 
1125 EXTERN_C_BEGIN
1126 #undef __FUNCT__
1127 #define __FUNCT__ "OutputTransformFFT_FTTW"
1128 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y)
1129 {
1130   PetscErrorCode ierr;
1131   MPI_Comm       comm=((PetscObject)A)->comm;
1132   Mat_FFT        *fft = (Mat_FFT*)A->data;
1133   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
1134   PetscInt       N=fft->N;
1135   PetscInt       ndim=fft->ndim,*dim=fft->dim;
1136   PetscInt       low;
1137   PetscInt       rank,size;
1138   ptrdiff_t      alloc_local,local_n0,local_0_start;
1139   ptrdiff_t      local_n1,local_1_start;
1140   VecScatter     vecscat;
1141   IS             list1,list2;
1142 #if !defined(PETSC_USE_COMPLEX)
1143   PetscInt       i,j,k,partial_dim;
1144   PetscInt       *indx1, *indx2, tempindx, tempindx1;
1145   PetscInt       N1, n1 ,NM;
1146   ptrdiff_t      temp;
1147 #endif
1148 
1149 
1150   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1151   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1152   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
1153 
1154   if (size==1){
1155 /*
1156     switch (ndim){
1157     case 1:
1158            ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr);
1159           for (i=0;i<dim[0];i++)
1160              {
1161               indx1[i] = i;
1162              }
1163           ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1164           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1165           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1167           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1168           ierr = ISDestroy(&list1);CHKERRQ(ierr);
1169           ierr = PetscFree(indx1);CHKERRQ(ierr);
1170           break;
1171 
1172     case 2:
1173          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr);
1174           for (i=0;i<dim[0];i++){
1175              for (j=0;j<dim[1];j++){
1176                 indx1[i*dim[1]+j] = i*dim[1] + j;
1177              }
1178           }
1179          ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1180          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1181          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1182          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1183          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1184          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1185          ierr = PetscFree(indx1);CHKERRQ(ierr);
1186          break;
1187     case 3:
1188          ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1189          for (i=0;i<dim[0];i++){
1190             for (j=0;j<dim[1];j++){
1191                for (k=0;k<dim[2];k++){
1192                   indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k;
1193                }
1194             }
1195          }
1196          ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1197          ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1198          ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1199          ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1200          ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1201          ierr = ISDestroy(&list1);CHKERRQ(ierr);
1202          ierr = PetscFree(indx1);CHKERRQ(ierr);
1203          break;
1204     default:
1205 */
1206     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
1207     //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF);
1208     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
1209     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1210     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1211     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1212     ierr = ISDestroy(&list1);CHKERRQ(ierr);
1213   //       break;
1214    // }
1215   }
1216   else{
1217 
1218  switch (ndim){
1219  case 1:
1220 #if defined(PETSC_USE_COMPLEX)
1221       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1222       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
1223       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
1224       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1225       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1226       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1227       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1230       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1231       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1232 #else
1233       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
1234 #endif
1235  break;
1236  case 2:
1237 #if defined(PETSC_USE_COMPLEX)
1238       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1239       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
1240       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
1241       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1242       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1245       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1246       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1247 #else
1248       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1249       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
1250 
1251       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
1252       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
1253 
1254       if (dim[1]%2==0)
1255         NM = dim[1]+2;
1256       else
1257         NM = dim[1]+1;
1258 
1259       for (i=0;i<local_n0;i++){
1260          for (j=0;j<dim[1];j++){
1261             tempindx = i*dim[1] + j;
1262             tempindx1 = i*NM + j;
1263             indx1[tempindx]=local_0_start*dim[1]+tempindx;
1264             indx2[tempindx]=low+tempindx1;
1265          }
1266        }
1267 
1268        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1269        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1270 
1271        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1272        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1273        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1275        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1276        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1277        ierr = PetscFree(indx1);CHKERRQ(ierr);
1278        ierr = PetscFree(indx2);CHKERRQ(ierr);
1279 #endif
1280  break;
1281  case 3:
1282 #if defined(PETSC_USE_COMPLEX)
1283        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1284        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
1285        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
1286        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1287        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1288        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1289        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1290        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1291        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1292 #else
1293 
1294        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1295        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1296 
1297 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
1298 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
1299        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1300        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1301 
1302        if (dim[2]%2==0)
1303         NM = dim[2]+2;
1304        else
1305         NM = dim[2]+1;
1306 
1307        for (i=0;i<local_n0;i++){
1308           for (j=0;j<dim[1];j++){
1309              for (k=0;k<dim[2];k++){
1310                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
1311                 tempindx1 = i*dim[1]*NM + j*NM + k;
1312                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1313                 indx2[tempindx]=low+tempindx1;
1314              }
1315         //    printf("Val tempindx1 = %d\n",tempindx1);
1316         //    printf("index1 %d from proc %d is \n",indx1[tempindx],rank);
1317         //    printf("index2 %d from proc %d is \n",indx2[tempindx],rank);
1318         //    printf("-------------------------\n",indx2[tempindx],rank);
1319           }
1320        }
1321 
1322        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1323        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1324 
1325        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1326        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1329        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1330        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1331        ierr = PetscFree(indx1);CHKERRQ(ierr);
1332        ierr = PetscFree(indx2);CHKERRQ(ierr);
1333 #endif
1334  break;
1335  default:
1336 #if defined(PETSC_USE_COMPLEX)
1337        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1338        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1339        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
1340       //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD);
1341       //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD);
1342        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1343        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1344        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1345        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1346        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1347        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1348 #else
1349        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1350        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1351        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1352        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1353        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1354 
1355        partial_dim = fftw->partial_dim;
1356 
1357        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1358        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1359 
1360        if (dim[ndim-1]%2==0)
1361          NM = 2;
1362        else
1363          NM = 1;
1364 
1365        j = low;
1366        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1367           {
1368            indx1[i] = local_0_start*partial_dim + i;
1369            indx2[i] = j;
1370            if (k%dim[ndim-1]==0)
1371              { j+=NM;}
1372            j++;
1373           }
1374        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1375        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1376 
1377        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1378        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1381        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1382        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1383        ierr = PetscFree(indx1);CHKERRQ(ierr);
1384        ierr = PetscFree(indx2);CHKERRQ(ierr);
1385 #endif
1386  break;
1387  }
1388  }
1389  return 0;
1390 }
1391 EXTERN_C_END
1392 
1393 EXTERN_C_BEGIN
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatCreate_FFTW"
1396 /*
1397       MatCreate_FFTW - Creates a matrix object that provides FFT
1398   via the external package FFTW
1399   Options Database Keys:
1400 + -mat_fftw_plannerflags - set FFTW planner flags
1401 
1402    Level: intermediate
1403 
1404 */
1405 
1406 PetscErrorCode MatCreate_FFTW(Mat A)
1407 {
1408   PetscErrorCode ierr;
1409   MPI_Comm       comm=((PetscObject)A)->comm;
1410   Mat_FFT        *fft=(Mat_FFT*)A->data;
1411   Mat_FFTW       *fftw;
1412   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1413   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1414   PetscBool      flg;
1415   PetscInt       p_flag,partial_dim=1,ctr,n1;
1416   PetscMPIInt    size,rank;
1417   ptrdiff_t      *pdim;
1418   ptrdiff_t      local_n1,local_1_start;
1419 #if !defined(PETSC_USE_COMPLEX)
1420    ptrdiff_t     temp;
1421    PetscInt      N1;
1422 #endif
1423 
1424 
1425   PetscFunctionBegin;
1426   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1427   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1428   ierr = MPI_Barrier(PETSC_COMM_WORLD);
1429 
1430   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1431   pdim[0] = dim[0];
1432   for (ctr=1;ctr<ndim;ctr++)
1433      {
1434           partial_dim *= dim[ctr];
1435           pdim[ctr] = dim[ctr];
1436      }
1437 
1438   if (size == 1) {
1439 #if defined(PETSC_USE_COMPLEX)
1440     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1441     n = N;
1442 #else
1443     PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1];
1444     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1445     n = tot_dim;
1446 #endif
1447 
1448   } else {
1449     ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end;
1450     switch (ndim){
1451     case 1:
1452 #if !defined(PETSC_USE_COMPLEX)
1453    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1454 #else
1455       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1456       n = (PetscInt)local_n0;
1457       n1 = (PetscInt) local_n1;
1458       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1459 #endif
1460       break;
1461     case 2:
1462 #if defined(PETSC_USE_COMPLEX)
1463       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1464       /*
1465        PetscMPIInt    rank;
1466        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1467        PetscSynchronizedFlush(comm);
1468        */
1469       n = (PetscInt)local_n0*dim[1];
1470       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1471 #else
1472       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1473       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1474 //      n1 = 2*(PetscInt)local_n1*(dim[0]);
1475 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1476       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1477 #endif
1478       break;
1479     case 3:
1480 #if defined(PETSC_USE_COMPLEX)
1481       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1482       n = (PetscInt)local_n0*dim[1]*dim[2];
1483       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1484 #else
1485       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1486       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1487 //      n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
1488 //      ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1489       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1490 #endif
1491       break;
1492     default:
1493 #if defined(PETSC_USE_COMPLEX)
1494       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1495       n = (PetscInt)local_n0*partial_dim;
1496       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1497 #else
1498       temp = pdim[ndim-1];
1499       pdim[ndim-1] = temp/2 + 1;
1500       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1501       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1502       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1503       pdim[ndim-1] = temp;
1504 //      temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]);
1505 //      n1 = 2*local_n1*temp;
1506 //      ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr);
1507       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1508 #endif
1509       break;
1510     }
1511   }
1512   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1513   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1514   fft->data = (void*)fftw;
1515 
1516   fft->n           = n;
1517   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1518   fftw->partial_dim = partial_dim;
1519   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1520   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1521 
1522   fftw->p_forward  = 0;
1523   fftw->p_backward = 0;
1524   fftw->p_flag     = FFTW_ESTIMATE;
1525 
1526   if (size == 1){
1527     A->ops->mult          = MatMult_SeqFFTW;
1528     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1529   } else {
1530     A->ops->mult          = MatMult_MPIFFTW;
1531     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1532   }
1533   fft->matdestroy          = MatDestroy_FFTW;
1534   // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D
1535   //A->ops->getvecs       = MatGetVecs_FFTW;
1536   A->assembled          = PETSC_TRUE;
1537   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1538   PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW);
1539   PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW);
1540 
1541   /* get runtime options */
1542   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1543     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1544     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1545   PetscOptionsEnd();
1546   PetscFunctionReturn(0);
1547 }
1548 EXTERN_C_END
1549 
1550 
1551 
1552 
1553