xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 4e5d74a76bd8fa54fede1e0a3eb6ccefe767a7da)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   PetscInt    partial_dim;
15   fftw_plan   p_forward,p_backward;
16   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
17   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
18                                                             executed for the arrays with which the plan was created */
19 } Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 
28 /* MatMult_SeqFFTW performs forward DFT in parallel
29 
30    Input parameter:
31    mat - the matrix
32    x   - the vector on which FDFT will be performed
33 
34    Output parameter:
35    y   - vector that stores result of FDFT
36 
37 */
38 #undef __FUNCT__
39 #define __FUNCT__ "MatMult_SeqFFTW"
40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
41 {
42   PetscErrorCode ierr;
43   Mat_FFT        *fft  = (Mat_FFT*)A->data;
44   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
45   PetscScalar    *x_array,*y_array;
46   PetscInt       ndim=fft->ndim,*dim=fft->dim;
47 
48   PetscFunctionBegin;
49   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
50   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
51   if (!fftw->p_forward){ /* create a plan, then excute it */
52     switch (ndim){
53     case 1:
54 #if defined(PETSC_USE_COMPLEX)
55       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56 #else
57       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
58 #endif
59       break;
60     case 2:
61 #if defined(PETSC_USE_COMPLEX)
62       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
63 #else
64       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
65 #endif
66       break;
67     case 3:
68 #if defined(PETSC_USE_COMPLEX)
69       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
70 #else
71       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
72 #endif
73       break;
74     default:
75 #if defined(PETSC_USE_COMPLEX)
76       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
77 #else
78       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag);
79 #endif
80       break;
81     }
82     fftw->finarray  = x_array;
83     fftw->foutarray = y_array;
84     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
85                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
86     fftw_execute(fftw->p_forward);
87   } else { /* use existing plan */
88     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
89       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
90     } else {
91       fftw_execute(fftw->p_forward);
92     }
93   }
94   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
95   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
96   PetscFunctionReturn(0);
97 }
98 
99 /* MatMultTranspose_SeqFFTW performs serial backward DFT
100 
101    Input parameter:
102    mat - the matrix
103    x   - the vector on which BDFT will be performed
104 
105    Output parameter:
106    y   - vector that stores result of BDFT
107 
108 */
109 
110 #undef __FUNCT__
111 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
113 {
114   PetscErrorCode ierr;
115   Mat_FFT        *fft = (Mat_FFT*)A->data;
116   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
117   PetscScalar    *x_array,*y_array;
118   PetscInt       ndim=fft->ndim,*dim=fft->dim;
119 
120   PetscFunctionBegin;
121   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
122   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
123   if (!fftw->p_backward){ /* create a plan, then excute it */
124     switch (ndim){
125     case 1:
126 #if defined(PETSC_USE_COMPLEX)
127       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
128 #else
129       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
130 #endif
131       break;
132     case 2:
133 #if defined(PETSC_USE_COMPLEX)
134       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
135 #else
136       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
137 #endif
138       break;
139     case 3:
140 #if defined(PETSC_USE_COMPLEX)
141       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
142 #else
143       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
144 #endif
145       break;
146     default:
147 #if defined(PETSC_USE_COMPLEX)
148       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
149 #else
150       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag);
151 #endif
152       break;
153     }
154     fftw->binarray  = x_array;
155     fftw->boutarray = y_array;
156     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
157   } else { /* use existing plan */
158     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
159       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
160     } else {
161       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
162     }
163   }
164   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
165   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
166   PetscFunctionReturn(0);
167 }
168 
169 /* MatMult_MPIFFTW performs forward DFT in parallel
170 
171    Input parameter:
172    mat - the matrix
173    x   - the vector on which FDFT will be performed
174 
175    Output parameter:
176    y   - vector that stores result of FDFT
177 
178 */
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatMult_MPIFFTW"
182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
183 {
184   PetscErrorCode ierr;
185   Mat_FFT        *fft  = (Mat_FFT*)A->data;
186   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
187   PetscScalar    *x_array,*y_array;
188   PetscInt       ndim=fft->ndim,*dim=fft->dim;
189   MPI_Comm       comm=((PetscObject)A)->comm;
190 
191   PetscFunctionBegin;
192   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
193   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
194   if (!fftw->p_forward){ /* create a plan, then excute it */
195     switch (ndim){
196     case 1:
197 #if defined(PETSC_USE_COMPLEX)
198       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
199 #else
200       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
201 #endif
202       break;
203     case 2:
204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
205       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
206 #else
207       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
208 #endif
209       break;
210     case 3:
211 #if defined(PETSC_USE_COMPLEX)
212       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
213 #else
214       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
215 #endif
216       break;
217     default:
218 #if defined(PETSC_USE_COMPLEX)
219       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
220 #else
221       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
222 #endif
223       break;
224     }
225     fftw->finarray  = x_array;
226     fftw->foutarray = y_array;
227     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
228                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
229     fftw_execute(fftw->p_forward);
230   } else { /* use existing plan */
231     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
232       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
233     } else {
234       fftw_execute(fftw->p_forward);
235     }
236   }
237   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
238   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
239   PetscFunctionReturn(0);
240 }
241 
242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
243 
244    Input parameter:
245    mat - the matrix
246    x   - the vector on which BDFT will be performed
247 
248    Output parameter:
249    y   - vector that stores result of BDFT
250 
251 */
252 #undef __FUNCT__
253 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
255 {
256   PetscErrorCode ierr;
257   Mat_FFT        *fft  = (Mat_FFT*)A->data;
258   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
259   PetscScalar    *x_array,*y_array;
260   PetscInt       ndim=fft->ndim,*dim=fft->dim;
261   MPI_Comm       comm=((PetscObject)A)->comm;
262 
263   PetscFunctionBegin;
264   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
265   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
266   if (!fftw->p_backward){ /* create a plan, then excute it */
267     switch (ndim){
268     case 1:
269 #if defined(PETSC_USE_COMPLEX)
270       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
271 #else
272       SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet");
273 #endif
274       break;
275     case 2:
276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
277       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
278 #else
279       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
280 #endif
281       break;
282     case 3:
283 #if defined(PETSC_USE_COMPLEX)
284       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
285 #else
286       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
287 #endif
288       break;
289     default:
290 #if defined(PETSC_USE_COMPLEX)
291       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
292 #else
293       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE);
294 #endif
295       break;
296     }
297     fftw->binarray  = x_array;
298     fftw->boutarray = y_array;
299     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
300   } else { /* use existing plan */
301     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
302       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
303     } else {
304       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
305     }
306   }
307   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
308   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatDestroy_FFTW"
314 PetscErrorCode MatDestroy_FFTW(Mat A)
315 {
316   Mat_FFT        *fft = (Mat_FFT*)A->data;
317   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
318   PetscErrorCode ierr;
319 
320   PetscFunctionBegin;
321   fftw_destroy_plan(fftw->p_forward);
322   fftw_destroy_plan(fftw->p_backward);
323   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
324   ierr = PetscFree(fft->data);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
329 #undef __FUNCT__
330 #define __FUNCT__ "VecDestroy_MPIFFTW"
331 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
332 {
333   PetscErrorCode  ierr;
334   PetscScalar     *array;
335 
336   PetscFunctionBegin;
337   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
338   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
339   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
340   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 
345 #undef __FUNCT__
346 #define __FUNCT__ "MatGetVecsFFTW"
347 /*@
348    MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
349      parallel layout determined by FFTW
350 
351    Collective on Mat
352 
353    Input Parameter:
354 .  mat - the matrix
355 
356    Output Parameter:
357 +   fin - (optional) input vector of forward FFTW
358 -   fout - (optional) output vector of forward FFTW
359 -   bout - (optional) output vector of backward FFTW
360 
361   Level: advanced
362   Note: The parallel layout of output of forward FFTW is always same as the input
363         of the backward FFTW. But parallel layout of the input vector of forward
364         FFTW might not be same as the output of backward FFTW.
365         Also note that we need to provide enough space while doing parallel real transform.
366         We need to pad extra zeros at the end of last dimension. For this reason the one needs to
367         invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the
368         last dimension from n to n/2+1 while invoking this routine.
369 
370 .seealso: MatCreateFFTW()
371 @*/
372 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
373 {
374   PetscErrorCode ierr;
375 
376   PetscFunctionBegin;
377   ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
378   PetscFunctionReturn(0);
379 }
380 
381 EXTERN_C_BEGIN
382 #undef __FUNCT__
383 #define __FUNCT__ "MatGetVecsFFTW_FFTW"
384 PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
385 {
386   PetscErrorCode ierr;
387   PetscMPIInt    size,rank;
388   MPI_Comm       comm=((PetscObject)A)->comm;
389   Mat_FFT        *fft = (Mat_FFT*)A->data;
390   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
391   PetscInt       N=fft->N;
392   PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n;
393 
394   PetscFunctionBegin;
395   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
396   PetscValidType(A,1);
397 
398   ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr);
399   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
400   if (size == 1){ /* sequential case */
401 #if defined(PETSC_USE_COMPLEX)
402     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
403     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
404     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
405 #else
406     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
407     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
408     if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
409 #endif
410   } else { /* parallel cases */
411     ptrdiff_t      alloc_local,local_n0,local_0_start;
412     ptrdiff_t      local_n1;
413     fftw_complex   *data_fout;
414     ptrdiff_t      local_1_start;
415 #if defined(PETSC_USE_COMPLEX)
416     fftw_complex   *data_fin,*data_bout;
417 #else
418     double         *data_finr,*data_boutr;
419     PetscInt       n1,N1,vsize;
420     ptrdiff_t      temp;
421 #endif
422 
423     switch (ndim){
424           case 1:
425 #if !defined(PETSC_USE_COMPLEX)
426                  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
427 #else
428                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
429                  if (fin) {
430                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
431                          ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
432                          (*fin)->ops->destroy = VecDestroy_MPIFFTW;
433                          }
434                  if (fout) {
435                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
436                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
437                           (*fout)->ops->destroy = VecDestroy_MPIFFTW;
438                          }
439                  alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
440                  if (bout) {
441                           data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
442                           ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
443                           (*bout)->ops->destroy = VecDestroy_MPIFFTW;
444                          }
445           break;
446 #endif
447           case 2:
448 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
449                  alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
450                  N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
451                  if (fin) {
452                            data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
453                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
454                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
455                           }
456                  if (bout) {
457                            data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
458                            ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
459                            ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr);
460                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
461                           }
462                  //n1 = 2*local_n1*dim[0];
463                  if (fout) {
464                             data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
465                             ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
466                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
467                            }
468 #else
469       /* Get local size */
470                  alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
471                  if (fin) {
472                            data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
473                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
474                            (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
475                           }
476                  if (fout) {
477                             data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
478                             ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
479                             (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
480                            }
481                  if (bout) {
482                            data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
483                            ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
484                            (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
485                           }
486 #endif
487           break;
488           case 3:
489 #if !defined(PETSC_USE_COMPLEX)
490                  alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
491                  N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
492                  if (fin) {
493                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
494                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
495                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
496                          }
497                  if (bout) {
498                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
499                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
500                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
501                           }
502                  //n1 = 2*local_n1*dim[0]*(dim[2]/2+1);
503                  if (fout) {
504                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
505                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
506                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
507                           }
508 #else
509                  alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
510                  if (fin) {
511                          data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
512                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
513                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
514                          }
515                  if (fout) {
516                           data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
517                           ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
518                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
519                          }
520                  if (bout) {
521                          data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
522                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
523                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
524                          }
525 #endif
526           break;
527           default:
528 #if !defined(PETSC_USE_COMPLEX)
529                  temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
530                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
531                  alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
532                  N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
533                  (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
534 
535                  if (fin) {
536                          data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
537                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);
538                          (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
539                         }
540                  if (bout) {
541                          data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2);
542                          ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);
543                          (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
544                         }
545                  //temp = fftw->partial_dim;
546                  //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]);
547                  //n1 = 2*local_n1*(fftw->partial_dim);  fftw->partial_dim = temp;
548                  if (fout) {
549                           data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local);
550                           ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);
551                           (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
552                         }
553 #else
554                 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
555                 if (fin) {
556                        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
557                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
558                        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
559                        }
560                 if (fout) {
561                          data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
562                          ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
563                          (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
564                        }
565                 if (bout) {
566                        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
567                        ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);
568                        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
569                        }
570 #endif
571             break;
572           }
573   }
574   PetscFunctionReturn(0);
575 }
576 EXTERN_C_END
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "VecScatterPetscToFFTW"
580 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
581 {
582   PetscErrorCode ierr;
583   PetscFunctionBegin;
584   ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
585   PetscFunctionReturn(0);
586 }
587 
588 /*
589       VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block.
590   Input A, x, y
591   A - FFTW matrix
592   x - user data
593   Options Database Keys:
594 + -mat_fftw_plannerflags - set FFTW planner flags
595 
596    Level: intermediate
597    Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
598          one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
599          zeros. This routine does that job by scattering operation.
600 
601 
602 */
603 
604 EXTERN_C_BEGIN
605 #undef __FUNCT__
606 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW"
607 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
608 {
609   PetscErrorCode ierr;
610   MPI_Comm       comm=((PetscObject)A)->comm;
611   Mat_FFT        *fft = (Mat_FFT*)A->data;
612   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
613   PetscInt       N=fft->N;
614   PetscInt       ndim=fft->ndim,*dim=fft->dim;//n=fft->n;
615   PetscInt       low;
616   PetscInt       rank,size,vsize,vsize1;
617   ptrdiff_t      alloc_local,local_n0,local_0_start;
618   ptrdiff_t      local_n1,local_1_start;
619   VecScatter     vecscat;
620   IS             list1,list2;
621 #if !defined(PETSC_USE_COMPLEX)
622   PetscInt       i,j,k,partial_dim;
623   PetscInt       *indx1, *indx2, tempindx, tempindx1;
624   PetscInt       N1, n1 ,NM;
625   ptrdiff_t      temp;
626 #endif
627 
628   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
629   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
630   ierr = VecGetOwnershipRange(y,&low,PETSC_NULL);
631 
632   if (size==1)
633     {
634           ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
635           ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
636           //printf("The values of sizes are %d and %d",vsize,vsize1);
637           ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
638           ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
639           ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
640           ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
641           ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
642           ierr = ISDestroy(&list1);CHKERRQ(ierr);
643     }
644 
645  else{
646 
647  switch (ndim){
648  case 1:
649 #if defined(PETSC_USE_COMPLEX)
650       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
651       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1);
652       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2);
653       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
654       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
655       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
656       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
657       ierr = ISDestroy(&list1);CHKERRQ(ierr);
658       ierr = ISDestroy(&list2);CHKERRQ(ierr);
659 #else
660       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
661 #endif
662  break;
663  case 2:
664 #if defined(PETSC_USE_COMPLEX)
665       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
666       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
667       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
668       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
669       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
670       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
671       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
672       ierr = ISDestroy(&list1);CHKERRQ(ierr);
673       ierr = ISDestroy(&list2);CHKERRQ(ierr);
674 #else
675       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
676       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
677       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
678       //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
679       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
680       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
681 
682       if (dim[1]%2==0)
683         NM = dim[1]+2;
684       else
685         NM = dim[1]+1;
686 
687       for (i=0;i<local_n0;i++){
688          for (j=0;j<dim[1];j++){
689             tempindx = i*dim[1] + j;
690             tempindx1 = i*NM + j;
691             indx1[tempindx]=local_0_start*dim[1]+tempindx;
692             indx2[tempindx]=low+tempindx1;
693         }
694      }
695 
696       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
697       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
698 
699       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
700       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
701       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
702       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
703       ierr = ISDestroy(&list1);CHKERRQ(ierr);
704       ierr = ISDestroy(&list2);CHKERRQ(ierr);
705       ierr = PetscFree(indx1);CHKERRQ(ierr);
706       ierr = PetscFree(indx2);CHKERRQ(ierr);
707 #endif
708  break;
709 
710  case 3:
711 #if defined(PETSC_USE_COMPLEX)
712       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
713       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
714       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
715       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
716       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
717       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
718       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
719       ierr = ISDestroy(&list1);CHKERRQ(ierr);
720       ierr = ISDestroy(&list2);CHKERRQ(ierr);
721 #else
722       alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
723       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
724 
725       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
726       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
727 
728       if (dim[2]%2==0)
729         NM = dim[2]+2;
730       else
731         NM = dim[2]+1;
732 
733       for (i=0;i<local_n0;i++){
734          for (j=0;j<dim[1];j++){
735             for (k=0;k<dim[2];k++){
736                tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
737                tempindx1 = i*dim[1]*NM + j*NM + k;
738                indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
739                indx2[tempindx]=low+tempindx1;
740             }
741          }
742       }
743 
744       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
745       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
746       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
747       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
748       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
750       ierr = ISDestroy(&list1);CHKERRQ(ierr);
751       ierr = ISDestroy(&list2);CHKERRQ(ierr);
752       ierr = PetscFree(indx1);CHKERRQ(ierr);
753       ierr = PetscFree(indx2);CHKERRQ(ierr);
754 #endif
755  break;
756 
757  default:
758 #if defined(PETSC_USE_COMPLEX)
759       alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
760       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
761       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
762       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
763       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
764       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
765       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
766       ierr = ISDestroy(&list1);CHKERRQ(ierr);
767       ierr = ISDestroy(&list2);CHKERRQ(ierr);
768 #else
769       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
770       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
771       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
772       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
773       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
774 
775       partial_dim = fftw->partial_dim;
776 
777       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
778       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
779 
780       if (dim[ndim-1]%2==0)
781         NM = 2;
782       else
783         NM = 1;
784 
785       j = low;
786       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
787          {
788           indx1[i] = local_0_start*partial_dim + i;
789           indx2[i] = j;
790           if (k%dim[ndim-1]==0)
791             { j+=NM;}
792           j++;
793          }
794       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
795       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
796       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
797       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
798       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
799       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
800       ierr = ISDestroy(&list1);CHKERRQ(ierr);
801       ierr = ISDestroy(&list2);CHKERRQ(ierr);
802       ierr = PetscFree(indx1);CHKERRQ(ierr);
803       ierr = PetscFree(indx2);CHKERRQ(ierr);
804 #endif
805  break;
806   }
807  }
808  return(0);
809 }
810 EXTERN_C_END
811 
812 #undef __FUNCT__
813 #define __FUNCT__ "VecScatterFFTWToPetsc"
814 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
815 {
816   PetscErrorCode ierr;
817   PetscFunctionBegin;
818   ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
822 /*
823       VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use.
824   Input A, x, y
825   A - FFTW matrix
826   x - FFTW vector
827   y - PETSc vector that user can read
828 
829    Level: intermediate
830    Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
831          VecScatterFFTWToPetsc removes those extra zeros.
832 
833 */
834 
835 EXTERN_C_BEGIN
836 #undef __FUNCT__
837 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
838 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
839 {
840   PetscErrorCode ierr;
841   MPI_Comm       comm=((PetscObject)A)->comm;
842   Mat_FFT        *fft = (Mat_FFT*)A->data;
843   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
844   PetscInt       N=fft->N;
845   PetscInt       ndim=fft->ndim,*dim=fft->dim;
846   PetscInt       low;
847   PetscInt       rank,size;
848   ptrdiff_t      alloc_local,local_n0,local_0_start;
849   ptrdiff_t      local_n1,local_1_start;
850   VecScatter     vecscat;
851   IS             list1,list2;
852 #if !defined(PETSC_USE_COMPLEX)
853   PetscInt       i,j,k,partial_dim;
854   PetscInt       *indx1, *indx2, tempindx, tempindx1;
855   PetscInt       N1, n1 ,NM;
856   ptrdiff_t      temp;
857 #endif
858 
859 
860   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
861   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
862   ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr);
863 
864   if (size==1){
865     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
866     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
867     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
870     ierr = ISDestroy(&list1);CHKERRQ(ierr);
871   }
872   else{
873 
874   switch (ndim){
875   case 1:
876 #if defined(PETSC_USE_COMPLEX)
877       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
878       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1);
879       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2);
880       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
881       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
884       ierr = ISDestroy(&list1);CHKERRQ(ierr);
885       ierr = ISDestroy(&list2);CHKERRQ(ierr);
886 #else
887       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
888 #endif
889   break;
890   case 2:
891 #if defined(PETSC_USE_COMPLEX)
892       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
893       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1);
894       ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2);
895       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
896       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
899       ierr = ISDestroy(&list1);CHKERRQ(ierr);
900       ierr = ISDestroy(&list2);CHKERRQ(ierr);
901 #else
902       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
903       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
904 
905       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
906       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
907 
908       if (dim[1]%2==0)
909         NM = dim[1]+2;
910       else
911         NM = dim[1]+1;
912 
913       for (i=0;i<local_n0;i++){
914          for (j=0;j<dim[1];j++){
915             tempindx = i*dim[1] + j;
916             tempindx1 = i*NM + j;
917             indx1[tempindx]=local_0_start*dim[1]+tempindx;
918             indx2[tempindx]=low+tempindx1;
919          }
920        }
921 
922        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
923        ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
924 
925        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
926        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
929        ierr = ISDestroy(&list1);CHKERRQ(ierr);
930        ierr = ISDestroy(&list2);CHKERRQ(ierr);
931        ierr = PetscFree(indx1);CHKERRQ(ierr);
932        ierr = PetscFree(indx2);CHKERRQ(ierr);
933 #endif
934   break;
935   case 3:
936 #if defined(PETSC_USE_COMPLEX)
937        alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
938        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
939        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
940        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
941        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
944        ierr = ISDestroy(&list1);CHKERRQ(ierr);
945        ierr = ISDestroy(&list2);CHKERRQ(ierr);
946 #else
947 
948        alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
949        N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
950 
951 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr);
952 //     ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr);
953        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
954        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
955 
956        if (dim[2]%2==0)
957         NM = dim[2]+2;
958        else
959         NM = dim[2]+1;
960 
961        for (i=0;i<local_n0;i++){
962           for (j=0;j<dim[1];j++){
963              for (k=0;k<dim[2];k++){
964                 tempindx = i*dim[1]*dim[2] + j*dim[2] + k;
965                 tempindx1 = i*dim[1]*NM + j*NM + k;
966                 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
967                 indx2[tempindx]=low+tempindx1;
968              }
969           }
970        }
971 
972        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
973        ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
974 
975        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
976        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
977        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
979        ierr = ISDestroy(&list1);CHKERRQ(ierr);
980        ierr = ISDestroy(&list2);CHKERRQ(ierr);
981        ierr = PetscFree(indx1);CHKERRQ(ierr);
982        ierr = PetscFree(indx2);CHKERRQ(ierr);
983 #endif
984   break;
985   default:
986 #if defined(PETSC_USE_COMPLEX)
987        alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
988        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
989        ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
990        ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
991        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
994        ierr = ISDestroy(&list1);CHKERRQ(ierr);
995        ierr = ISDestroy(&list2);CHKERRQ(ierr);
996 #else
997        temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
998        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
999        alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1000        N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1001        (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1002 
1003        partial_dim = fftw->partial_dim;
1004 
1005        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1006        ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1007 
1008        if (dim[ndim-1]%2==0)
1009          NM = 2;
1010        else
1011          NM = 1;
1012 
1013        j = low;
1014        for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
1015           {
1016            indx1[i] = local_0_start*partial_dim + i;
1017            indx2[i] = j;
1018            if (k%dim[ndim-1]==0)
1019              { j+=NM;}
1020            j++;
1021           }
1022        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1023        ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1024 
1025        ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1026        ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1027        ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1028        ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1029        ierr = ISDestroy(&list1);CHKERRQ(ierr);
1030        ierr = ISDestroy(&list2);CHKERRQ(ierr);
1031        ierr = PetscFree(indx1);CHKERRQ(ierr);
1032        ierr = PetscFree(indx2);CHKERRQ(ierr);
1033 #endif
1034   break;
1035   }
1036   }
1037   return 0;
1038 }
1039 EXTERN_C_END
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatCreate_FFTW"
1044 /*
1045       MatCreate_FFTW - Creates a matrix object that provides FFT
1046   via the external package FFTW
1047   Options Database Keys:
1048 + -mat_fftw_plannerflags - set FFTW planner flags
1049 
1050    Level: intermediate
1051 
1052 */
1053 
1054 PetscErrorCode MatCreate_FFTW(Mat A)
1055 {
1056   PetscErrorCode ierr;
1057   MPI_Comm       comm=((PetscObject)A)->comm;
1058   Mat_FFT        *fft=(Mat_FFT*)A->data;
1059   Mat_FFTW       *fftw;
1060   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
1061   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1062   PetscBool      flg;
1063   PetscInt       p_flag,partial_dim=1,ctr;
1064   PetscMPIInt    size,rank;
1065   ptrdiff_t      *pdim;
1066   ptrdiff_t      local_n1,local_1_start;
1067 #if !defined(PETSC_USE_COMPLEX)
1068    ptrdiff_t     temp;
1069    PetscInt      N1,tot_dim;
1070 #else
1071    PetscInt n1;
1072 #endif
1073 
1074   PetscFunctionBegin;
1075   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1076   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1077 
1078   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
1079   pdim[0] = dim[0];
1080 #if !defined(PETSC_USE_COMPLEX)
1081   tot_dim = 2*dim[0];
1082 #endif
1083   for (ctr=1;ctr<ndim;ctr++)
1084      {
1085           partial_dim *= dim[ctr];
1086           pdim[ctr] = dim[ctr];
1087 #if !defined(PETSC_USE_COMPLEX)
1088           if (ctr==ndim-1)
1089             tot_dim *= (dim[ctr]/2+1);
1090           else
1091             tot_dim *= dim[ctr];
1092 #endif
1093      }
1094 
1095   if (size == 1) {
1096 #if defined(PETSC_USE_COMPLEX)
1097     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1098     n = N;
1099 #else
1100     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1101     n = tot_dim;
1102 #endif
1103 
1104   } else {
1105     ptrdiff_t alloc_local,local_n0,local_0_start;
1106     switch (ndim){
1107     case 1:
1108 #if !defined(PETSC_USE_COMPLEX)
1109    SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1110 #else
1111       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1112       n = (PetscInt)local_n0;
1113       n1 = (PetscInt) local_n1;
1114       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1115 #endif
1116       break;
1117     case 2:
1118 #if defined(PETSC_USE_COMPLEX)
1119       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1120       /*
1121        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1122        PetscSynchronizedFlush(comm);
1123        */
1124       n = (PetscInt)local_n0*dim[1];
1125       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1126 #else
1127       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1128       n = 2*(PetscInt)local_n0*(dim[1]/2+1);
1129       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1130 #endif
1131       break;
1132     case 3:
1133 #if defined(PETSC_USE_COMPLEX)
1134       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1135       n = (PetscInt)local_n0*dim[1]*dim[2];
1136       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1137 #else
1138       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1139       n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1140       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1141 #endif
1142       break;
1143     default:
1144 #if defined(PETSC_USE_COMPLEX)
1145       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1146       n = (PetscInt)local_n0*partial_dim;
1147       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1148 #else
1149       temp = pdim[ndim-1];
1150       pdim[ndim-1] = temp/2 + 1;
1151       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start);
1152       n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1153       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1154       pdim[ndim-1] = temp;
1155       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1156 #endif
1157       break;
1158     }
1159   }
1160   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1161   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
1162   fft->data = (void*)fftw;
1163 
1164   fft->n           = n;
1165   fftw->ndim_fftw  = (ptrdiff_t)ndim; // This is dimension of fft
1166   fftw->partial_dim = partial_dim;
1167   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr);
1168   for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1169 
1170   fftw->p_forward  = 0;
1171   fftw->p_backward = 0;
1172   fftw->p_flag     = FFTW_ESTIMATE;
1173 
1174   if (size == 1){
1175     A->ops->mult          = MatMult_SeqFFTW;
1176     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1177   } else {
1178     A->ops->mult          = MatMult_MPIFFTW;
1179     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1180   }
1181   fft->matdestroy          = MatDestroy_FFTW;
1182   //A->ops->getvecs       = MatGetVecs_FFTW;
1183   A->assembled          = PETSC_TRUE;
1184   PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);
1185   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);
1186   PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);
1187 
1188   /* get runtime options */
1189   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1190     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
1191     if (flg) {fftw->p_flag = (unsigned)p_flag;}
1192   PetscOptionsEnd();
1193   PetscFunctionReturn(0);
1194 }
1195 EXTERN_C_END
1196 
1197 
1198 
1199 
1200