xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision 7a21806cec79a1efbed21fbe5568b2c145b0efd9)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
typedef struct {
  ptrdiff_t   ndim_fftw,*dim_fftw;  /* dimension count and sizes as ptrdiff_t, the type required by the FFTW MPI planner API */
  PetscInt    partial_dim;          /* NOTE(review): presumably the product of all but the first dimension — confirm against MatCreateFFTW */
  fftw_plan   p_forward,p_backward; /* cached plans; created lazily on first MatMult / MatMultTranspose */
  unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                            executed for the arrays with which the plan was created */
} Mat_FFTW;
20 
21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
25 extern PetscErrorCode MatDestroy_FFTW(Mat);
26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
27 
/* MatMult_SeqFFTW performs a serial (single-process) forward DFT
29    Input parameter:
30      A - the matrix
31      x - the vector on which FDFT will be performed
32 
33    Output parameter:
34      y - vector that stores result of FDFT
35 */
36 #undef __FUNCT__
37 #define __FUNCT__ "MatMult_SeqFFTW"
38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
39 {
40   PetscErrorCode ierr;
41   Mat_FFT        *fft  = (Mat_FFT*)A->data;
42   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
43   PetscScalar    *x_array,*y_array;
44   PetscInt       ndim = fft->ndim,*dim = fft->dim;
45   int            *dim_32,i;
46 
47   PetscFunctionBegin;
48   ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
49   for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
50 
51   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
52   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
53   if (!fftw->p_forward) { /* create a plan, then excute it */
54     switch (ndim) {
55     case 1:
56 #if defined(PETSC_USE_COMPLEX)
57       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58 #else
59       fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
60 #endif
61       break;
62     case 2:
63 #if defined(PETSC_USE_COMPLEX)
64       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
65 #else
66       fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
67 #endif
68       break;
69     case 3:
70 #if defined(PETSC_USE_COMPLEX)
71       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
72       //fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,FFTW_MEASURE);
73 #else
74       fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
75 #endif
76       break;
77     default:
78 #if defined(PETSC_USE_COMPLEX)
79       fftw->p_forward = fftw_plan_dft((int)ndim,(int*)dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
80       /* bug: dim[3] changes from 10 to 0 '--with-64-bit-indices=1' */
81 #else
82       fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
83 #endif
84       break;
85     }
86     fftw->finarray  = x_array;
87     fftw->foutarray = y_array;
88     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
89                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
90     fftw_execute(fftw->p_forward);
91   } else { /* use existing plan */
92     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
93       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
94     } else {
95       fftw_execute(fftw->p_forward);
96     }
97   }
98   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
99   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
100   ierr = PetscFree(dim_32);CHKERRQ(ierr);
101   PetscFunctionReturn(0);
102 }
103 
104 /* MatMultTranspose_SeqFFTW performs serial backward DFT
105    Input parameter:
106      A - the matrix
107      x - the vector on which BDFT will be performed
108 
109    Output parameter:
110      y - vector that stores result of BDFT
111 */
112 
113 #undef __FUNCT__
114 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
115 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
116 {
117   PetscErrorCode ierr;
118   Mat_FFT        *fft  = (Mat_FFT*)A->data;
119   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
120   PetscScalar    *x_array,*y_array;
121   PetscInt       ndim=fft->ndim,*dim=fft->dim;
122   int            *dim_32,i; /* for 64-bit */
123 
124   PetscFunctionBegin;
125   ierr = PetscMalloc(ndim*sizeof(int),&dim_32);
126   for (i=0; i<ndim; i++) dim_32[i] = (int)dim[i];
127 
128   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
129   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
130   if (!fftw->p_backward) { /* create a plan, then excute it */
131     switch (ndim) {
132     case 1:
133 #if defined(PETSC_USE_COMPLEX)
134       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
135 #else
136       fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
137 #endif
138       break;
139     case 2:
140 #if defined(PETSC_USE_COMPLEX)
141       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
142 #else
143       fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
144 #endif
145       break;
146     case 3:
147 #if defined(PETSC_USE_COMPLEX)
148       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
149       //fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,FFTW_MEASURE);
150 #else
151       fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
152 #endif
153       break;
154     default:
155 #if defined(PETSC_USE_COMPLEX)
156       fftw->p_backward = fftw_plan_dft((int)ndim,dim_32,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
157 #else
158       fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
159 #endif
160       break;
161     }
162     fftw->binarray  = x_array;
163     fftw->boutarray = y_array;
164     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
165   } else { /* use existing plan */
166     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
167       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
168     } else {
169       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
170     }
171   }
172   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
173   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
174   ierr = PetscFree(dim_32);CHKERRQ(ierr);
175   PetscFunctionReturn(0);
176 }
177 
178 /* MatMult_MPIFFTW performs forward DFT in parallel
179    Input parameter:
180      A - the matrix
181      x - the vector on which FDFT will be performed
182 
183    Output parameter:
184    y   - vector that stores result of FDFT
185 */
186 #undef __FUNCT__
187 #define __FUNCT__ "MatMult_MPIFFTW"
188 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
189 {
190   PetscErrorCode ierr;
191   Mat_FFT        *fft  = (Mat_FFT*)A->data;
192   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
193   PetscScalar    *x_array,*y_array;
194   PetscInt       ndim=fft->ndim,*dim=fft->dim;
195   MPI_Comm       comm;
196 
197   PetscFunctionBegin;
198   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
199   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
200   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
201   if (!fftw->p_forward) { /* create a plan, then excute it */
202     switch (ndim) {
203     case 1:
204 #if defined(PETSC_USE_COMPLEX)
205       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
206 #else
207       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
208 #endif
209       break;
210     case 2:
211 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
212       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
213 #else
214       fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
215 #endif
216       break;
217     case 3:
218 #if defined(PETSC_USE_COMPLEX)
219       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
220 #else
221       fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
222 #endif
223       break;
224     default:
225 #if defined(PETSC_USE_COMPLEX)
226       fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
227 #else
228       fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
229 #endif
230       break;
231     }
232     fftw->finarray  = x_array;
233     fftw->foutarray = y_array;
234     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
235                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
236     fftw_execute(fftw->p_forward);
237   } else { /* use existing plan */
238     if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
239       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
240     } else {
241       fftw_execute(fftw->p_forward);
242     }
243   }
244   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
245   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
246   PetscFunctionReturn(0);
247 }
248 
249 /* MatMultTranspose_MPIFFTW performs parallel backward DFT
250    Input parameter:
251      A - the matrix
252      x - the vector on which BDFT will be performed
253 
254    Output parameter:
255      y - vector that stores result of BDFT
256 */
257 #undef __FUNCT__
258 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
259 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
260 {
261   PetscErrorCode ierr;
262   Mat_FFT        *fft  = (Mat_FFT*)A->data;
263   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
264   PetscScalar    *x_array,*y_array;
265   PetscInt       ndim=fft->ndim,*dim=fft->dim;
266   MPI_Comm       comm;
267 
268   PetscFunctionBegin;
269   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
270   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
271   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
272   if (!fftw->p_backward) { /* create a plan, then excute it */
273     switch (ndim) {
274     case 1:
275 #if defined(PETSC_USE_COMPLEX)
276       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
277 #else
278       SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
279 #endif
280       break;
281     case 2:
282 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
283       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
284 #else
285       fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
286 #endif
287       break;
288     case 3:
289 #if defined(PETSC_USE_COMPLEX)
290       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
291 #else
292       fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
293 #endif
294       break;
295     default:
296 #if defined(PETSC_USE_COMPLEX)
297       fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
298 #else
299       fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
300 #endif
301       break;
302     }
303     fftw->binarray  = x_array;
304     fftw->boutarray = y_array;
305     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
306   } else { /* use existing plan */
307     if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
308       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
309     } else {
310       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
311     }
312   }
313   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
314   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatDestroy_FFTW"
320 PetscErrorCode MatDestroy_FFTW(Mat A)
321 {
322   Mat_FFT        *fft  = (Mat_FFT*)A->data;
323   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
324   PetscErrorCode ierr;
325 
326   PetscFunctionBegin;
327   fftw_destroy_plan(fftw->p_forward);
328   fftw_destroy_plan(fftw->p_backward);
329   ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr);
330   ierr = PetscFree(fft->data);CHKERRQ(ierr);
331   fftw_mpi_cleanup();
332   PetscFunctionReturn(0);
333 }
334 
335 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
336 #undef __FUNCT__
337 #define __FUNCT__ "VecDestroy_MPIFFTW"
338 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
339 {
340   PetscErrorCode ierr;
341   PetscScalar    *array;
342 
343   PetscFunctionBegin;
344   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
345   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
346   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
347   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW"
/*@
   MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
     parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.   A - the matrix

   Output Parameters:
+   x - (optional) input vector of forward FFTW
.   y - (optional) output vector of forward FFTW
-   z - (optional) output vector of backward FFTW

  Level: advanced

  Note: The parallel layout of the output of forward FFTW is always the same as the input
        of the backward FFTW. But the parallel layout of the input vector of forward
        FFTW might not be the same as the output of backward FFTW.
        Also note that we need to provide enough space while doing a parallel real transform.
        We need to pad extra zeros at the end of the last dimension. For this reason one needs to
        invoke the fftw_mpi_local_size_transposed routine. Remember one has to change the
        last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
        depends on whether the last dimension is even or odd: two zeros for an even last dimension,
        one zero for an odd one.
        Lastly one needs some scratch space at the end of the data set in each process. alloc_local
        figures out how much space is needed, i.e. it figures out the data+scratch space for
        each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatGetVecsFFTW_FFTW); a no-op for non-FFTW matrices */
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
392 
#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* MatGetVecsFFTW_FFTW - implementation behind MatGetVecsFFTW(): creates work vectors whose
   data layout matches what FFTW expects for the forward input (fin), forward output (fout)
   and backward output (bout).  Any of the three output arguments may be NULL.
   In the parallel cases the data arrays are obtained from fftw_malloc() (which provides the
   data+scratch space FFTW's alloc_local reports) and freed by VecDestroy_MPIFFTW. */
PetscErrorCode  MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim,n=fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real transforms use the padded local size n rather than the global N */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double    *data_finr,*data_boutr;
    PetscInt  n1,N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      /* NOTE(review): no break here in the real build, but SETERRQ returns so control never falls through */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D: the forward and backward distributions can differ, so each is queried separately */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real 2D: last dimension padded to 2*(dim[1]/2+1) doubles; sizes come from the transposed layout query */
      alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      /* real 3D: same padded-last-dimension scheme as the 2D case */
      alloc_local =  fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* real nD: temporarily replace the last FFTW dimension by temp/2+1 for the layout query, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}
608 
#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

  Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
         one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job via a scatter operation.

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (VecScatterPetscToFFTW_FFTW); a no-op for non-FFTW matrices */
  ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW"
645 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
646 {
647   PetscErrorCode ierr;
648   MPI_Comm       comm;
649   Mat_FFT        *fft  = (Mat_FFT*)A->data;
650   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
651   PetscInt       N     =fft->N;
652   PetscInt       ndim  =fft->ndim,*dim=fft->dim;
653   PetscInt       low;
654   PetscMPIInt    rank,size;
655   PetscInt       vsize,vsize1;
656   //ptrdiff_t      alloc_local,local_n0,local_0_start;
657   ptrdiff_t      local_n0,local_0_start;
658   ptrdiff_t      local_n1,local_1_start;
659   VecScatter     vecscat;
660   IS             list1,list2;
661 #if !defined(PETSC_USE_COMPLEX)
662   PetscInt       i,j,k,partial_dim;
663   PetscInt       *indx1, *indx2, tempindx, tempindx1;
664   PetscInt       N1, n1,NM;
665   ptrdiff_t      temp;
666 #endif
667 
668   PetscFunctionBegin;
669   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
670   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
671   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
672   ierr = VecGetOwnershipRange(y,&low,NULL);
673 
674   if (size==1) {
675     ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
676     ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
677     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
678     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
679     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
680     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
681     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
682     ierr = ISDestroy(&list1);CHKERRQ(ierr);
683   } else {
684     switch (ndim) {
685     case 1:
686 #if defined(PETSC_USE_COMPLEX)
687       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
688 
689       ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);
690       ierr = ISCreateStride(comm,local_n0,low,1,&list2);
691       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
692       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
693       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
694       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
695       ierr = ISDestroy(&list1);CHKERRQ(ierr);
696       ierr = ISDestroy(&list2);CHKERRQ(ierr);
697 #else
698       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
699 #endif
700       break;
701     case 2:
702 #if defined(PETSC_USE_COMPLEX)
703       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
704 
705       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
706       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
707       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
708       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
709       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
710       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
711       ierr = ISDestroy(&list1);CHKERRQ(ierr);
712       ierr = ISDestroy(&list2);CHKERRQ(ierr);
713 #else
714       alloc_local =  fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
715 
716       N1   = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
717       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
718       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
719 
720       if (dim[1]%2==0) {
721         NM = dim[1]+2;
722       } else {
723         NM = dim[1]+1;
724       }
725       for (i=0; i<local_n0; i++) {
726         for (j=0; j<dim[1]; j++) {
727           tempindx  = i*dim[1] + j;
728           tempindx1 = i*NM + j;
729 
730           indx1[tempindx]=local_0_start*dim[1]+tempindx;
731           indx2[tempindx]=low+tempindx1;
732         }
733       }
734 
735       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
736       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
737 
738       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
739       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
741       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
742       ierr = ISDestroy(&list1);CHKERRQ(ierr);
743       ierr = ISDestroy(&list2);CHKERRQ(ierr);
744       ierr = PetscFree(indx1);CHKERRQ(ierr);
745       ierr = PetscFree(indx2);CHKERRQ(ierr);
746 #endif
747       break;
748 
749     case 3:
750 #if defined(PETSC_USE_COMPLEX)
751       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
752 
753       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
754       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
755       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
756       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
757       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
758       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
759       ierr = ISDestroy(&list1);CHKERRQ(ierr);
760       ierr = ISDestroy(&list2);CHKERRQ(ierr);
761 #else
762       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
763 
764       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
765 
766       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
767       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
768 
769       if (dim[2]%2==0) NM = dim[2]+2;
770       else             NM = dim[2]+1;
771 
772       for (i=0; i<local_n0; i++) {
773         for (j=0; j<dim[1]; j++) {
774           for (k=0; k<dim[2]; k++) {
775             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
776             tempindx1 = i*dim[1]*NM + j*NM + k;
777 
778             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
779             indx2[tempindx]=low+tempindx1;
780           }
781         }
782       }
783 
784       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
785       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
786       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
787       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
788       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
789       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
790       ierr = ISDestroy(&list1);CHKERRQ(ierr);
791       ierr = ISDestroy(&list2);CHKERRQ(ierr);
792       ierr = PetscFree(indx1);CHKERRQ(ierr);
793       ierr = PetscFree(indx2);CHKERRQ(ierr);
794 #endif
795       break;
796 
797     default:
798 #if defined(PETSC_USE_COMPLEX)
799       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
800 
801       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
802       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
803       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
804       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
805       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
806       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
807       ierr = ISDestroy(&list1);CHKERRQ(ierr);
808       ierr = ISDestroy(&list2);CHKERRQ(ierr);
809 #else
810       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
811 
812       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
813 
814       fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
815 
816       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
817 
818       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
819 
820       partial_dim = fftw->partial_dim;
821 
822       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
823       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
824 
825       if (dim[ndim-1]%2==0) NM = 2;
826       else                  NM = 1;
827 
828       j = low;
829       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
830         indx1[i] = local_0_start*partial_dim + i;
831         indx2[i] = j;
832         if (k%dim[ndim-1]==0) j+=NM;
833         j++;
834       }
835       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
836       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
837       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
838       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
841       ierr = ISDestroy(&list1);CHKERRQ(ierr);
842       ierr = ISDestroy(&list2);CHKERRQ(ierr);
843       ierr = PetscFree(indx1);CHKERRQ(ierr);
844       ierr = PetscFree(indx2);CHKERRQ(ierr);
845 #endif
846       break;
847     }
848   }
849   PetscFunctionReturn(0);
850 }
851 
#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

    Input Parameters:
+   A - FFTW matrix
-   x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A (e.g. VecScatterFFTWToPetsc_FFTW);
     PetscTryMethod makes this a no-op if no such method was composed */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
884 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
885 {
886   PetscErrorCode ierr;
887   MPI_Comm       comm;
888   Mat_FFT        *fft  = (Mat_FFT*)A->data;
889   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
890   PetscInt       N     = fft->N;
891   PetscInt       ndim  = fft->ndim,*dim=fft->dim;
892   PetscInt       low;
893   PetscMPIInt    rank,size;
894   ptrdiff_t      local_n0,local_0_start;
895   ptrdiff_t      local_n1,local_1_start;
896   VecScatter     vecscat;
897   IS             list1,list2;
898 #if !defined(PETSC_USE_COMPLEX)
899   PetscInt  i,j,k,partial_dim;
900   PetscInt  *indx1, *indx2, tempindx, tempindx1;
901   PetscInt  N1, n1,NM;
902   ptrdiff_t temp;
903 #endif
904 
905   PetscFunctionBegin;
906   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
907   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
908   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
909   ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);
910 
911   if (size==1) {
912     ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
913     ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
914     ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915     ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916     ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
917     ierr = ISDestroy(&list1);CHKERRQ(ierr);
918 
919   } else {
920     switch (ndim) {
921     case 1:
922 #if defined(PETSC_USE_COMPLEX)
923       fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
924 
925       ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);
926       ierr = ISCreateStride(comm,local_n1,low,1,&list2);
927       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
928       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
931       ierr = ISDestroy(&list1);CHKERRQ(ierr);
932       ierr = ISDestroy(&list2);CHKERRQ(ierr);
933 #else
934       SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
935 #endif
936       break;
937     case 2:
938 #if defined(PETSC_USE_COMPLEX)
939       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
940 
941       ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);
942       ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);
943       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
944       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
947       ierr = ISDestroy(&list1);CHKERRQ(ierr);
948       ierr = ISDestroy(&list2);CHKERRQ(ierr);
949 #else
950       fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
951 
952       N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
953 
954       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
955       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);
956 
957       if (dim[1]%2==0) NM = dim[1]+2;
958       else             NM = dim[1]+1;
959 
960       for (i=0; i<local_n0; i++) {
961         for (j=0; j<dim[1]; j++) {
962           tempindx = i*dim[1] + j;
963           tempindx1 = i*NM + j;
964 
965           indx1[tempindx]=local_0_start*dim[1]+tempindx;
966           indx2[tempindx]=low+tempindx1;
967         }
968       }
969 
970       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
971       ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
972 
973       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
974       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
977       ierr = ISDestroy(&list1);CHKERRQ(ierr);
978       ierr = ISDestroy(&list2);CHKERRQ(ierr);
979       ierr = PetscFree(indx1);CHKERRQ(ierr);
980       ierr = PetscFree(indx2);CHKERRQ(ierr);
981 #endif
982       break;
983     case 3:
984 #if defined(PETSC_USE_COMPLEX)
985       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
986 
987       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
988       ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
989       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
990       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
993       ierr = ISDestroy(&list1);CHKERRQ(ierr);
994       ierr = ISDestroy(&list2);CHKERRQ(ierr);
995 #else
996 
997       fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
998 
999       N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
1000 
1001       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
1002       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);
1003 
1004       if (dim[2]%2==0) NM = dim[2]+2;
1005       else             NM = dim[2]+1;
1006 
1007       for (i=0; i<local_n0; i++) {
1008         for (j=0; j<dim[1]; j++) {
1009           for (k=0; k<dim[2]; k++) {
1010             tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
1011             tempindx1 = i*dim[1]*NM + j*NM + k;
1012 
1013             indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
1014             indx2[tempindx]=low+tempindx1;
1015           }
1016         }
1017       }
1018 
1019       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1020       ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1021 
1022       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1023       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1024       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1026       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1027       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1028       ierr = PetscFree(indx1);CHKERRQ(ierr);
1029       ierr = PetscFree(indx2);CHKERRQ(ierr);
1030 #endif
1031       break;
1032     default:
1033 #if defined(PETSC_USE_COMPLEX)
1034       fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
1035 
1036       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
1037       ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
1038       ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
1039       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1040       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1041       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1042       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1043       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1044 #else
1045       temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
1046 
1047       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
1048 
1049       alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1050 
1051       N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
1052 
1053       (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;
1054 
1055       partial_dim = fftw->partial_dim;
1056 
1057       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
1058       ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);
1059 
1060       if (dim[ndim-1]%2==0) NM = 2;
1061       else                  NM = 1;
1062 
1063       j = low;
1064       for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
1065         indx1[i] = local_0_start*partial_dim + i;
1066         indx2[i] = j;
1067         if (k%dim[ndim-1]==0) j+=NM;
1068         j++;
1069       }
1070       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
1071       ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
1072 
1073       ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
1074       ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1075       ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1076       ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
1077       ierr = ISDestroy(&list1);CHKERRQ(ierr);
1078       ierr = ISDestroy(&list2);CHKERRQ(ierr);
1079       ierr = PetscFree(indx1);CHKERRQ(ierr);
1080       ierr = PetscFree(indx2);CHKERRQ(ierr);
1081 #endif
1082       break;
1083     }
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatCreate_FFTW"
1090 /*
1091     MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW
1092 
1093   Options Database Keys:
1094 + -mat_fftw_plannerflags - set FFTW planner flags
1095 
1096    Level: intermediate
1097 
1098 */
1099 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
1100 {
1101   PetscErrorCode ierr;
1102   MPI_Comm       comm;
1103   Mat_FFT        *fft=(Mat_FFT*)A->data;
1104   Mat_FFTW       *fftw;
1105   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
1106   const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
1107   unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
1108   PetscBool      flg;
1109   PetscInt       p_flag,partial_dim=1,ctr;
1110   PetscMPIInt    size,rank;
1111   ptrdiff_t      *pdim;
1112   ptrdiff_t      local_n1,local_1_start;
1113 #if !defined(PETSC_USE_COMPLEX)
1114   ptrdiff_t temp;
1115   PetscInt  N1,tot_dim;
1116 #else
1117   PetscInt n1;
1118 #endif
1119 
1120   PetscFunctionBegin;
1121   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1122   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
1123   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
1124 
1125   fftw_mpi_init();
1126   pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
1127   pdim[0] = dim[0];
1128 #if !defined(PETSC_USE_COMPLEX)
1129   tot_dim = 2*dim[0];
1130 #endif
1131   for (ctr=1; ctr<ndim; ctr++) {
1132     partial_dim *= dim[ctr];
1133     pdim[ctr]    = dim[ctr];
1134 #if !defined(PETSC_USE_COMPLEX)
1135     if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
1136     else             tot_dim *= dim[ctr];
1137 #endif
1138   }
1139 
1140   if (size == 1) {
1141 #if defined(PETSC_USE_COMPLEX)
1142     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
1143     n    = N;
1144 #else
1145     ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
1146     n    = tot_dim;
1147 #endif
1148 
1149   } else {
1150     ptrdiff_t local_n0,local_0_start;
1151     switch (ndim) {
1152     case 1:
1153 #if !defined(PETSC_USE_COMPLEX)
1154       SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
1155 #else
1156       fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
1157 
1158       n    = (PetscInt)local_n0;
1159       n1   = (PetscInt)local_n1;
1160       ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
1161 #endif
1162       break;
1163     case 2:
1164 #if defined(PETSC_USE_COMPLEX)
1165       fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
1166       /*
1167        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
1168        PetscSynchronizedFlush(comm,PETSC_STDOUT);
1169        */
1170       n    = (PetscInt)local_n0*dim[1];
1171       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1172 #else
1173       alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1174 
1175       n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
1176       ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));
1177 #endif
1178       break;
1179     case 3:
1180 #if defined(PETSC_USE_COMPLEX)
1181       fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
1182 
1183       n    = (PetscInt)local_n0*dim[1]*dim[2];
1184       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1185 #else
1186       alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1187 
1188       n   = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
1189       ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));
1190 #endif
1191       break;
1192     default:
1193 #if defined(PETSC_USE_COMPLEX)
1194       fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
1195 
1196       n    = (PetscInt)local_n0*partial_dim;
1197       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
1198 #else
1199       temp = pdim[ndim-1];
1200 
1201       pdim[ndim-1] = temp/2 + 1;
1202 
1203       alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
1204 
1205       n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
1206       N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);
1207 
1208       pdim[ndim-1] = temp;
1209 
1210       ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
1211 #endif
1212       break;
1213     }
1214   }
1215   ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
1216   ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
1217   fft->data = (void*)fftw;
1218 
1219   fft->n            = n;
1220   fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
1221   fftw->partial_dim = partial_dim;
1222 
1223   ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
1224 
1225   for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];
1226 
1227   fftw->p_forward  = 0;
1228   fftw->p_backward = 0;
1229   fftw->p_flag     = FFTW_ESTIMATE;
1230 
1231   if (size == 1) {
1232     A->ops->mult          = MatMult_SeqFFTW;
1233     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
1234   } else {
1235     A->ops->mult          = MatMult_MPIFFTW;
1236     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
1237   }
1238   fft->matdestroy = MatDestroy_FFTW;
1239   A->assembled    = PETSC_TRUE;
1240   A->preallocated = PETSC_TRUE;
1241 
1242   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);
1245 
1246   /* get runtime options */
1247   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
1248   ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
1249   if (flg) {
1250     fftw->p_flag = iplans[p_flag];
1251   }
1252   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 
1257 
1258 
1259