xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision dd7ecb2f948ceb99ce111cd0ccf9220ab4ed17cb)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   fftw_plan   p_forward,p_backward;
14   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
15   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
16                                                             executed for the arrays with which the plan was created */
17 } Mat_FFTW;
18 
19 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
20 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatDestroy_FFTW(Mat);
24 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
25 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
26 
27 #undef __FUNCT__
28 #define __FUNCT__ "MatMult_SeqFFTW"
29 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
30 {
31   PetscErrorCode ierr;
32   Mat_FFT        *fft  = (Mat_FFT*)A->data;
33   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
34   PetscScalar    *x_array,*y_array;
35   PetscInt       ndim=fft->ndim,*dim=fft->dim;
36 
37   PetscFunctionBegin;
38 #if !defined(PETSC_USE_COMPLEX)
39   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
40 #endif
41   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
42   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
43   if (!fftw->p_forward){ /* create a plan, then excute it */
44     switch (ndim){
45     case 1:
46       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
47       break;
48     case 2:
49       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
50       break;
51     case 3:
52       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
53       break;
54     default:
55       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
56       break;
57     }
58     fftw->finarray  = x_array;
59     fftw->foutarray = y_array;
60     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
61                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
62     fftw_execute(fftw->p_forward);
63   } else { /* use existing plan */
64     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
65       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
66     } else {
67       fftw_execute(fftw->p_forward);
68     }
69   }
70   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
71   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 
75 #undef __FUNCT__
76 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
77 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
78 {
79   PetscErrorCode ierr;
80   Mat_FFT        *fft = (Mat_FFT*)A->data;
81   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
82   PetscScalar    *x_array,*y_array;
83   PetscInt       ndim=fft->ndim,*dim=fft->dim;
84 
85   PetscFunctionBegin;
86 #if !defined(PETSC_USE_COMPLEX)
87   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
88 #endif
89   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
90   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
91   if (!fftw->p_backward){ /* create a plan, then excute it */
92     switch (ndim){
93     case 1:
94       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
95       break;
96     case 2:
97       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
98       break;
99     case 3:
100       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
101       break;
102     default:
103       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
104       break;
105     }
106     fftw->binarray  = x_array;
107     fftw->boutarray = y_array;
108     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
109   } else { /* use existing plan */
110     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
111       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
112     } else {
113       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
114     }
115   }
116   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
117   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
121 #undef __FUNCT__
122 #define __FUNCT__ "MatMult_MPIFFTW"
123 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
124 {
125   PetscErrorCode ierr;
126   Mat_FFT        *fft  = (Mat_FFT*)A->data;
127   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
128   PetscScalar    *x_array,*y_array;
129   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
130   MPI_Comm       comm=((PetscObject)A)->comm;
131   ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
132 
133   PetscFunctionBegin;
134 #if !defined(PETSC_USE_COMPLEX)
135   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
136 #endif
137   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
138   for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
139 
140   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
141   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
142   if (!fftw->p_forward){ /* create a plan, then excute it */
143     switch (ndim){
144     case 1:
145       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
146       break;
147     case 2:
148       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
149       break;
150     case 3:
151       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
152       break;
153     default:
154       fftw->p_forward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
155  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
156       break;
157     }
158     fftw->finarray  = x_array;
159     fftw->foutarray = y_array;
160     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
161                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
162     fftw_execute(fftw->p_forward);
163   } else { /* use existing plan */
164     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
165       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
166     } else {
167       fftw_execute(fftw->p_forward);
168     }
169   }
170   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
171   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
172   PetscFunctionReturn(0);
173 }
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
177 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
178 {
179   PetscErrorCode ierr;
180   Mat_FFT        *fft  = (Mat_FFT*)A->data;
181   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
182   PetscScalar    *x_array,*y_array;
183   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
184   MPI_Comm       comm=((PetscObject)A)->comm;
185   ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
186 
187   PetscFunctionBegin;
188 #if !defined(PETSC_USE_COMPLEX)
189   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
190 #endif
191   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); // should pdim be a member of Mat_FFTW?
192   for(ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
193 
194   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
195   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
196   if (!fftw->p_backward){ /* create a plan, then excute it */
197     switch (ndim){
198     case 1:
199       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
200       break;
201     case 2:
202       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
203       break;
204     case 3:
205       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
206       break;
207     default:
208       fftw->p_backward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
209       break;
210     }
211     fftw->binarray  = x_array;
212     fftw->boutarray = y_array;
213     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
214   } else { /* use existing plan */
215     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
216       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
217     } else {
218       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
219     }
220   }
221   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
222   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
223   ierr = PetscFree(pdim);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 #undef __FUNCT__
228 #define __FUNCT__ "MatDestroy_FFTW"
229 PetscErrorCode MatDestroy_FFTW(Mat A)
230 {
231   Mat_FFT        *fft = (Mat_FFT*)A->data;
232   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
233   PetscErrorCode ierr;
234 
235   PetscFunctionBegin;
236 #if !defined(PETSC_USE_COMPLEX)
237   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
238 #endif
239   fftw_destroy_plan(fftw->p_forward);
240   fftw_destroy_plan(fftw->p_backward);
241   ierr = PetscFree(fft->data);CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
246 #undef __FUNCT__
247 #define __FUNCT__ "VecDestroy_MPIFFTW"
248 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
249 {
250   PetscErrorCode  ierr;
251   PetscScalar     *array;
252 
253   PetscFunctionBegin;
254 #if !defined(PETSC_USE_COMPLEX)
255   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
256 #endif
257   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
258   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
259   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
260   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatGetVecs_FFTW"
266 /*
267    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
268      parallel layout determined by FFTW
269 
270    Collective on Mat
271 
272    Input Parameter:
273 .  mat - the matrix
274 
275    Output Parameter:
276 +   fin - (optional) input vector of forward FFTW
277 -   fout - (optional) output vector of forward FFTW
278 
279   Level: advanced
280 
281 .seealso: MatCreateFFTW()
282 */
283 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
284 {
285   PetscErrorCode ierr;
286   PetscMPIInt    size,rank;
287   MPI_Comm       comm=((PetscObject)A)->comm;
288   Mat_FFT        *fft = (Mat_FFT*)A->data;
289   PetscInt       N=fft->N;
290 
291   PetscFunctionBegin;
292 #if !defined(PETSC_USE_COMPLEX)
293   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
294 #endif
295   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
296   PetscValidType(A,1);
297 
298   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
299   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
300   if (size == 1){ /* sequential case */
301     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
302     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
303   } else {        /* mpi case */
304     ptrdiff_t      alloc_local,local_n0,local_0_start;
305     ptrdiff_t      local_n1,local_1_end;
306     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n,ctr;
307     fftw_complex   *data_fin,*data_fout;
308     ptrdiff_t      ndim1,*pdim;
309     ndim1=(ptrdiff_t) ndim;
310     pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
311 
312     for(ctr=0;ctr<ndim;ctr++)
313         {
314            pdim[ctr] = dim[ctr];
315        }
316 
317     switch (ndim){
318     case 1:
319       /* Get local size */
320 
321       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
322       if (fin) {
323         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
324         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
325         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
326       }
327       if (fout) {
328         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
329         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
330         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
331       }
332       break;
333     case 2:
334       /* Get local size */
335       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
336       if (fin) {
337         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
338         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
339         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
340       }
341       if (fout) {
342         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
343         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
344         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
345       }
346       break;
347     case 3:
348       /* Get local size */
349       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
350 //      printf("The quantity n is %d",n);
351       if (fin) {
352         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
353         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
354         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
355       }
356       if (fout) {
357         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
358         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
359         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
360       }
361       break;
362     default:
363       /* Get local size */
364       alloc_local = fftw_mpi_local_size(ndim1,pdim,comm,&local_n0,&local_0_start);
365 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
366 //      printf("The value of alloc local is %d",alloc_local);
367 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
368 //      for(i=0;i<ndim;i++)
369 //         {
370 //          pdim[i]=dim[i];printf("%d",pdim[i]);
371 //         }
372 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
373 //      printf("The quantity n is %d",n);
374       if (fin) {
375         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
376         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
377         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
378       }
379       if (fout) {
380         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
381         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
382         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
383       }
384       break;
385     }
386   }
387   if (fin){
388     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
389   }
390   if (fout){
391     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 EXTERN_C_BEGIN
397 #undef __FUNCT__
398 #define __FUNCT__ "MatCreate_FFTW"
399 /*
400       MatCreate_FFTW - Creates a matrix object that provides FFT
401   via the external package FFTW
402 
403   Options Database Keys:
404 + -mat_fftw_plannerflags - set FFTW planner flags
405 
406    Level: intermediate
407 
408 */
409 PetscErrorCode MatCreate_FFTW(Mat A)
410 {
411   PetscErrorCode ierr;
412   MPI_Comm       comm=((PetscObject)A)->comm;
413   Mat_FFT        *fft=(Mat_FFT*)A->data;
414   Mat_FFTW       *fftw;
415   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
416   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
417   PetscBool      flg;
418   PetscInt       p_flag,partial_dim=1,ctr;
419   PetscMPIInt    size,rank;
420   ptrdiff_t      *pdim;
421 
422   PetscFunctionBegin;
423 #if !defined(PETSC_USE_COMPLEX)
424   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
425 #endif
426 
427   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
428   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
429   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
430   pdim[0] = dim[0];
431   for(ctr=1;ctr<ndim;ctr++)
432       {
433           partial_dim*=dim[ctr];
434           pdim[ctr] = dim[ctr];
435       }
436 //  printf("partial dimension is %d",partial_dim);
437   if (size == 1) {
438     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
439     n = N;
440   } else {
441     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
442     switch (ndim){
443     case 1:
444       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
445       n = (PetscInt)local_n0;
446       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
447 
448       break;
449     case 2:
450       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
451       /*
452        PetscMPIInt    rank;
453        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
454        PetscSynchronizedFlush(comm);
455        */
456       n = (PetscInt)local_n0*dim[1];
457       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
458       break;
459     case 3:
460       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
461 //      printf("The value of alloc local is %d",alloc_local);
462       n = (PetscInt)local_n0*dim[1]*dim[2];
463       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
464       break;
465     default:
466       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
467 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
468 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
469       n = (PetscInt)local_n0*partial_dim;
470 //      printf("New partial dimension is %d %d %d",n,N,ndim);
471       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
472       break;
473     }
474   }
475   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
476 
477   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
478   fft->data = (void*)fftw;
479 
480   fft->n           = n;
481   fftw->p_forward  = 0;
482   fftw->p_backward = 0;
483   fftw->p_flag     = FFTW_ESTIMATE;
484 
485   if (size == 1){
486     A->ops->mult          = MatMult_SeqFFTW;
487     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
488   } else {
489     A->ops->mult          = MatMult_MPIFFTW;
490     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
491   }
492   fft->matdestroy          = MatDestroy_FFTW;
493   A->ops->getvecs       = MatGetVecs_FFTW;
494   A->assembled          = PETSC_TRUE;
495 
496   /* get runtime options */
497   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
498     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
499     if (flg) {fftw->p_flag = (unsigned)p_flag;}
500   PetscOptionsEnd();
501   PetscFunctionReturn(0);
502 }
503 EXTERN_C_END
504