xref: /petsc/src/mat/impls/fft/fftw/fftw.c (revision c2cd2aa3969a14fa686ea977e3dce69115efdab6)
1 
2 /*
3     Provides an interface to the FFTW package.
4     Testing examples can be found in ~src/mat/examples/tests
5 */
6 
7 #include <../src/mat/impls/fft/fft.h>   /*I "petscmat.h" I*/
8 EXTERN_C_BEGIN
9 #include <fftw3-mpi.h>
10 EXTERN_C_END
11 
12 typedef struct {
13   ptrdiff_t   ndim_fftw,*dim_fftw;
14   fftw_plan   p_forward,p_backward;
15   unsigned    p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
16   PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be
17                                                             executed for the arrays with which the plan was created */
18 } Mat_FFTW;
19 
20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
24 extern PetscErrorCode MatDestroy_FFTW(Mat);
25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*);
27 
28 #undef __FUNCT__
29 #define __FUNCT__ "MatMult_SeqFFTW"
30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
31 {
32   PetscErrorCode ierr;
33   Mat_FFT        *fft  = (Mat_FFT*)A->data;
34   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
35   PetscScalar    *x_array,*y_array;
36   PetscInt       ndim=fft->ndim,*dim=fft->dim;
37 
38   PetscFunctionBegin;
39 #if !defined(PETSC_USE_COMPLEX)
40 
41   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
42 #endif
43   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
44   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
45   if (!fftw->p_forward){ /* create a plan, then excute it */
46     switch (ndim){
47     case 1:
48       fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
49       break;
50     case 2:
51       fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
52       break;
53     case 3:
54       fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
55       break;
56     default:
57       fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
58       break;
59     }
60     fftw->finarray  = x_array;
61     fftw->foutarray = y_array;
62     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
63                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
64     fftw_execute(fftw->p_forward);
65   } else { /* use existing plan */
66     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
67       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
68     } else {
69       fftw_execute(fftw->p_forward);
70     }
71   }
72   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
73   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
74   PetscFunctionReturn(0);
75 }
76 
77 #undef __FUNCT__
78 #define __FUNCT__ "MatMultTranspose_SeqFFTW"
79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
80 {
81   PetscErrorCode ierr;
82   Mat_FFT        *fft = (Mat_FFT*)A->data;
83   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
84   PetscScalar    *x_array,*y_array;
85   PetscInt       ndim=fft->ndim,*dim=fft->dim;
86 
87   PetscFunctionBegin;
88 #if !defined(PETSC_USE_COMPLEX)
89   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
90 #endif
91   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
92   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
93   if (!fftw->p_backward){ /* create a plan, then excute it */
94     switch (ndim){
95     case 1:
96       fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
97       break;
98     case 2:
99       fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
100       break;
101     case 3:
102       fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
103       break;
104     default:
105       fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
106       break;
107     }
108     fftw->binarray  = x_array;
109     fftw->boutarray = y_array;
110     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
111   } else { /* use existing plan */
112     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
113       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
114     } else {
115       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
116     }
117   }
118   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
119   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatMult_MPIFFTW"
125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
126 {
127   PetscErrorCode ierr;
128   Mat_FFT        *fft  = (Mat_FFT*)A->data;
129   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
130   PetscScalar    *x_array,*y_array;
131   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
132   MPI_Comm       comm=((PetscObject)A)->comm;
133   ptrdiff_t      ndim1=(ptrdiff_t) ndim,*pdim;
134 
135   PetscFunctionBegin;
136 #if !defined(PETSC_USE_COMPLEX)
137   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
138 #endif
139   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
140   for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
141 
142   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
143   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
144   if (!fftw->p_forward){ /* create a plan, then excute it */
145     switch (ndim){
146     case 1:
147       fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
148       break;
149     case 2:
150       fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
151       break;
152     case 3:
153       fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
154       break;
155     default:
156       fftw->p_forward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
157  //     fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
158       break;
159     }
160     fftw->finarray  = x_array;
161     fftw->foutarray = y_array;
162     /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten!
163                 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
164     fftw_execute(fftw->p_forward);
165   } else { /* use existing plan */
166     if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */
167       fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
168     } else {
169       fftw_execute(fftw->p_forward);
170     }
171   }
172   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
173   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
174   PetscFunctionReturn(0);
175 }
176 
177 #undef __FUNCT__
178 #define __FUNCT__ "MatMultTranspose_MPIFFTW"
179 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
180 {
181   PetscErrorCode ierr;
182   Mat_FFT        *fft  = (Mat_FFT*)A->data;
183   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
184   PetscScalar    *x_array,*y_array;
185   PetscInt       ndim=fft->ndim,*dim=fft->dim,ctr;
186   MPI_Comm       comm=((PetscObject)A)->comm;
187   ptrdiff_t      ndim1=(ptrdiff_t)ndim,*pdim;
188 
189   PetscFunctionBegin;
190 #if !defined(PETSC_USE_COMPLEX)
191   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
192 #endif
193   ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); // should pdim be a member of Mat_FFTW?
194   for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr];
195 
196   ierr = VecGetArray(x,&x_array);CHKERRQ(ierr);
197   ierr = VecGetArray(y,&y_array);CHKERRQ(ierr);
198   if (!fftw->p_backward){ /* create a plan, then excute it */
199     switch (ndim){
200     case 1:
201       fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
202       break;
203     case 2:
204       fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
205       break;
206     case 3:
207       fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
208       break;
209     default:
210       fftw->p_backward = fftw_mpi_plan_dft(ndim1,pdim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
211       break;
212     }
213     fftw->binarray  = x_array;
214     fftw->boutarray = y_array;
215     fftw_execute(fftw->p_backward);CHKERRQ(ierr);
216   } else { /* use existing plan */
217     if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */
218       fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
219     } else {
220       fftw_execute(fftw->p_backward);CHKERRQ(ierr);
221     }
222   }
223   ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr);
224   ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr);
225   ierr = PetscFree(pdim);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "MatDestroy_FFTW"
231 PetscErrorCode MatDestroy_FFTW(Mat A)
232 {
233   Mat_FFT        *fft = (Mat_FFT*)A->data;
234   Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
235   PetscErrorCode ierr;
236 
237   PetscFunctionBegin;
238 #if !defined(PETSC_USE_COMPLEX)
239   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
240 #endif
241   fftw_destroy_plan(fftw->p_forward);
242   fftw_destroy_plan(fftw->p_backward);
243   ierr = PetscFree(fft->data);CHKERRQ(ierr);
244   PetscFunctionReturn(0);
245 }
246 
247 #include <../src/vec/vec/impls/mpi/pvecimpl.h>   /*I  "petscvec.h"   I*/
248 #undef __FUNCT__
249 #define __FUNCT__ "VecDestroy_MPIFFTW"
250 PetscErrorCode VecDestroy_MPIFFTW(Vec v)
251 {
252   PetscErrorCode  ierr;
253   PetscScalar     *array;
254 
255   PetscFunctionBegin;
256 #if !defined(PETSC_USE_COMPLEX)
257   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
258 #endif
259   ierr = VecGetArray(v,&array);CHKERRQ(ierr);
260   fftw_free((fftw_complex*)array);CHKERRQ(ierr);
261   ierr = VecRestoreArray(v,&array);CHKERRQ(ierr);
262   ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
263   PetscFunctionReturn(0);
264 }
265 
266 #undef __FUNCT__
267 #define __FUNCT__ "MatGetVecs_FFTW"
268 /*
269    MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the
270      parallel layout determined by FFTW
271 
272    Collective on Mat
273 
274    Input Parameter:
275 .  mat - the matrix
276 
277    Output Parameter:
278 +   fin - (optional) input vector of forward FFTW
279 -   fout - (optional) output vector of forward FFTW
280 
281   Level: advanced
282 
283 .seealso: MatCreateFFTW()
284 */
285 PetscErrorCode  MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout)
286 {
287   PetscErrorCode ierr;
288   PetscMPIInt    size,rank;
289   MPI_Comm       comm=((PetscObject)A)->comm;
290   Mat_FFT        *fft = (Mat_FFT*)A->data;
291   PetscInt       N=fft->N;
292 
293   PetscFunctionBegin;
294 #if !defined(PETSC_USE_COMPLEX)
295   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers");
296 #endif
297   PetscValidHeaderSpecific(A,MAT_CLASSID,1);
298   PetscValidType(A,1);
299 
300   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
301   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
302   if (size == 1){ /* sequential case */
303     if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
304     if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
305   } else {        /* mpi case */
306     ptrdiff_t      alloc_local,local_n0,local_0_start;
307     ptrdiff_t      local_n1,local_1_end;
308     PetscInt       ndim=fft->ndim,*dim=fft->dim,n=fft->n,ctr;
309     fftw_complex   *data_fin,*data_fout;
310     ptrdiff_t      ndim1,*pdim;
311     ndim1=(ptrdiff_t) ndim;
312     pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
313 
314     for(ctr=0;ctr<ndim;ctr++)
315         {
316            pdim[ctr] = dim[ctr];
317        }
318 
319     switch (ndim){
320     case 1:
321       /* Get local size */
322 
323       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
324       if (fin) {
325         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
326         ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
327         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
328       }
329       if (fout) {
330         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
331         ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
332         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
333       }
334       break;
335     case 2:
336       /* Get local size */
337       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
338       if (fin) {
339         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
340         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
341         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
342       }
343       if (fout) {
344         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
345         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
346         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
347       }
348       break;
349     case 3:
350       /* Get local size */
351       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
352 //      printf("The quantity n is %d",n);
353       if (fin) {
354         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
355         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
356         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
357       }
358       if (fout) {
359         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
360         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
361         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
362       }
363       break;
364     default:
365       /* Get local size */
366       alloc_local = fftw_mpi_local_size(ndim1,pdim,comm,&local_n0,&local_0_start);
367 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
368 //      printf("The value of alloc local is %d",alloc_local);
369 //      pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
370 //      for(i=0;i<ndim;i++)
371 //         {
372 //          pdim[i]=dim[i];printf("%d",pdim[i]);
373 //         }
374 //      alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
375 //      printf("The quantity n is %d",n);
376       if (fin) {
377         data_fin  = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
378         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);
379         (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
380       }
381       if (fout) {
382         data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
383         ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);
384         (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
385       }
386       break;
387     }
388   }
389   if (fin){
390     ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr);
391   }
392   if (fout){
393     ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr);
394   }
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatCreate_FFTW"
401 /*
402       MatCreate_FFTW - Creates a matrix object that provides FFT
403   via the external package FFTW
404 
405   Options Database Keys:
406 + -mat_fftw_plannerflags - set FFTW planner flags
407 
408    Level: intermediate
409 
410 */
411 PetscErrorCode MatCreate_FFTW(Mat A)
412 {
413   PetscErrorCode ierr;
414   MPI_Comm       comm=((PetscObject)A)->comm;
415   Mat_FFT        *fft=(Mat_FFT*)A->data;
416   Mat_FFTW       *fftw;
417   PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim;
418   const char     *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
419   PetscBool      flg;
420   PetscInt       p_flag,partial_dim=1,ctr;
421   PetscMPIInt    size,rank;
422   ptrdiff_t      *pdim;
423 
424   PetscFunctionBegin;
425 #if !defined(PETSC_USE_COMPLEX)
426   SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers");
427 #endif
428 
429   ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
430   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
431   pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t));
432   pdim[0] = dim[0];
433   for(ctr=1;ctr<ndim;ctr++)
434       {
435           partial_dim*=dim[ctr];
436           pdim[ctr] = dim[ctr];
437       }
438 //  printf("partial dimension is %d",partial_dim);
439   if (size == 1) {
440     ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
441     n = N;
442   } else {
443     ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end;
444     switch (ndim){
445     case 1:
446       alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end);
447       n = (PetscInt)local_n0;
448       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
449 
450       break;
451     case 2:
452       alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
453       /*
454        PetscMPIInt    rank;
455        PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
456        PetscSynchronizedFlush(comm);
457        */
458       n = (PetscInt)local_n0*dim[1];
459       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
460       break;
461     case 3:
462       alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
463 //      printf("The value of alloc local is %d",alloc_local);
464       n = (PetscInt)local_n0*dim[1]*dim[2];
465       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
466       break;
467     default:
468       alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);
469 //      printf("The value of alloc local is %d from process %d\n",alloc_local,rank);
470 //      alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start);
471       n = (PetscInt)local_n0*partial_dim;
472 //      printf("New partial dimension is %d %d %d",n,N,ndim);
473       ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
474       break;
475     }
476   }
477   ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
478 
479   ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr);
480   fft->data = (void*)fftw;
481 
482   fft->n           = n;
483   fftw->p_forward  = 0;
484   fftw->p_backward = 0;
485   fftw->p_flag     = FFTW_ESTIMATE;
486 
487   if (size == 1){
488     A->ops->mult          = MatMult_SeqFFTW;
489     A->ops->multtranspose = MatMultTranspose_SeqFFTW;
490   } else {
491     A->ops->mult          = MatMult_MPIFFTW;
492     A->ops->multtranspose = MatMultTranspose_MPIFFTW;
493   }
494   fft->matdestroy          = MatDestroy_FFTW;
495   A->ops->getvecs       = MatGetVecs_FFTW;
496   A->assembled          = PETSC_TRUE;
497 
498   /* get runtime options */
499   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
500     ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr);
501     if (flg) {fftw->p_flag = (unsigned)p_flag;}
502   PetscOptionsEnd();
503   PetscFunctionReturn(0);
504 }
505 EXTERN_C_END
506