/* Provides an interface to the FFTW package. Testing examples can be found in ~src/mat/examples/tests */ #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ EXTERN_C_BEGIN #include EXTERN_C_END typedef struct { ptrdiff_t ndim_fftw,*dim_fftw; fftw_plan p_forward,p_backward; unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be executed for the arrays with which the plan was created */ } Mat_FFTW; extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); extern PetscErrorCode MatDestroy_FFTW(Mat); extern PetscErrorCode VecDestroy_MPIFFTW(Vec); extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqFFTW" PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_forward){ /* create a plan, then excute it */ switch (ndim){ case 1: fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); break; case 2: fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); break; case 3: fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); break; default: fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); break; } fftw->finarray = x_array; fftw->foutarray = y_array; /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ fftw_execute(fftw->p_forward); } else { /* use existing plan */ if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_forward); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultTranspose_SeqFFTW" PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_backward){ /* create a plan, then excute it */ switch (ndim){ case 1: fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); break; case 2: fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); break; case 3: fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); break; default: fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); break; } fftw->binarray = x_array; fftw->boutarray = y_array; fftw_execute(fftw->p_backward);CHKERRQ(ierr); } else { /* use existing plan */ if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_backward);CHKERRQ(ierr); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_MPIFFTW" PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; MPI_Comm comm=((PetscObject)A)->comm; // PetscInt ctr; // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; // ndim1=(ptrdiff_t) ndim; // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(ctr=0;ctrp_forward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else printf("The code comes here \n"); fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); break; } fftw->finarray = x_array; fftw->foutarray = y_array; /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ fftw_execute(fftw->p_forward); } else { /* use existing plan */ if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_forward); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultTranspose_MPIFFTW" PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; MPI_Comm comm=((PetscObject)A)->comm; // PetscInt ctr; // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; // ndim1=(ptrdiff_t) ndim; // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(ctr=0;ctrp_backward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); break; } fftw->binarray = x_array; fftw->boutarray = y_array; fftw_execute(fftw->p_backward);CHKERRQ(ierr); } else { /* use existing plan */ if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_backward);CHKERRQ(ierr); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatDestroy_FFTW" PetscErrorCode MatDestroy_FFTW(Mat A) { Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscErrorCode ierr; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif fftw_destroy_plan(fftw->p_forward); fftw_destroy_plan(fftw->p_backward); ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); ierr = PetscFree(fft->data);CHKERRQ(ierr); PetscFunctionReturn(0); } #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ #undef __FUNCT__ #define __FUNCT__ "VecDestroy_MPIFFTW" PetscErrorCode VecDestroy_MPIFFTW(Vec v) { PetscErrorCode ierr; PetscScalar *array; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif ierr = VecGetArray(v,&array);CHKERRQ(ierr); fftw_free((fftw_complex*)array);CHKERRQ(ierr); ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); ierr = VecDestroy_MPI(v);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetVecs1DC_FFTW" /* MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the parallel layout determined by FFTW-1D */ PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) { PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N; PetscInt ndim=fft->ndim,*dim=fft->dim; ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; ptrdiff_t f_local_n1,f_local_1_end; ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; ptrdiff_t b_local_n1,b_local_1_end; fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); if (size == 1){ SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); } else { if (ndim>1){ SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} else { f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } if (bin) { data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); (*bin)->ops->destroy = VecDestroy_MPIFFTW; } if (bout) { data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); (*bout)->ops->destroy = VecDestroy_MPIFFTW; } } if (fin){ ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); } if (fout){ ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); } if (bin){ ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); } if (bout){ ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); } PetscFunctionReturn(0); } } #undef __FUNCT__ #define __FUNCT__ "MatGetVecs_FFTW" /* MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the parallel layout determined by FFTW Collective on Mat Input Parameter: . mat - the matrix Output Parameter: + fin - (optional) input vector of forward FFTW - fout - (optional) output vector of forward FFTW Level: advanced .seealso: MatCreateFFTW() */ PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) { PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1,vsize; PetscFunctionBegin; //#if !defined(PETSC_USE_COMPLEX) // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); //#endif PetscValidHeaderSpecific(A,MAT_CLASSID,1); PetscValidType(A,1); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); if (size == 1){ /* sequential case */ if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} printf("The code successfully comes at the end of the routine with one processor\n"); } else { /* mpi case */ ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_end; PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; fftw_complex *data_fin,*data_fout; double *data_finr ; ptrdiff_t local_1_start,temp; // PetscInt ctr; // ptrdiff_t ndim1,*pdim; // ndim1=(ptrdiff_t) ndim; // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(ctr=0;ctr1. Check Documentation for MatGetVecs_FFTW1D routine"); alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } break; case 2: #if !defined(PETSC_USE_COMPLEX) alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); #else /* Get local size */ printf("Hope this does not come here"); alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Hope this does not come here"); #endif break; case 3: /* Get local size */ #if !defined(PETSC_USE_COMPLEX) alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); #else alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); // printf("The quantity n is %d",n); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #endif break; default: /* Get local size */ #if !defined(PETSC_USE_COMPLEX) temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; printf("The value of temp is %ld\n",temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #else alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); // printf("The value of alloc local is %d",alloc_local); // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(i=0;iops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #endif break; } } // if (fin){ // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); // } // if (fout){ // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); // } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "InputTransformFFT" PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) { PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); PetscFunctionReturn(0); } /* InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block Input A, x, y A - FFTW matrix x - user data Options Database Keys: + -mat_fftw_plannerflags - set FFTW planner flags Level: intermediate */ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "InputTransformFFT_FTTW" PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1 ,NM; PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; PetscInt i,j,k,rank,size; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; VecScatter vecscat; IS list1,list2; ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); printf("Local ownership starts at %d\n",low); switch (ndim){ case 1: SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); break; case 2: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); printf("Val local_0_start = %ld\n",local_0_start); if (dim[1]%2==0) NM = dim[1]+2; else NM = dim[1]+1; for (i=0;icomm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1 ,NM; PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; PetscInt i,j,k,rank,size; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start; VecScatter vecscat; IS list1,list2; ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); printf("Local ownership starts at %d\n",low); switch (ndim){ case 1: SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); break; case 2: alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); printf("Val local_0_start = %ld\n",local_0_start); if (dim[1]%2==0) NM = dim[1]+2; else NM = dim[1]+1; for (i=0;icomm; Mat_FFT *fft=(Mat_FFT*)A->data; Mat_FFTW *fftw; PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; PetscBool flg; PetscInt p_flag,partial_dim=1,ctr,N1; PetscMPIInt size,rank; ptrdiff_t *pdim, temp; ptrdiff_t local_n1,local_1_start; PetscFunctionBegin; //#if !defined(PETSC_USE_COMPLEX) // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); //#endif ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Barrier(PETSC_COMM_WORLD); pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); pdim[0] = dim[0]; for(ctr=1;ctrdata = (void*)fftw; fft->n = n; fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); for(ctr=0;ctrdim_fftw)[ctr]=dim[ctr]; fftw->p_forward = 0; fftw->p_backward = 0; fftw->p_flag = FFTW_ESTIMATE; if (size == 1){ A->ops->mult = MatMult_SeqFFTW; A->ops->multtranspose = MatMultTranspose_SeqFFTW; } else { A->ops->mult = MatMult_MPIFFTW; A->ops->multtranspose = MatMultTranspose_MPIFFTW; } fft->matdestroy = MatDestroy_FFTW; A->ops->getvecs = MatGetVecs_FFTW; A->assembled = PETSC_TRUE; #if !defined(PETSC_USE_COMPLEX) PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); #endif /* get runtime options */ ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); if (flg) {fftw->p_flag = (unsigned)p_flag;} PetscOptionsEnd(); PetscFunctionReturn(0); } EXTERN_C_END