/* Provides an interface to the FFTW package. Testing examples can be found in ~src/mat/examples/tests */ #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ EXTERN_C_BEGIN #include EXTERN_C_END typedef struct { ptrdiff_t ndim_fftw,*dim_fftw; PetscInt partial_dim; fftw_plan p_forward,p_backward; unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be executed for the arrays with which the plan was created */ } Mat_FFTW; extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); extern PetscErrorCode MatDestroy_FFTW(Mat); extern PetscErrorCode VecDestroy_MPIFFTW(Vec); extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); #undef __FUNCT__ #define __FUNCT__ "MatMult_SeqFFTW" PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; PetscFunctionBegin; ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_forward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); #endif break; } fftw->finarray = x_array; fftw->foutarray = y_array; /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ fftw_execute(fftw->p_forward); } else { /* use existing plan */ if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_forward); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultTranspose_SeqFFTW" PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; PetscFunctionBegin; ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_backward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); #endif break; } fftw->binarray = x_array; fftw->boutarray = y_array; fftw_execute(fftw->p_backward);CHKERRQ(ierr); } else { /* use existing plan */ if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_backward);CHKERRQ(ierr); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMult_MPIFFTW" PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; MPI_Comm comm=((PetscObject)A)->comm; PetscFunctionBegin; ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_forward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); #else fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); #endif break; } fftw->finarray = x_array; fftw->foutarray = y_array; /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ fftw_execute(fftw->p_forward); } else { /* use existing plan */ if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_forward); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatMultTranspose_MPIFFTW" PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscScalar *x_array,*y_array; PetscInt ndim=fft->ndim,*dim=fft->dim; MPI_Comm comm=((PetscObject)A)->comm; PetscFunctionBegin; ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); if (!fftw->p_backward){ /* create a plan, then excute it */ switch (ndim){ case 1: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); #endif break; case 2: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif break; case 3: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif break; default: #if defined(PETSC_USE_COMPLEX) fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); #else fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); #endif break; } fftw->binarray = x_array; fftw->boutarray = y_array; fftw_execute(fftw->p_backward);CHKERRQ(ierr); } else { /* use existing plan */ if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); } else { fftw_execute(fftw->p_backward);CHKERRQ(ierr); } } ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatDestroy_FFTW" PetscErrorCode MatDestroy_FFTW(Mat A) { Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscErrorCode ierr; PetscFunctionBegin; fftw_destroy_plan(fftw->p_forward); fftw_destroy_plan(fftw->p_backward); ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); ierr = PetscFree(fft->data);CHKERRQ(ierr); PetscFunctionReturn(0); } #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ #undef __FUNCT__ #define __FUNCT__ "VecDestroy_MPIFFTW" PetscErrorCode VecDestroy_MPIFFTW(Vec v) { PetscErrorCode ierr; PetscScalar *array; PetscFunctionBegin; ierr = VecGetArray(v,&array);CHKERRQ(ierr); fftw_free((fftw_complex*)array);CHKERRQ(ierr); ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); ierr = VecDestroy_MPI(v);CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "MatGetVecs1DC_FFTW" /* MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the parallel layout determined by FFTW-1D */ PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) { PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N; PetscInt ndim=fft->ndim,*dim=fft->dim; ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; ptrdiff_t f_local_n1,f_local_1_end; ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; ptrdiff_t b_local_n1,b_local_1_end; fftw_complex *data_fin,*data_fout,*data_bout; PetscFunctionBegin; #if !defined(PETSC_USE_COMPLEX) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); #endif ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); if (size == 1){ SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); } else { if (ndim>1){ SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} else { f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } if (bout) { data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); (*bout)->ops->destroy = VecDestroy_MPIFFTW; } } if (fin){ ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); } if (fout){ ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); } if (bout){ ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); } PetscFunctionReturn(0); } } #undef __FUNCT__ #define __FUNCT__ "MatGetVecs_FFTW" /* MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the parallel layout determined by FFTW Collective on Mat Input Parameter: . mat - the matrix Output Parameter: + fin - (optional) input vector of forward FFTW - fout - (optional) output vector of forward FFTW Level: advanced .seealso: MatCreateFFTW() */ PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) { PetscErrorCode ierr; PetscMPIInt size,rank; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1,vsize; PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; PetscFunctionBegin; PetscValidHeaderSpecific(A,MAT_CLASSID,1); PetscValidType(A,1); ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); if (size == 1){ /* sequential case */ #if defined(PETSC_USE_COMPLEX) if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} #else if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} #endif } else { /* mpi case */ ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_end; fftw_complex *data_fin,*data_fout; double *data_finr ; ptrdiff_t local_1_start,temp; // PetscInt ctr; // ptrdiff_t ndim1,*pdim; // ndim1=(ptrdiff_t) ndim; // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(ctr=0;ctr1. Check Documentation for MatGetVecs_FFTW1D routine"); alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } break; case 2: #if !defined(PETSC_USE_COMPLEX) alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); #else /* Get local size */ printf("Hope this does not come here"); alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Hope this does not come here"); #endif break; case 3: /* Get local size */ #if !defined(PETSC_USE_COMPLEX) alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); #else alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); // printf("The quantity n is %d",n); if (fin) { data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #endif break; default: /* Get local size */ #if !defined(PETSC_USE_COMPLEX) temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; printf("The value of temp is %ld\n",temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; if (fin) { data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); //printf("The code comes here with vector size %d\n",vsize); (*fin)->ops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #else alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); // printf("The value of alloc local is %d",alloc_local); // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); // for(i=0;iops->destroy = VecDestroy_MPIFFTW; } if (fout) { data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); (*fout)->ops->destroy = VecDestroy_MPIFFTW; } #endif break; } } // if (fin){ // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); // } // if (fout){ // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); // } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "InputTransformFFT" PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) { PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); PetscFunctionReturn(0); } /* InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block Input A, x, y A - FFTW matrix x - user data Options Database Keys: + -mat_fftw_plannerflags - set FFTW planner flags Level: intermediate */ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "InputTransformFFT_FTTW" PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1 ,NM; PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; PetscInt low, *indx1, *indx2, tempindx, tempindx1; PetscInt i,j,k,rank,size,partial_dim; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start,temp; VecScatter vecscat; IS list1,list2; ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); printf("Local ownership starts at %d\n",low); if (size==1) { switch (ndim){ case 1: ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); for (i=0;idim_fftw)[fftw->ndim_fftw-1]; printf("The value of temp is %ld\n",temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; partial_dim = fftw->partial_dim; printf("The value of partial dim is %d\n",partial_dim); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); printf("Val local_0_start = %ld\n",local_0_start); if (dim[ndim-1]%2==0) NM = 2; else NM = 1; j = low; for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { indx1[i] = local_0_start*partial_dim + i; indx2[i] = j; //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); if (k%dim[ndim-1]==0) { j+=NM;} j++; } ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); ierr = ISDestroy(&list1);CHKERRQ(ierr); ierr = ISDestroy(&list2);CHKERRQ(ierr); ierr = PetscFree(indx1);CHKERRQ(ierr); ierr = PetscFree(indx2);CHKERRQ(ierr); break; } } return 0; } EXTERN_C_END #undef __FUNCT__ #define __FUNCT__ "OutputTransformFFT" PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) { PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); PetscFunctionReturn(0); } /* OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use Input A, x, y A - FFTW matrix x - FFTW vector y - PETSc vector that user can read Options Database Keys: + -mat_fftw_plannerflags - set FFTW planner flags Level: intermediate */ EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "OutputTransformFFT_FTTW" PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) { PetscErrorCode ierr; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft = (Mat_FFT*)A->data; Mat_FFTW *fftw = (Mat_FFTW*)fft->data; PetscInt N=fft->N, N1, n1 ,NM; PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; PetscInt low, *indx1, *indx2, tempindx, tempindx1; PetscInt i,j,k,rank,size,partial_dim; ptrdiff_t alloc_local,local_n0,local_0_start; ptrdiff_t local_n1,local_1_start,temp; VecScatter vecscat; IS list1,list2; ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); if (size==1){ switch (ndim){ case 1: ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); for (i=0;idim_fftw)[fftw->ndim_fftw-1]; printf("The value of temp is %ld\n",temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; partial_dim = fftw->partial_dim; printf("The value of partial dim is %d\n",partial_dim); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); printf("Val local_0_start = %ld\n",local_0_start); if (dim[ndim-1]%2==0) NM = 2; else NM = 1; j = low; for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { indx1[i] = local_0_start*partial_dim + i; indx2[i] = j; //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); if (k%dim[ndim-1]==0) { j+=NM;} j++; } ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); //ISView(list1,PETSC_VIEWER_STDOUT_SELF); ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); ierr = ISDestroy(&list1);CHKERRQ(ierr); ierr = ISDestroy(&list2);CHKERRQ(ierr); ierr = PetscFree(indx1);CHKERRQ(ierr); ierr = PetscFree(indx2);CHKERRQ(ierr); break; } } return 0; } EXTERN_C_END EXTERN_C_BEGIN #undef __FUNCT__ #define __FUNCT__ "MatCreate_FFTW" /* MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW Options Database Keys: + -mat_fftw_plannerflags - set FFTW planner flags Level: intermediate */ PetscErrorCode MatCreate_FFTW(Mat A) { PetscErrorCode ierr; MPI_Comm comm=((PetscObject)A)->comm; Mat_FFT *fft=(Mat_FFT*)A->data; Mat_FFTW *fftw; PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; PetscBool flg; PetscInt p_flag,partial_dim=1,ctr,N1; PetscMPIInt size,rank; ptrdiff_t *pdim, temp; ptrdiff_t local_n1,local_1_start; PetscFunctionBegin; ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); ierr = MPI_Barrier(PETSC_COMM_WORLD); pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); pdim[0] = dim[0]; for(ctr=1;ctrdata = (void*)fftw; fft->n = n; fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft fftw->partial_dim = partial_dim; ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); for(ctr=0;ctrdim_fftw)[ctr]=dim[ctr]; fftw->p_forward = 0; fftw->p_backward = 0; fftw->p_flag = FFTW_ESTIMATE; if (size == 1){ A->ops->mult = MatMult_SeqFFTW; A->ops->multtranspose = MatMultTranspose_SeqFFTW; } else { A->ops->mult = MatMult_MPIFFTW; A->ops->multtranspose = MatMultTranspose_MPIFFTW; } fft->matdestroy = MatDestroy_FFTW; // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D A->ops->getvecs = MatGetVecs_FFTW; A->assembled = PETSC_TRUE; #if !defined(PETSC_USE_COMPLEX) PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); #endif /* get runtime options */ ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); if (flg) {fftw->p_flag = (unsigned)p_flag;} PetscOptionsEnd(); PetscFunctionReturn(0); } EXTERN_C_END