1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n"; 2 3 /* 4 Usage: 5 mpiexec -n <np> ./ex158 -use_FFTW_interface NO 6 mpiexec -n <np> ./ex158 -use_FFTW_interface YES 7 */ 8 9 #include <petscmat.h> 10 #include <fftw3-mpi.h> 11 12 int main(int argc,char **args) 13 { 14 PetscErrorCode ierr; 15 PetscMPIInt rank,size; 16 PetscInt N0=50,N1=20,N=N0*N1; 17 PetscRandom rdm; 18 PetscScalar a; 19 PetscReal enorm; 20 Vec x,y,z; 21 PetscBool view=PETSC_FALSE,use_interface=PETSC_TRUE; 22 23 ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 24 #if defined(PETSC_USE_COMPLEX) 25 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); 26 #endif 27 28 ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158");CHKERRQ(ierr); 29 CHKERRQ(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158",use_interface, &use_interface, NULL)); 30 ierr = PetscOptionsEnd();CHKERRQ(ierr); 31 32 CHKERRMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 33 CHKERRMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 34 35 CHKERRQ(PetscRandomCreate(PETSC_COMM_WORLD, &rdm)); 36 CHKERRQ(PetscRandomSetFromOptions(rdm)); 37 38 if (!use_interface) { 39 /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ 40 /*---------------------------------------------------------*/ 41 fftw_plan fplan,bplan; 42 fftw_complex *data_in,*data_out,*data_out2; 43 ptrdiff_t alloc_local,local_n0,local_0_start; 44 45 if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n"); 46 fftw_mpi_init(); 47 N = N0*N1; 48 alloc_local = fftw_mpi_local_size_2d(N0,N1,PETSC_COMM_WORLD,&local_n0,&local_0_start); 49 50 data_in = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 51 data_out = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 52 data_out2 = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 53 54 CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_in,&x)); 55 CHKERRQ(PetscObjectSetName((PetscObject) x, "Real Space vector")); 56 CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out,&y)); 57 CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector")); 58 CHKERRQ(VecCreateMPIWithArray(PETSC_COMM_WORLD,1,(PetscInt)local_n0*N1,(PetscInt)N,(const PetscScalar*)data_out2,&z)); 59 CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector")); 60 61 fplan = fftw_mpi_plan_dft_2d(N0,N1,data_in,data_out,PETSC_COMM_WORLD,FFTW_FORWARD,FFTW_ESTIMATE); 62 bplan = fftw_mpi_plan_dft_2d(N0,N1,data_out,data_out2,PETSC_COMM_WORLD,FFTW_BACKWARD,FFTW_ESTIMATE); 63 64 CHKERRQ(VecSetRandom(x, rdm)); 65 if (view) CHKERRQ(VecView(x,PETSC_VIEWER_STDOUT_WORLD)); 66 67 fftw_execute(fplan); 68 if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 69 70 fftw_execute(bplan); 71 72 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 73 a = 1.0/(PetscReal)N; 74 CHKERRQ(VecScale(z,a)); 75 if (view) CHKERRQ(VecView(z, PETSC_VIEWER_STDOUT_WORLD)); 76 CHKERRQ(VecAXPY(z,-1.0,x)); 77 CHKERRQ(VecNorm(z,NORM_1,&enorm)); 78 if (enorm > 1.e-11) { 79 CHKERRQ(PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %g\n",(double)enorm)); 80 } 81 82 /* Free spaces */ 83 fftw_destroy_plan(fplan); 84 fftw_destroy_plan(bplan); 85 fftw_free(data_in); CHKERRQ(VecDestroy(&x)); 86 fftw_free(data_out); CHKERRQ(VecDestroy(&y)); 87 fftw_free(data_out2);CHKERRQ(VecDestroy(&z)); 88 89 } else { 90 /* Use PETSc-FFTW interface */ 91 /*-------------------------------------------*/ 92 PetscInt i,*dim,k,DIM; 93 Mat A; 94 Vec input,output; 95 96 N=30; 97 for (i=2; i<3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */ 98 DIM = i; 99 CHKERRQ(PetscMalloc1(i,&dim)); 100 for (k=0; k<i; k++) { 101 dim[k]=30; 102 } 103 N *= dim[i-1]; 104 105 /* Create FFTW object */ 106 if (rank == 0) { 107 CHKERRQ(PetscPrintf(PETSC_COMM_SELF,"Use PETSc-FFTW interface...%d-DIM:%d \n",DIM,N)); 108 } 109 CHKERRQ(MatCreateFFT(PETSC_COMM_WORLD,DIM,dim,MATFFTW,&A)); 110 111 /* Create FFTW vectors that are compatible with parallel layout of A */ 112 CHKERRQ(MatCreateVecsFFTW(A,&x,&y,&z)); 113 CHKERRQ(PetscObjectSetName((PetscObject) x, "Real space vector")); 114 CHKERRQ(PetscObjectSetName((PetscObject) y, "Frequency space vector")); 115 CHKERRQ(PetscObjectSetName((PetscObject) z, "Reconstructed vector")); 116 117 /* Create and set PETSc vector */ 118 CHKERRQ(VecCreate(PETSC_COMM_WORLD,&input)); 119 CHKERRQ(VecSetSizes(input,PETSC_DECIDE,N)); 120 CHKERRQ(VecSetFromOptions(input)); 121 CHKERRQ(VecSetRandom(input,rdm)); 122 CHKERRQ(VecDuplicate(input,&output)); 123 if (view) CHKERRQ(VecView(input,PETSC_VIEWER_STDOUT_WORLD)); 124 125 /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data 126 can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original 127 data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel 128 layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically 129 at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */ 130 CHKERRQ(VecScatterPetscToFFTW(A,input,x));/* buggy for dim = 3, 4... */ 131 132 /* Apply FFTW_FORWARD and FFTW_BACKWARD */ 133 CHKERRQ(MatMult(A,x,y)); 134 if (view) CHKERRQ(VecView(y,PETSC_VIEWER_STDOUT_WORLD)); 135 CHKERRQ(MatMultTranspose(A,y,z)); 136 137 /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc 138 performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of 139 the extra spaces that were artificially padded to perform real parallel transform. */ 140 CHKERRQ(VecScatterFFTWToPetsc(A,z,output)); 141 142 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 143 a = 1.0/(PetscReal)N; 144 CHKERRQ(VecScale(output,a)); 145 if (view) CHKERRQ(VecView(output,PETSC_VIEWER_STDOUT_WORLD)); 146 CHKERRQ(VecAXPY(output,-1.0,input)); 147 CHKERRQ(VecNorm(output,NORM_1,&enorm)); 148 if (enorm > 1.e-09 && rank == 0) { 149 CHKERRQ(PetscPrintf(PETSC_COMM_SELF," Error norm of |x - z| %e\n",enorm)); 150 } 151 152 /* Free spaces */ 153 CHKERRQ(PetscFree(dim)); 154 CHKERRQ(VecDestroy(&input)); 155 CHKERRQ(VecDestroy(&output)); 156 CHKERRQ(VecDestroy(&x)); 157 CHKERRQ(VecDestroy(&y)); 158 CHKERRQ(VecDestroy(&z)); 159 CHKERRQ(MatDestroy(&A)); 160 } 161 } 162 CHKERRQ(PetscRandomDestroy(&rdm)); 163 ierr = PetscFinalize(); 164 return ierr; 165 } 166 167 /*TEST 168 169 build: 170 requires: !mpiuni fftw !complex 171 172 test: 173 output_file: output/ex158.out 174 175 test: 176 suffix: 2 177 nsize: 3 178 179 TEST*/ 180