1 static char help[] = "Illustrate how to use mpi FFTW and PETSc-FFTW interface \n\n"; 2 3 /* 4 Usage: 5 mpiexec -n <np> ./ex158 -use_FFTW_interface NO 6 mpiexec -n <np> ./ex158 -use_FFTW_interface YES 7 */ 8 9 #include <petscmat.h> 10 #include <fftw3-mpi.h> 11 12 int main(int argc, char **args) { 13 PetscMPIInt rank, size; 14 PetscInt N0 = 50, N1 = 20, N = N0 * N1; 15 PetscRandom rdm; 16 PetscScalar a; 17 PetscReal enorm; 18 Vec x, y, z; 19 PetscBool view = PETSC_FALSE, use_interface = PETSC_TRUE; 20 21 PetscFunctionBeginUser; 22 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 23 #if defined(PETSC_USE_COMPLEX) 24 SETERRQ(PETSC_COMM_WORLD, PETSC_ERR_SUP, "This example requires real numbers. Your current scalar type is complex"); 25 #endif 26 27 PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "FFTW Options", "ex158"); 28 PetscCall(PetscOptionsBool("-use_FFTW_interface", "Use PETSc-FFTW interface", "ex158", use_interface, &use_interface, NULL)); 29 PetscOptionsEnd(); 30 31 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 32 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 33 34 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rdm)); 35 PetscCall(PetscRandomSetFromOptions(rdm)); 36 37 if (!use_interface) { 38 /* Use mpi FFTW without PETSc-FFTW interface, 2D case only */ 39 /*---------------------------------------------------------*/ 40 fftw_plan fplan, bplan; 41 fftw_complex *data_in, *data_out, *data_out2; 42 ptrdiff_t alloc_local, local_n0, local_0_start; 43 44 if (rank == 0) printf("Use FFTW without PETSc-FFTW interface\n"); 45 fftw_mpi_init(); 46 N = N0 * N1; 47 alloc_local = fftw_mpi_local_size_2d(N0, N1, PETSC_COMM_WORLD, &local_n0, &local_0_start); 48 49 data_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 50 data_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 51 data_out2 = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 52 53 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_in, &x)); 54 PetscCall(PetscObjectSetName((PetscObject)x, "Real Space vector")); 55 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out, &y)); 56 PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector")); 57 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, (PetscInt)local_n0 * N1, (PetscInt)N, (const PetscScalar *)data_out2, &z)); 58 PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector")); 59 60 fplan = fftw_mpi_plan_dft_2d(N0, N1, data_in, data_out, PETSC_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE); 61 bplan = fftw_mpi_plan_dft_2d(N0, N1, data_out, data_out2, PETSC_COMM_WORLD, FFTW_BACKWARD, FFTW_ESTIMATE); 62 63 PetscCall(VecSetRandom(x, rdm)); 64 if (view) PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 65 66 fftw_execute(fplan); 67 if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 68 69 fftw_execute(bplan); 70 71 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 72 a = 1.0 / (PetscReal)N; 73 PetscCall(VecScale(z, a)); 74 if (view) PetscCall(VecView(z, PETSC_VIEWER_STDOUT_WORLD)); 75 PetscCall(VecAXPY(z, -1.0, x)); 76 PetscCall(VecNorm(z, NORM_1, &enorm)); 77 if (enorm > 1.e-11) { PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %g\n", (double)enorm)); } 78 79 /* Free spaces */ 80 fftw_destroy_plan(fplan); 81 fftw_destroy_plan(bplan); 82 fftw_free(data_in); 83 PetscCall(VecDestroy(&x)); 84 fftw_free(data_out); 85 PetscCall(VecDestroy(&y)); 86 fftw_free(data_out2); 87 PetscCall(VecDestroy(&z)); 88 89 } else { 90 /* Use PETSc-FFTW interface */ 91 /*-------------------------------------------*/ 92 PetscInt i, *dim, k, DIM; 93 Mat A; 94 Vec input, output; 95 96 N = 30; 97 for (i = 2; i < 3; i++) { /* (i=3,4: -- error in VecScatterPetscToFFTW(A,input,x); */ 98 DIM = i; 99 PetscCall(PetscMalloc1(i, &dim)); 100 for (k = 0; k < i; k++) { dim[k] = 30; } 101 N *= dim[i - 1]; 102 103 /* Create FFTW object */ 104 if (rank == 0) { PetscCall(PetscPrintf(PETSC_COMM_SELF, "Use PETSc-FFTW interface...%d-DIM:%d \n", DIM, N)); } 105 PetscCall(MatCreateFFT(PETSC_COMM_WORLD, DIM, dim, MATFFTW, &A)); 106 107 /* Create FFTW vectors that are compatible with parallel layout of A */ 108 PetscCall(MatCreateVecsFFTW(A, &x, &y, &z)); 109 PetscCall(PetscObjectSetName((PetscObject)x, "Real space vector")); 110 PetscCall(PetscObjectSetName((PetscObject)y, "Frequency space vector")); 111 PetscCall(PetscObjectSetName((PetscObject)z, "Reconstructed vector")); 112 113 /* Create and set PETSc vector */ 114 PetscCall(VecCreate(PETSC_COMM_WORLD, &input)); 115 PetscCall(VecSetSizes(input, PETSC_DECIDE, N)); 116 PetscCall(VecSetFromOptions(input)); 117 PetscCall(VecSetRandom(input, rdm)); 118 PetscCall(VecDuplicate(input, &output)); 119 if (view) PetscCall(VecView(input, PETSC_VIEWER_STDOUT_WORLD)); 120 121 /* Vector input is copied to another vector x using VecScatterPetscToFFTW. This is because the user data 122 can have any parallel layout. But FFTW requires special parallel layout of the data. Hence the original 123 data which is in the vector "input" here, needs to be copied to a vector x, which has the correct parallel 124 layout for FFTW. Also, during parallel real transform, this pads extra zeros automatically 125 at the end of last dimension. This padding is required by FFTW to perform parallel real D.F.T. */ 126 PetscCall(VecScatterPetscToFFTW(A, input, x)); /* buggy for dim = 3, 4... */ 127 128 /* Apply FFTW_FORWARD and FFTW_BACKWARD */ 129 PetscCall(MatMult(A, x, y)); 130 if (view) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_WORLD)); 131 PetscCall(MatMultTranspose(A, y, z)); 132 133 /* Output from Backward DFT needs to be modified to obtain user readable data the routine VecScatterFFTWToPetsc 134 performs the job. In some sense this is the reverse operation of VecScatterPetscToFFTW. This routine gets rid of 135 the extra spaces that were artificially padded to perform real parallel transform. */ 136 PetscCall(VecScatterFFTWToPetsc(A, z, output)); 137 138 /* Compare x and z. FFTW computes an unnormalized DFT, thus z = N*x */ 139 a = 1.0 / (PetscReal)N; 140 PetscCall(VecScale(output, a)); 141 if (view) PetscCall(VecView(output, PETSC_VIEWER_STDOUT_WORLD)); 142 PetscCall(VecAXPY(output, -1.0, input)); 143 PetscCall(VecNorm(output, NORM_1, &enorm)); 144 if (enorm > 1.e-09 && rank == 0) { PetscCall(PetscPrintf(PETSC_COMM_SELF, " Error norm of |x - z| %e\n", enorm)); } 145 146 /* Free spaces */ 147 PetscCall(PetscFree(dim)); 148 PetscCall(VecDestroy(&input)); 149 PetscCall(VecDestroy(&output)); 150 PetscCall(VecDestroy(&x)); 151 PetscCall(VecDestroy(&y)); 152 PetscCall(VecDestroy(&z)); 153 PetscCall(MatDestroy(&A)); 154 } 155 } 156 PetscCall(PetscRandomDestroy(&rdm)); 157 PetscCall(PetscFinalize()); 158 return 0; 159 } 160 161 /*TEST 162 163 build: 164 requires: !mpiuni fftw !complex 165 166 test: 167 output_file: output/ex158.out 168 169 test: 170 suffix: 2 171 nsize: 3 172 173 TEST*/ 174