1 /* This program illustrates use of parallel real FFT */ 2 static char help[] = "This program illustrates the use of parallel real 2D fft using fftw without PETSc interface"; 3 4 #include <petscmat.h> 5 #include <fftw3.h> 6 #include <fftw3-mpi.h> 7 8 int main(int argc, char **args) { 9 const ptrdiff_t N0 = 2056, N1 = 2056; 10 fftw_plan bplan, fplan; 11 fftw_complex *out; 12 double *in1, *in2; 13 ptrdiff_t alloc_local, local_n0, local_0_start; 14 ptrdiff_t local_n1, local_1_start; 15 PetscInt i, j; 16 PetscMPIInt size, rank; 17 int n, N, N_factor, NM; 18 PetscScalar one = 2.0, zero = 0.5; 19 PetscScalar two = 4.0, three = 8.0, four = 16.0; 20 PetscScalar a, *x_arr, *y_arr, *z_arr; 21 PetscReal enorm; 22 Vec fin, fout, fout1; 23 Vec ini, final; 24 PetscRandom rnd; 25 PetscInt *indx3, tempindx, low, *indx4, tempindx1; 26 27 PetscFunctionBeginUser; 28 PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 29 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 30 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 31 32 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rnd)); 33 34 alloc_local = fftw_mpi_local_size_2d_transposed(N0, N1 / 2 + 1, PETSC_COMM_WORLD, &local_n0, &local_0_start, &local_n1, &local_1_start); 35 #if defined(DEBUGGING) 36 printf("The value alloc_local is %ld from process %d\n", alloc_local, rank); 37 printf("The value local_n0 is %ld from process %d\n", local_n0, rank); 38 printf("The value local_0_start is %ld from process %d\n", local_0_start, rank); 39 /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ 40 /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ 41 /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ 42 #endif 43 44 /* Allocate space for input and output arrays */ 45 in1 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 46 in2 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 47 out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 48 49 N = 2 * N0 * (N1 / 2 + 1); 50 N_factor = N0 * N1; 51 n = 2 * local_n0 * (N1 / 2 + 1); 52 53 /* printf("The value N is %d from process %d\n",N,rank); */ 54 /* printf("The value n is %d from process %d\n",n,rank); */ 55 /* printf("The value n1 is %d from process %d\n",n1,rank);*/ 56 /* Creating data vector and accompanying array with VeccreateMPIWithArray */ 57 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in1, &fin)); 58 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)out, &fout)); 59 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in2, &fout1)); 60 61 /* Set the vector with random data */ 62 PetscCall(VecSet(fin, zero)); 63 /* for (i=0;i<N0*N1;i++) */ 64 /* { */ 65 /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ 66 /* } */ 67 68 /* VecSet(fin,one); */ 69 i = 0; 70 PetscCall(VecSetValues(fin, 1, &i, &one, INSERT_VALUES)); 71 i = 1; 72 PetscCall(VecSetValues(fin, 1, &i, &two, INSERT_VALUES)); 73 i = 4; 74 PetscCall(VecSetValues(fin, 1, &i, &three, INSERT_VALUES)); 75 i = 5; 76 PetscCall(VecSetValues(fin, 1, &i, &four, INSERT_VALUES)); 77 PetscCall(VecAssemblyBegin(fin)); 78 PetscCall(VecAssemblyEnd(fin)); 79 80 PetscCall(VecSet(fout, zero)); 81 PetscCall(VecSet(fout1, zero)); 82 83 /* Get the meaningful portion of array */ 84 PetscCall(VecGetArray(fin, &x_arr)); 85 PetscCall(VecGetArray(fout1, &z_arr)); 86 PetscCall(VecGetArray(fout, &y_arr)); 87 88 fplan = fftw_mpi_plan_dft_r2c_2d(N0, N1, (double *)x_arr, (fftw_complex *)y_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE); 89 bplan = fftw_mpi_plan_dft_c2r_2d(N0, N1, (fftw_complex *)y_arr, (double *)z_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE); 90 91 fftw_execute(fplan); 92 fftw_execute(bplan); 93 94 PetscCall(VecRestoreArray(fin, &x_arr)); 95 PetscCall(VecRestoreArray(fout1, &z_arr)); 96 PetscCall(VecRestoreArray(fout, &y_arr)); 97 98 /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ 99 PetscCall(VecCreate(PETSC_COMM_WORLD, &ini)); 100 PetscCall(VecCreate(PETSC_COMM_WORLD, &final)); 101 PetscCall(VecSetSizes(ini, local_n0 * N1, N0 * N1)); 102 PetscCall(VecSetSizes(final, local_n0 * N1, N0 * N1)); 103 PetscCall(VecSetFromOptions(ini)); 104 PetscCall(VecSetFromOptions(final)); 105 106 if (N1 % 2 == 0) { 107 NM = N1 + 2; 108 } else { 109 NM = N1 + 1; 110 } 111 /*printf("The Value of NM is %d",NM); */ 112 PetscCall(VecGetOwnershipRange(fin, &low, NULL)); 113 /*printf("The local index is %d from %d\n",low,rank); */ 114 PetscCall(PetscMalloc1(local_n0 * N1, &indx3)); 115 PetscCall(PetscMalloc1(local_n0 * N1, &indx4)); 116 for (i = 0; i < local_n0; i++) { 117 for (j = 0; j < N1; j++) { 118 tempindx = i * N1 + j; 119 tempindx1 = i * NM + j; 120 121 indx3[tempindx] = local_0_start * N1 + tempindx; 122 indx4[tempindx] = low + tempindx1; 123 /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ 124 /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ 125 } 126 } 127 128 PetscCall(PetscMalloc2(local_n0 * N1, &x_arr, local_n0 * N1, &y_arr)); /* arr must be allocated for VecGetValues() */ 129 PetscCall(VecGetValues(fin, local_n0 * N1, indx4, (PetscScalar *)x_arr)); 130 PetscCall(VecSetValues(ini, local_n0 * N1, indx3, x_arr, INSERT_VALUES)); 131 132 PetscCall(VecAssemblyBegin(ini)); 133 PetscCall(VecAssemblyEnd(ini)); 134 135 PetscCall(VecGetValues(fout1, local_n0 * N1, indx4, y_arr)); 136 PetscCall(VecSetValues(final, local_n0 * N1, indx3, y_arr, INSERT_VALUES)); 137 PetscCall(VecAssemblyBegin(final)); 138 PetscCall(VecAssemblyEnd(final)); 139 PetscCall(PetscFree2(x_arr, y_arr)); 140 141 /* 142 VecScatter vecscat; 143 IS indx1,indx2; 144 for (i=0;i<N0;i++) { 145 indx = i*NM; 146 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); 147 indx = i*N1; 148 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); 149 VecScatterCreate(fin,indx1,ini,indx2,&vecscat); 150 VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); 151 VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); 152 VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); 153 VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); 154 } 155 */ 156 157 a = 1.0 / (PetscReal)N_factor; 158 PetscCall(VecScale(fout1, a)); 159 PetscCall(VecScale(final, a)); 160 161 /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ 162 /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ 163 PetscCall(VecAXPY(final, -1.0, ini)); 164 165 PetscCall(VecNorm(final, NORM_1, &enorm)); 166 if (enorm > 1.e-10) { PetscCall(PetscPrintf(PETSC_COMM_WORLD, " Error norm of |x - z| = %e\n", enorm)); } 167 168 /* Execute fftw with function fftw_execute and destroy it after execution */ 169 fftw_destroy_plan(fplan); 170 fftw_destroy_plan(bplan); 171 fftw_free(in1); 172 PetscCall(VecDestroy(&fin)); 173 fftw_free(out); 174 PetscCall(VecDestroy(&fout)); 175 fftw_free(in2); 176 PetscCall(VecDestroy(&fout1)); 177 178 PetscCall(VecDestroy(&ini)); 179 PetscCall(VecDestroy(&final)); 180 181 PetscCall(PetscRandomDestroy(&rnd)); 182 PetscCall(PetscFree(indx3)); 183 PetscCall(PetscFree(indx4)); 184 PetscCall(PetscFinalize()); 185 return 0; 186 } 187 188 /*TEST 189 190 build: 191 requires: !mpiuni fftw !complex 192 193 test: 194 output_file: output/ex144.out 195 196 test: 197 suffix: 2 198 nsize: 3 199 output_file: output/ex144.out 200 201 TEST*/ 202