1 /* This program illustrates use of parallel real FFT */ 2 static char help[] = "This program illustrates the use of parallel real 2D fft using fftw without PETSc interface"; 3 4 #include <petscmat.h> 5 #include <fftw3.h> 6 #include <fftw3-mpi.h> 7 8 int main(int argc, char **args) 9 { 10 const ptrdiff_t N0 = 2056, N1 = 2056; 11 fftw_plan bplan, fplan; 12 fftw_complex *out; 13 double *in1, *in2; 14 ptrdiff_t alloc_local, local_n0, local_0_start; 15 ptrdiff_t local_n1, local_1_start; 16 PetscInt i, j; 17 PetscMPIInt size, rank; 18 int n, N, N_factor, NM; 19 PetscScalar one = 2.0, zero = 0.5; 20 PetscScalar two = 4.0, three = 8.0, four = 16.0; 21 PetscScalar a, *x_arr, *y_arr, *z_arr; 22 PetscReal enorm; 23 Vec fin, fout, fout1; 24 Vec ini, final; 25 PetscRandom rnd; 26 PetscInt *indx3, tempindx, low, *indx4, tempindx1; 27 28 PetscFunctionBeginUser; 29 PetscCall(PetscInitialize(&argc, &args, NULL, help)); 30 PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 31 PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 32 33 PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rnd)); 34 35 alloc_local = fftw_mpi_local_size_2d_transposed(N0, N1 / 2 + 1, PETSC_COMM_WORLD, &local_n0, &local_0_start, &local_n1, &local_1_start); 36 #if defined(DEBUGGING) 37 printf("The value alloc_local is %ld from process %d\n", alloc_local, rank); 38 printf("The value local_n0 is %ld from process %d\n", local_n0, rank); 39 printf("The value local_0_start is %ld from process %d\n", local_0_start, rank); 40 /* printf("The value local_n1 is %ld from process %d\n",local_n1,rank); */ 41 /* printf("The value local_1_start is %ld from process %d\n",local_1_start,rank); */ 42 /* printf("The value local_n0 is %ld from process %d\n",local_n0,rank); */ 43 #endif 44 45 /* Allocate space for input and output arrays */ 46 in1 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 47 in2 = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 48 out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 49 50 N = 2 * N0 * (N1 / 2 + 1); 51 N_factor = N0 * N1; 52 n = 2 * local_n0 * (N1 / 2 + 1); 53 54 /* printf("The value N is %d from process %d\n",N,rank); */ 55 /* printf("The value n is %d from process %d\n",n,rank); */ 56 /* printf("The value n1 is %d from process %d\n",n1,rank);*/ 57 /* Creating data vector and accompanying array with VeccreateMPIWithArray */ 58 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in1, &fin)); 59 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)out, &fout)); 60 PetscCall(VecCreateMPIWithArray(PETSC_COMM_WORLD, 1, n, N, (PetscScalar *)in2, &fout1)); 61 62 /* Set the vector with random data */ 63 PetscCall(VecSet(fin, zero)); 64 /* for (i=0;i<N0*N1;i++) */ 65 /* { */ 66 /* VecSetValues(fin,1,&i,&one,INSERT_VALUES); */ 67 /* } */ 68 69 /* VecSet(fin,one); */ 70 i = 0; 71 PetscCall(VecSetValues(fin, 1, &i, &one, INSERT_VALUES)); 72 i = 1; 73 PetscCall(VecSetValues(fin, 1, &i, &two, INSERT_VALUES)); 74 i = 4; 75 PetscCall(VecSetValues(fin, 1, &i, &three, INSERT_VALUES)); 76 i = 5; 77 PetscCall(VecSetValues(fin, 1, &i, &four, INSERT_VALUES)); 78 PetscCall(VecAssemblyBegin(fin)); 79 PetscCall(VecAssemblyEnd(fin)); 80 81 PetscCall(VecSet(fout, zero)); 82 PetscCall(VecSet(fout1, zero)); 83 84 /* Get the meaningful portion of array */ 85 PetscCall(VecGetArray(fin, &x_arr)); 86 PetscCall(VecGetArray(fout1, &z_arr)); 87 PetscCall(VecGetArray(fout, &y_arr)); 88 89 fplan = fftw_mpi_plan_dft_r2c_2d(N0, N1, (double *)x_arr, (fftw_complex *)y_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE); 90 bplan = fftw_mpi_plan_dft_c2r_2d(N0, N1, (fftw_complex *)y_arr, (double *)z_arr, PETSC_COMM_WORLD, FFTW_ESTIMATE); 91 92 fftw_execute(fplan); 93 fftw_execute(bplan); 94 95 PetscCall(VecRestoreArray(fin, &x_arr)); 96 PetscCall(VecRestoreArray(fout1, &z_arr)); 97 PetscCall(VecRestoreArray(fout, &y_arr)); 98 99 /* VecView(fin,PETSC_VIEWER_STDOUT_WORLD); */ 100 PetscCall(VecCreate(PETSC_COMM_WORLD, &ini)); 101 PetscCall(VecCreate(PETSC_COMM_WORLD, &final)); 102 PetscCall(VecSetSizes(ini, local_n0 * N1, N0 * N1)); 103 PetscCall(VecSetSizes(final, local_n0 * N1, N0 * N1)); 104 PetscCall(VecSetFromOptions(ini)); 105 PetscCall(VecSetFromOptions(final)); 106 107 if (N1 % 2 == 0) { 108 NM = N1 + 2; 109 } else { 110 NM = N1 + 1; 111 } 112 /*printf("The Value of NM is %d",NM); */ 113 PetscCall(VecGetOwnershipRange(fin, &low, NULL)); 114 /*printf("The local index is %d from %d\n",low,rank); */ 115 PetscCall(PetscMalloc1(local_n0 * N1, &indx3)); 116 PetscCall(PetscMalloc1(local_n0 * N1, &indx4)); 117 for (i = 0; i < local_n0; i++) { 118 for (j = 0; j < N1; j++) { 119 tempindx = i * N1 + j; 120 tempindx1 = i * NM + j; 121 122 indx3[tempindx] = local_0_start * N1 + tempindx; 123 indx4[tempindx] = low + tempindx1; 124 /* printf("index3 %d from proc %d is \n",indx3[tempindx],rank); */ 125 /* printf("index4 %d from proc %d is \n",indx4[tempindx],rank); */ 126 } 127 } 128 129 PetscCall(PetscMalloc2(local_n0 * N1, &x_arr, local_n0 * N1, &y_arr)); /* arr must be allocated for VecGetValues() */ 130 PetscCall(VecGetValues(fin, local_n0 * N1, indx4, (PetscScalar *)x_arr)); 131 PetscCall(VecSetValues(ini, local_n0 * N1, indx3, x_arr, INSERT_VALUES)); 132 133 PetscCall(VecAssemblyBegin(ini)); 134 PetscCall(VecAssemblyEnd(ini)); 135 136 PetscCall(VecGetValues(fout1, local_n0 * N1, indx4, y_arr)); 137 PetscCall(VecSetValues(final, local_n0 * N1, indx3, y_arr, INSERT_VALUES)); 138 PetscCall(VecAssemblyBegin(final)); 139 PetscCall(VecAssemblyEnd(final)); 140 PetscCall(PetscFree2(x_arr, y_arr)); 141 142 /* 143 VecScatter vecscat; 144 IS indx1,indx2; 145 for (i=0;i<N0;i++) { 146 indx = i*NM; 147 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx1); 148 indx = i*N1; 149 ISCreateStride(PETSC_COMM_WORLD,N1,indx,1,&indx2); 150 VecScatterCreate(fin,indx1,ini,indx2,&vecscat); 151 VecScatterBegin(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); 152 VecScatterEnd(vecscat,fin,ini,INSERT_VALUES,SCATTER_FORWARD); 153 VecScatterBegin(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); 154 VecScatterEnd(vecscat,fout1,final,INSERT_VALUES,SCATTER_FORWARD); 155 } 156 */ 157 158 a = 1.0 / (PetscReal)N_factor; 159 PetscCall(VecScale(fout1, a)); 160 PetscCall(VecScale(final, a)); 161 162 /* VecView(ini,PETSC_VIEWER_STDOUT_WORLD); */ 163 /* VecView(final,PETSC_VIEWER_STDOUT_WORLD); */ 164 PetscCall(VecAXPY(final, -1.0, ini)); 165 166 PetscCall(VecNorm(final, NORM_1, &enorm)); 167 if (enorm > 1.e-10) PetscCall(PetscPrintf(PETSC_COMM_WORLD, " Error norm of |x - z| = %e\n", enorm)); 168 169 /* Execute fftw with function fftw_execute and destroy it after execution */ 170 fftw_destroy_plan(fplan); 171 fftw_destroy_plan(bplan); 172 fftw_free(in1); 173 PetscCall(VecDestroy(&fin)); 174 fftw_free(out); 175 PetscCall(VecDestroy(&fout)); 176 fftw_free(in2); 177 PetscCall(VecDestroy(&fout1)); 178 179 PetscCall(VecDestroy(&ini)); 180 PetscCall(VecDestroy(&final)); 181 182 PetscCall(PetscRandomDestroy(&rnd)); 183 PetscCall(PetscFree(indx3)); 184 PetscCall(PetscFree(indx4)); 185 PetscCall(PetscFinalize()); 186 return 0; 187 } 188 189 /*TEST 190 191 build: 192 requires: !mpiuni fftw !complex 193 194 test: 195 output_file: output/empty.out 196 197 test: 198 suffix: 2 199 nsize: 3 200 output_file: output/empty.out 201 202 TEST*/ 203