/*
   Provides an interface to the FFTW package.
   Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward, p_backward;
  unsigned     p_flag;                                      /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because an FFTW plan should be
                                                               executed with the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

/*
   MatMult_SeqFFTW performs the forward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_SeqFFTW performs the serial backward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
#else
  fftw_iodim *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(0);
}
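
/*
   Note for callers (this note is an addition, not part of the original interface documentation):
   FFTW computes unnormalized transforms, so applying MatMult() followed by MatMultTranspose()
   returns the input scaled by N, the product of the dimensions. A minimal sketch of the
   normalization a caller would typically perform; the names F, z, and N are illustrative
   placeholders:

     PetscCall(MatMult(F, x, y));                  // forward DFT
     PetscCall(MatMultTranspose(F, y, z));         // backward DFT, z = N * x
     PetscCall(VecScale(z, 1.0 / (PetscReal)N));   // recover x
*/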

#if !PetscDefined(HAVE_MPIUNI)
/*
   MatMult_MPIFFTW performs the forward DFT in parallel

   Input Parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       Planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_MPIFFTW performs the parallel backward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

  PetscFunctionBegin;
  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) free(fftw->iodims);
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
  PetscFunctionReturn(0);
}

#if !PetscDefined(HAVE_MPIUNI)
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &array));
  fftw_free((fftw_complex *)array);
  PetscCall(VecRestoreArray(v, &array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(0);
}
#endif

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
  PetscFunctionReturn(0);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
  PetscFunctionReturn(0);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
  PetscFunctionReturn(0);
}
#endif

/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
   parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of forward FFTW
.  y - (optional) output vector of forward FFTW
-  z - (optional) output vector of backward FFTW

   Level: advanced

   Notes:
   The parallel layout of the output of the forward FFTW is always the same as the input
   of the backward FFTW. However, the parallel layout of the input vector of the forward
   FFTW might not be the same as the output of the backward FFTW.

   Also note that enough space must be provided when doing a parallel real transform:
   extra zeros must be padded at the end of the last dimension. For this reason one needs to
   invoke the routine fftw_mpi_local_size_transposed, and remember to change the last
   dimension from n to n/2+1 when invoking it. The number of zeros to be padded depends on
   whether the last dimension is even or odd: an even last dimension needs two padded zeros,
   an odd one needs only one.

   Lastly, some scratch space is needed at the end of the data set on each process; alloc_local
   figures out how much space is needed, i.e. it computes the data+scratch space for
   each process and returns that.

.seealso: `MatCreateFFT()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(0);
}
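
/*
   A minimal usage sketch (illustrative only; the 2D sizes and the variable names xpetsc and
   ypetsc below are placeholders, not part of this file): create an FFTW matrix with
   MatCreateFFT(), get layout-compatible work vectors, move data into the FFTW layout,
   transform, and move the result back.

     PetscInt dims[2] = {64, 64};
     Mat      F;
     Vec      fin, fout, bout;

     PetscCall(MatCreateFFT(PETSC_COMM_WORLD, 2, dims, MATFFTW, &F));
     PetscCall(MatCreateVecsFFTW(F, &fin, &fout, &bout));
     PetscCall(VecScatterPetscToFFTW(F, xpetsc, fin));   // xpetsc: user vector in PETSc layout
     PetscCall(MatMult(F, fin, fout));                   // forward transform
     PetscCall(MatMultTranspose(F, fout, bout));         // backward (unnormalized) transform
     PetscCall(VecScatterFFTWToPetsc(F, bout, ypetsc));  // ypetsc: user vector in PETSc layout
*/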

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
{
  PetscMPIInt size, rank;
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
#else
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else { /* parallel cases */
    Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
    PetscInt      ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t     alloc_local, local_n0, local_0_start;
    ptrdiff_t     local_n1;
    fftw_complex *data_fout;
    ptrdiff_t     local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin, *data_bout;
#else
    double   *data_finr, *data_boutr;
    PetscInt  n1, N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * (dim[1] / 2 + 1);
      n1          = 2 * local_n0 * (dim[1] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
      n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, such that v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes
       memory leaks. We void these pointers here to avoid accidental uses.
    */
    if (fin) (*fin)->ops->replacearray = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(0);
}

/*@
   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameter:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note:
   For a parallel real transform, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
   one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job by a scattering operation.

.seealso: `VecScatterFFTWToPetsc()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(0);
}
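
/*
   Worked example of the padding described above (assumption: a 2D real transform; the numbers
   are illustrative only): for dim[1] = 5 (odd) each row of length 5 is stored in
   2*(5/2+1) = 6 slots, i.e. one padding zero per row; for dim[1] = 4 (even) each row of
   length 4 is stored in 2*(4/2+1) = 6 slots, i.e. two padding zeros per row. This is the
   NM = dim[1]+1 (odd) / dim[1]+2 (even) row stride used in the index maps below.
*/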

PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

/*@
   VecScatterFFTWToPetsc - Converts FFTW output to a PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note:
   When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
   VecScatterFFTWToPetsc removes those extra zeros.

.seealso: `VecScatterPetscToFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(0);
}

PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

/*
   MatCreate_FFTW - Creates a matrix object that provides FFTs via the external package FFTW

   Options Database Key:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate
*/
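
/*
   Example (the executable name is a placeholder; assumes an application that creates a MATFFTW
   matrix): a more thorough planner can be selected at runtime with

     ./app -mat_fftw_plannerflags FFTW_MEASURE

   The accepted values are the strings listed in plans[] below; FFTW_ESTIMATE is the default.
*/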

PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  Mat_FFTW   *fftw;
  PetscInt    ndim = fft->ndim, *dim = fft->dim;
  const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t  *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2 * dim[0];
#endif
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp;

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
#endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNewLog(A, &fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the fft */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}