1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #if !PetscDefined(HAVE_MPIUNI) 10 #include <fftw3-mpi.h> 11 #else 12 #include <fftw3.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 ptrdiff_t ndim_fftw, *dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward, p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays becaue fftw plan should be 27 executed for the arrays with which the plan was created */ 28 } Mat_FFTW; 29 30 extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 32 extern PetscErrorCode MatDestroy_FFTW(Mat); 33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 34 #if !PetscDefined(HAVE_MPIUNI) 35 extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38 #endif 39 40 /* 41 MatMult_SeqFFTW performs forward DFT 42 Input parameter: 43 A - the matrix 44 x - the vector on which FDFT will be performed 45 46 Output parameter: 47 y - vector that stores result of FDFT 48 */ 49 PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) { 50 Mat_FFT *fft = (Mat_FFT *)A->data; 51 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 52 const PetscScalar *x_array; 53 PetscScalar *y_array; 54 #if defined(PETSC_USE_COMPLEX) 55 #if defined(PETSC_USE_64BIT_INDICES) 56 fftw_iodim64 *iodims; 57 #else 58 fftw_iodim *iodims; 59 #endif 60 PetscInt i; 61 #endif 62 PetscInt ndim = fft->ndim, *dim = fft->dim; 63 64 PetscFunctionBegin; 65 PetscCall(VecGetArrayRead(x, &x_array)); 66 PetscCall(VecGetArray(y, 
&y_array)); 67 if (!fftw->p_forward) { /* create a plan, then execute it */ 68 switch (ndim) { 69 case 1: 70 #if defined(PETSC_USE_COMPLEX) 71 fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 72 #else 73 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 74 #endif 75 break; 76 case 2: 77 #if defined(PETSC_USE_COMPLEX) 78 fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 79 #else 80 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 81 #endif 82 break; 83 case 3: 84 #if defined(PETSC_USE_COMPLEX) 85 fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 86 #else 87 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 88 #endif 89 break; 90 default: 91 #if defined(PETSC_USE_COMPLEX) 92 iodims = fftw->iodims; 93 #if defined(PETSC_USE_64BIT_INDICES) 94 if (ndim) { 95 iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 96 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 97 for (i = ndim - 2; i >= 0; --i) { 98 iodims[i].n = (ptrdiff_t)dim[i]; 99 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 100 } 101 } 102 fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 103 #else 104 if (ndim) { 105 iodims[ndim - 1].n = (int)dim[ndim - 1]; 106 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 107 for (i = ndim - 2; i >= 0; --i) { 108 iodims[i].n = (int)dim[i]; 109 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 110 } 111 } 112 fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, 
(fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 113 #endif 114 115 #else 116 fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 117 #endif 118 break; 119 } 120 fftw->finarray = (PetscScalar *)x_array; 121 fftw->foutarray = y_array; 122 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 123 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 124 fftw_execute(fftw->p_forward); 125 } else { /* use existing plan */ 126 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 127 #if defined(PETSC_USE_COMPLEX) 128 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 129 #else 130 fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 131 #endif 132 } else { 133 fftw_execute(fftw->p_forward); 134 } 135 } 136 PetscCall(VecRestoreArray(y, &y_array)); 137 PetscCall(VecRestoreArrayRead(x, &x_array)); 138 PetscFunctionReturn(0); 139 } 140 141 /* MatMultTranspose_SeqFFTW performs serial backward DFT 142 Input parameter: 143 A - the matrix 144 x - the vector on which BDFT will be performed 145 146 Output parameter: 147 y - vector that stores result of BDFT 148 */ 149 150 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) { 151 Mat_FFT *fft = (Mat_FFT *)A->data; 152 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 153 const PetscScalar *x_array; 154 PetscScalar *y_array; 155 PetscInt ndim = fft->ndim, *dim = fft->dim; 156 #if defined(PETSC_USE_COMPLEX) 157 #if defined(PETSC_USE_64BIT_INDICES) 158 fftw_iodim64 *iodims = fftw->iodims; 159 #else 160 fftw_iodim *iodims = fftw->iodims; 161 #endif 162 #endif 163 164 PetscFunctionBegin; 165 PetscCall(VecGetArrayRead(x, &x_array)); 166 PetscCall(VecGetArray(y, &y_array)); 167 if (!fftw->p_backward) { /* create a plan, then execute it */ 168 switch (ndim) { 169 case 1: 170 #if 
defined(PETSC_USE_COMPLEX) 171 fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 172 #else 173 fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 174 #endif 175 break; 176 case 2: 177 #if defined(PETSC_USE_COMPLEX) 178 fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 179 #else 180 fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 181 #endif 182 break; 183 case 3: 184 #if defined(PETSC_USE_COMPLEX) 185 fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 186 #else 187 fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 188 #endif 189 break; 190 default: 191 #if defined(PETSC_USE_COMPLEX) 192 #if defined(PETSC_USE_64BIT_INDICES) 193 fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 194 #else 195 fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196 #endif 197 #else 198 fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 199 #endif 200 break; 201 } 202 fftw->binarray = (PetscScalar *)x_array; 203 fftw->boutarray = y_array; 204 fftw_execute(fftw->p_backward); 205 } else { /* use existing plan */ 206 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 207 #if defined(PETSC_USE_COMPLEX) 208 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 209 #else 210 
fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 211 #endif 212 } else { 213 fftw_execute(fftw->p_backward); 214 } 215 } 216 PetscCall(VecRestoreArray(y, &y_array)); 217 PetscCall(VecRestoreArrayRead(x, &x_array)); 218 PetscFunctionReturn(0); 219 } 220 221 #if !PetscDefined(HAVE_MPIUNI) 222 /* MatMult_MPIFFTW performs forward DFT in parallel 223 Input parameter: 224 A - the matrix 225 x - the vector on which FDFT will be performed 226 227 Output parameter: 228 y - vector that stores result of FDFT 229 */ 230 PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) { 231 Mat_FFT *fft = (Mat_FFT *)A->data; 232 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 233 const PetscScalar *x_array; 234 PetscScalar *y_array; 235 PetscInt ndim = fft->ndim, *dim = fft->dim; 236 MPI_Comm comm; 237 238 PetscFunctionBegin; 239 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 240 PetscCall(VecGetArrayRead(x, &x_array)); 241 PetscCall(VecGetArray(y, &y_array)); 242 if (!fftw->p_forward) { /* create a plan, then execute it */ 243 switch (ndim) { 244 case 1: 245 #if defined(PETSC_USE_COMPLEX) 246 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 247 #else 248 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 249 #endif 250 break; 251 case 2: 252 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 253 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 254 #else 255 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 256 #endif 257 break; 258 case 3: 259 #if defined(PETSC_USE_COMPLEX) 260 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, 
fftw->p_flag); 261 #else 262 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 263 #endif 264 break; 265 default: 266 #if defined(PETSC_USE_COMPLEX) 267 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 268 #else 269 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 270 #endif 271 break; 272 } 273 fftw->finarray = (PetscScalar *)x_array; 274 fftw->foutarray = y_array; 275 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 276 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 277 fftw_execute(fftw->p_forward); 278 } else { /* use existing plan */ 279 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 280 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 281 } else { 282 fftw_execute(fftw->p_forward); 283 } 284 } 285 PetscCall(VecRestoreArray(y, &y_array)); 286 PetscCall(VecRestoreArrayRead(x, &x_array)); 287 PetscFunctionReturn(0); 288 } 289 290 /* 291 MatMultTranspose_MPIFFTW performs parallel backward DFT 292 Input parameter: 293 A - the matrix 294 x - the vector on which BDFT will be performed 295 296 Output parameter: 297 y - vector that stores result of BDFT 298 */ 299 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) { 300 Mat_FFT *fft = (Mat_FFT *)A->data; 301 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 302 const PetscScalar *x_array; 303 PetscScalar *y_array; 304 PetscInt ndim = fft->ndim, *dim = fft->dim; 305 MPI_Comm comm; 306 307 PetscFunctionBegin; 308 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 309 PetscCall(VecGetArrayRead(x, &x_array)); 310 PetscCall(VecGetArray(y, &y_array)); 311 if (!fftw->p_backward) { /* 
create a plan, then execute it */ 312 switch (ndim) { 313 case 1: 314 #if defined(PETSC_USE_COMPLEX) 315 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 316 #else 317 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 318 #endif 319 break; 320 case 2: 321 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 322 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 323 #else 324 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 325 #endif 326 break; 327 case 3: 328 #if defined(PETSC_USE_COMPLEX) 329 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 330 #else 331 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 332 #endif 333 break; 334 default: 335 #if defined(PETSC_USE_COMPLEX) 336 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 337 #else 338 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 339 #endif 340 break; 341 } 342 fftw->binarray = (PetscScalar *)x_array; 343 fftw->boutarray = y_array; 344 fftw_execute(fftw->p_backward); 345 } else { /* use existing plan */ 346 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 347 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 348 } else { 349 fftw_execute(fftw->p_backward); 350 } 351 } 352 
PetscCall(VecRestoreArray(y, &y_array)); 353 PetscCall(VecRestoreArrayRead(x, &x_array)); 354 PetscFunctionReturn(0); 355 } 356 #endif 357 358 PetscErrorCode MatDestroy_FFTW(Mat A) { 359 Mat_FFT *fft = (Mat_FFT *)A->data; 360 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 361 362 PetscFunctionBegin; 363 fftw_destroy_plan(fftw->p_forward); 364 fftw_destroy_plan(fftw->p_backward); 365 if (fftw->iodims) free(fftw->iodims); 366 PetscCall(PetscFree(fftw->dim_fftw)); 367 PetscCall(PetscFree(fft->data)); 368 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 369 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 370 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 371 #if !PetscDefined(HAVE_MPIUNI) 372 fftw_mpi_cleanup(); 373 #endif 374 PetscFunctionReturn(0); 375 } 376 377 #if !PetscDefined(HAVE_MPIUNI) 378 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 379 PetscErrorCode VecDestroy_MPIFFTW(Vec v) { 380 PetscScalar *array; 381 382 PetscFunctionBegin; 383 PetscCall(VecGetArray(v, &array)); 384 fftw_free((fftw_complex *)array); 385 PetscCall(VecRestoreArray(v, &array)); 386 PetscCall(VecDestroy_MPI(v)); 387 PetscFunctionReturn(0); 388 } 389 #endif 390 391 #if !PetscDefined(HAVE_MPIUNI) 392 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) { 393 Mat A; 394 395 PetscFunctionBegin; 396 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 397 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 398 PetscFunctionReturn(0); 399 } 400 401 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) { 402 Mat A; 403 404 PetscFunctionBegin; 405 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 406 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) { 411 
Mat A; 412 413 PetscFunctionBegin; 414 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 415 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 416 PetscFunctionReturn(0); 417 } 418 #endif 419 420 /*@ 421 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 422 parallel layout determined by `MATFFTW` 423 424 Collective on Mat 425 426 Input Parameter: 427 . A - the matrix 428 429 Output Parameters: 430 + x - (optional) input vector of forward FFTW 431 . y - (optional) output vector of forward FFTW 432 - z - (optional) output vector of backward FFTW 433 434 Level: advanced 435 436 Notes: 437 The parallel layout of output of forward FFTW is always same as the input 438 of the backward FFTW. But parallel layout of the input vector of forward 439 FFTW might not be same as the output of backward FFTW. 440 441 We need to provide enough space while doing parallel real transform. 442 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 443 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 444 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 445 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 446 zeros if it is odd only one zero is needed. 447 448 Lastly one needs some scratch space at the end of data set in each process. alloc_local 449 figures out how much space is needed, i.e. it figures out the data+scratch space for 450 each processor and returns that. 451 452 Developer Note: 453 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 
454 455 .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 456 @*/ 457 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) { 458 PetscFunctionBegin; 459 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 460 PetscFunctionReturn(0); 461 } 462 463 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) { 464 PetscMPIInt size, rank; 465 MPI_Comm comm; 466 Mat_FFT *fft = (Mat_FFT *)A->data; 467 468 PetscFunctionBegin; 469 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 470 PetscValidType(A, 1); 471 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 472 473 PetscCallMPI(MPI_Comm_size(comm, &size)); 474 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 475 if (size == 1) { /* sequential case */ 476 #if defined(PETSC_USE_COMPLEX) 477 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 478 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 479 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 480 #else 481 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 482 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 483 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 484 #endif 485 #if !PetscDefined(HAVE_MPIUNI) 486 } else { /* parallel cases */ 487 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 488 PetscInt ndim = fft->ndim, *dim = fft->dim; 489 ptrdiff_t alloc_local, local_n0, local_0_start; 490 ptrdiff_t local_n1; 491 fftw_complex *data_fout; 492 ptrdiff_t local_1_start; 493 #if defined(PETSC_USE_COMPLEX) 494 fftw_complex *data_fin, *data_bout; 495 #else 496 double *data_finr, *data_boutr; 497 PetscInt n1, N1; 498 ptrdiff_t temp; 499 #endif 500 501 switch (ndim) { 502 case 1: 503 #if !defined(PETSC_USE_COMPLEX) 504 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 505 #else 506 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, 
&local_1_start); 507 if (fin) { 508 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 509 PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 510 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 511 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 512 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 515 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 516 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 517 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 518 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 519 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 520 } 521 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 522 if (bout) { 523 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 524 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 525 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 526 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 527 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 break; 530 #endif 531 case 2: 532 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 533 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 534 N1 = 2 * dim[0] * (dim[1] / 2 + 1); 535 n1 = 2 * local_n0 * (dim[1] / 2 + 1); 536 if (fin) { 537 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 538 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 539 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 540 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 
541 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 542 } 543 if (fout) { 544 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 545 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 546 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 547 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 548 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 549 } 550 if (bout) { 551 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 552 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 553 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 554 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 555 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 556 } 557 #else 558 /* Get local size */ 559 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 560 if (fin) { 561 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 562 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 563 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 564 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 565 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 566 } 567 if (fout) { 568 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 569 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 570 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 571 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 572 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 573 } 574 if (bout) { 575 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 576 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 577 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 
578 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 579 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 #endif 582 break; 583 case 3: 584 #if !defined(PETSC_USE_COMPLEX) 585 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 586 N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 587 n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 588 if (fin) { 589 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 590 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 591 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 592 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 593 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 if (fout) { 596 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 597 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 598 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 599 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 600 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 601 } 602 if (bout) { 603 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 604 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 605 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 606 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 607 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 #else 610 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 611 if (fin) { 612 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 613 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 614 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 615 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 616 
(*fin)->ops->destroy = VecDestroy_MPIFFTW; 617 } 618 if (fout) { 619 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 620 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 621 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 622 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 623 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 624 } 625 if (bout) { 626 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 627 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 628 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 629 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 630 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 #endif 633 break; 634 default: 635 #if !defined(PETSC_USE_COMPLEX) 636 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 637 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 638 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 639 N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 640 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 641 642 if (fin) { 643 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 644 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 645 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 646 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 647 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 648 } 649 if (fout) { 650 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 651 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 652 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 653 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 654 
(*fout)->ops->destroy = VecDestroy_MPIFFTW; 655 } 656 if (bout) { 657 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 658 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 659 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 660 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 661 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 662 } 663 #else 664 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 665 if (fin) { 666 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 667 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 668 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 669 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 670 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 671 } 672 if (fout) { 673 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 674 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 675 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 676 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 677 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 678 } 679 if (bout) { 680 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 681 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 682 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 683 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 684 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 685 } 686 #endif 687 break; 688 } 689 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 690 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 691 memory leaks. We void these pointers here to avoid acident uses. 
692 */ 693 if (fin) (*fin)->ops->replacearray = NULL; 694 if (fout) (*fout)->ops->replacearray = NULL; 695 if (bout) (*bout)->ops->replacearray = NULL; 696 #endif 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /*@ 702 VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 703 704 Collective on Mat 705 706 Input Parameters: 707 + A - FFTW matrix 708 - x - the PETSc vector 709 710 Output Parameters: 711 . y - the FFTW vector 712 713 Level: intermediate 714 715 Note: 716 For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 717 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 718 zeros. This routine does that job by scattering operation. 719 720 .seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 721 @*/ 722 PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) { 723 PetscFunctionBegin; 724 PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 725 PetscFunctionReturn(0); 726 } 727 728 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) { 729 MPI_Comm comm; 730 Mat_FFT *fft = (Mat_FFT *)A->data; 731 PetscInt low; 732 PetscMPIInt rank, size; 733 PetscInt vsize, vsize1; 734 VecScatter vecscat; 735 IS list1; 736 737 PetscFunctionBegin; 738 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 739 PetscCallMPI(MPI_Comm_size(comm, &size)); 740 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 741 PetscCall(VecGetOwnershipRange(y, &low, NULL)); 742 743 if (size == 1) { 744 PetscCall(VecGetSize(x, &vsize)); 745 PetscCall(VecGetSize(y, &vsize1)); 746 PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 747 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 748 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 749 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 750 
PetscCall(VecScatterDestroy(&vecscat)); 751 PetscCall(ISDestroy(&list1)); 752 #if !PetscDefined(HAVE_MPIUNI) 753 } else { 754 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 755 PetscInt ndim = fft->ndim, *dim = fft->dim; 756 ptrdiff_t local_n0, local_0_start; 757 ptrdiff_t local_n1, local_1_start; 758 IS list2; 759 #if !defined(PETSC_USE_COMPLEX) 760 PetscInt i, j, k, partial_dim; 761 PetscInt *indx1, *indx2, tempindx, tempindx1; 762 PetscInt NM; 763 ptrdiff_t temp; 764 #endif 765 766 switch (ndim) { 767 case 1: 768 #if defined(PETSC_USE_COMPLEX) 769 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 770 771 PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 772 PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 773 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 774 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 775 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 776 PetscCall(VecScatterDestroy(&vecscat)); 777 PetscCall(ISDestroy(&list1)); 778 PetscCall(ISDestroy(&list2)); 779 #else 780 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 781 #endif 782 break; 783 case 2: 784 #if defined(PETSC_USE_COMPLEX) 785 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 786 787 PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 788 PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 789 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 790 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 791 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 792 PetscCall(VecScatterDestroy(&vecscat)); 793 PetscCall(ISDestroy(&list1)); 794 PetscCall(ISDestroy(&list2)); 795 #else 796 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, 
&local_1_start); 797 798 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 799 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 800 801 if (dim[1] % 2 == 0) { 802 NM = dim[1] + 2; 803 } else { 804 NM = dim[1] + 1; 805 } 806 for (i = 0; i < local_n0; i++) { 807 for (j = 0; j < dim[1]; j++) { 808 tempindx = i * dim[1] + j; 809 tempindx1 = i * NM + j; 810 811 indx1[tempindx] = local_0_start * dim[1] + tempindx; 812 indx2[tempindx] = low + tempindx1; 813 } 814 } 815 816 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 817 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 818 819 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 820 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 821 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 822 PetscCall(VecScatterDestroy(&vecscat)); 823 PetscCall(ISDestroy(&list1)); 824 PetscCall(ISDestroy(&list2)); 825 PetscCall(PetscFree(indx1)); 826 PetscCall(PetscFree(indx2)); 827 #endif 828 break; 829 830 case 3: 831 #if defined(PETSC_USE_COMPLEX) 832 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 833 834 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 835 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 836 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 837 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 838 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 839 PetscCall(VecScatterDestroy(&vecscat)); 840 PetscCall(ISDestroy(&list1)); 841 PetscCall(ISDestroy(&list2)); 842 #else 843 /* buggy, needs to be fixed. 
See src/mat/tests/ex158.c */ 844 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 845 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 846 847 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 848 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 849 850 if (dim[2] % 2 == 0) NM = dim[2] + 2; 851 else NM = dim[2] + 1; 852 853 for (i = 0; i < local_n0; i++) { 854 for (j = 0; j < dim[1]; j++) { 855 for (k = 0; k < dim[2]; k++) { 856 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 857 tempindx1 = i * dim[1] * NM + j * NM + k; 858 859 indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 860 indx2[tempindx] = low + tempindx1; 861 } 862 } 863 } 864 865 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 866 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 867 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 868 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 869 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 870 PetscCall(VecScatterDestroy(&vecscat)); 871 PetscCall(ISDestroy(&list1)); 872 PetscCall(ISDestroy(&list2)); 873 PetscCall(PetscFree(indx1)); 874 PetscCall(PetscFree(indx2)); 875 #endif 876 break; 877 878 default: 879 #if defined(PETSC_USE_COMPLEX) 880 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 881 882 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 883 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 884 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 885 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 886 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, 
SCATTER_FORWARD)); 887 PetscCall(VecScatterDestroy(&vecscat)); 888 PetscCall(ISDestroy(&list1)); 889 PetscCall(ISDestroy(&list2)); 890 #else 891 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 892 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 893 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 894 895 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 896 897 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 898 899 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 900 901 partial_dim = fftw->partial_dim; 902 903 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 904 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 905 906 if (dim[ndim - 1] % 2 == 0) NM = 2; 907 else NM = 1; 908 909 j = low; 910 for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 911 indx1[i] = local_0_start * partial_dim + i; 912 indx2[i] = j; 913 if (k % dim[ndim - 1] == 0) j += NM; 914 j++; 915 } 916 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 917 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 918 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 919 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 920 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 921 PetscCall(VecScatterDestroy(&vecscat)); 922 PetscCall(ISDestroy(&list1)); 923 PetscCall(ISDestroy(&list2)); 924 PetscCall(PetscFree(indx1)); 925 PetscCall(PetscFree(indx2)); 926 #endif 927 break; 928 } 929 #endif 930 } 931 PetscFunctionReturn(0); 932 } 933 934 /*@ 935 VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 936 937 Collective on Mat 938 939 Input Parameters: 940 + A - `MATFFTW` matrix 941 - x - FFTW vector 942 943 Output Parameters: 944 . 
y - PETSc vector

  Level: intermediate

  Note:
  While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
  `VecScatterFFTWToPetsc()` removes those extra zeros.

.seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) {
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(0);
}

/*
  VecScatterFFTWToPetsc_FFTW - MATFFTW implementation of VecScatterFFTWToPetsc().

  Mirror image of VecScatterPetscToFFTW_FFTW(): builds index sets mapping the
  (possibly padded) FFTW layout of x back onto the natural PETSc layout of y and
  scatters x into y, dropping the r2c padding at the end of each last-dimension row.
  Note the reversed roles in most VecScatterCreate() calls below: the low-offset
  ("padded") index set applies to x here, since x is now the FFTW-side vector.
*/
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) {
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* ownership range of x (the FFTW vector), not y as in the forward scatter */
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    /* Sequential: layouts coincide, one-to-one stride scatter */
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* backward direction: the transposed (local_n1/local_1_start) part of the
         1D decomposition describes this process's portion */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      /* NM = padded row length FFTW uses for r2c data, i.e. 2*(dim[1]/2+1) */
      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j; /* index in the unpadded PETSc layout */
          tempindx1 = i * NM + j;     /* index in the padded FFTW layout */

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      /* list2 (padded, low-based) indexes x; list1 indexes y */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      /* NOTE(review): case 2 complex passes (x, list2, y, list1); here (x, list1, y, list2).
         For complex data both index sets describe the same ordering, so this appears
         equivalent — confirm against upstream */
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      /* NM = padded last-dimension row length, 2*(dim[2]/2+1) */
      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k; /* unpadded index */
            tempindx1 = i * dim[1] * NM + j * NM + k;         /* padded index */

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      /* list2 (padded, low-based) indexes x; list1 indexes y */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* temporarily shrink the last dimension to the r2c size to query FFTW's
         distribution, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      /* NM = number of padding entries appended to each last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM; /* skip padding at each row end */
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 *
partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 1139 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1140 1141 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1142 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1143 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1144 PetscCall(VecScatterDestroy(&vecscat)); 1145 PetscCall(ISDestroy(&list1)); 1146 PetscCall(ISDestroy(&list2)); 1147 PetscCall(PetscFree(indx1)); 1148 PetscCall(PetscFree(indx2)); 1149 #endif 1150 break; 1151 } 1152 #endif 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 /* 1158 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1159 1160 Options Database Keys: 1161 + -mat_fftw_plannerflags - set FFTW planner flags 1162 1163 Level: intermediate 1164 1165 */ 1166 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) { 1167 MPI_Comm comm; 1168 Mat_FFT *fft = (Mat_FFT *)A->data; 1169 Mat_FFTW *fftw; 1170 PetscInt ndim = fft->ndim, *dim = fft->dim; 1171 const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 1172 unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1173 PetscBool flg; 1174 PetscInt p_flag, partial_dim = 1, ctr; 1175 PetscMPIInt size, rank; 1176 ptrdiff_t *pdim; 1177 #if !defined(PETSC_USE_COMPLEX) 1178 PetscInt tot_dim; 1179 #endif 1180 1181 PetscFunctionBegin; 1182 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1183 PetscCallMPI(MPI_Comm_size(comm, &size)); 1184 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1185 1186 #if !PetscDefined(HAVE_MPIUNI) 1187 fftw_mpi_init(); 1188 #endif 1189 pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 1190 pdim[0] = dim[0]; 1191 #if !defined(PETSC_USE_COMPLEX) 1192 tot_dim = 2 * dim[0]; 1193 #endif 1194 for (ctr = 1; ctr < ndim; ctr++) { 1195 partial_dim *= dim[ctr]; 1196 pdim[ctr] = dim[ctr]; 1197 #if !defined(PETSC_USE_COMPLEX) 1198 if (ctr 
== ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1199 else tot_dim *= dim[ctr]; 1200 #endif 1201 } 1202 1203 if (size == 1) { 1204 #if defined(PETSC_USE_COMPLEX) 1205 PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 1206 fft->n = fft->N; 1207 #else 1208 PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 1209 fft->n = tot_dim; 1210 #endif 1211 #if !PetscDefined(HAVE_MPIUNI) 1212 } else { 1213 ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 1214 #if !defined(PETSC_USE_COMPLEX) 1215 ptrdiff_t temp; 1216 PetscInt N1; 1217 #endif 1218 1219 switch (ndim) { 1220 case 1: 1221 #if !defined(PETSC_USE_COMPLEX) 1222 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1223 #else 1224 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1225 fft->n = (PetscInt)local_n0; 1226 PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1227 #endif 1228 break; 1229 case 2: 1230 #if defined(PETSC_USE_COMPLEX) 1231 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1232 fft->n = (PetscInt)local_n0 * dim[1]; 1233 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1234 #else 1235 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1236 1237 fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 1238 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 1239 #endif 1240 break; 1241 case 3: 1242 #if defined(PETSC_USE_COMPLEX) 1243 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1244 1245 fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 1246 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1247 #else 1248 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1249 1250 fft->n = 2 * (PetscInt)local_n0 * dim[1] 
* (dim[2] / 2 + 1); 1251 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 1252 #endif 1253 break; 1254 default: 1255 #if defined(PETSC_USE_COMPLEX) 1256 fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 1257 1258 fft->n = (PetscInt)local_n0 * partial_dim; 1259 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1260 #else 1261 temp = pdim[ndim - 1]; 1262 1263 pdim[ndim - 1] = temp / 2 + 1; 1264 1265 fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1266 1267 fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 1268 N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 1269 1270 pdim[ndim - 1] = temp; 1271 1272 PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1273 #endif 1274 break; 1275 } 1276 #endif 1277 } 1278 free(pdim); 1279 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 1280 PetscCall(PetscNewLog(A, &fftw)); 1281 fft->data = (void *)fftw; 1282 1283 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1284 fftw->partial_dim = partial_dim; 1285 1286 PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 1287 if (size == 1) { 1288 #if defined(PETSC_USE_64BIT_INDICES) 1289 fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1290 #else 1291 fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1292 #endif 1293 } 1294 1295 for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1296 1297 fftw->p_forward = NULL; 1298 fftw->p_backward = NULL; 1299 fftw->p_flag = FFTW_ESTIMATE; 1300 1301 if (size == 1) { 1302 A->ops->mult = MatMult_SeqFFTW; 1303 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1304 #if !PetscDefined(HAVE_MPIUNI) 1305 } else { 1306 A->ops->mult = MatMult_MPIFFTW; 1307 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1308 #endif 1309 } 1310 fft->matdestroy = MatDestroy_FFTW; 1311 A->assembled = PETSC_TRUE; 1312 
A->preallocated = PETSC_TRUE; 1313 1314 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 1315 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 1316 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1317 1318 /* get runtime options */ 1319 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 1320 PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 1321 if (flg) fftw->p_flag = iplans[p_flag]; 1322 PetscOptionsEnd(); 1323 PetscFunctionReturn(0); 1324 } 1325