1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #if !PetscDefined(HAVE_MPIUNI) 10 #include <fftw3-mpi.h> 11 #else 12 #include <fftw3.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 ptrdiff_t ndim_fftw, *dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward, p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be 27 executed for the arrays with which the plan was created */ 28 } Mat_FFTW; 29 30 extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 32 extern PetscErrorCode MatDestroy_FFTW(Mat); 33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 34 #if !PetscDefined(HAVE_MPIUNI) 35 extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38 #endif 39 40 /* 41 MatMult_SeqFFTW performs forward DFT 42 Input parameter: 43 A - the matrix 44 x - the vector on which FDFT will be performed 45 46 Output parameter: 47 y - vector that stores result of FDFT 48 */ 49 PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 50 { 51 Mat_FFT *fft = (Mat_FFT *)A->data; 52 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 53 const PetscScalar *x_array; 54 PetscScalar *y_array; 55 #if defined(PETSC_USE_COMPLEX) 56 #if defined(PETSC_USE_64BIT_INDICES) 57 fftw_iodim64 *iodims; 58 #else 59 fftw_iodim *iodims; 60 #endif 61 PetscInt i; 62 #endif 63 PetscInt ndim = fft->ndim, *dim = fft->dim; 64 65 PetscFunctionBegin; 66 PetscCall(VecGetArrayRead(x, &x_array)); 67 PetscCall(VecGetArray(y, &y_array)); 68 if (!fftw->p_forward) { /* create a plan, then execute it */ 69 switch (ndim) { 70 case 1: 71 #if defined(PETSC_USE_COMPLEX) 72 fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 73 #else 74 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 75 #endif 76 break; 77 case 2: 78 #if defined(PETSC_USE_COMPLEX) 79 fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 80 #else 81 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 82 #endif 83 break; 84 case 3: 85 #if defined(PETSC_USE_COMPLEX) 86 fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 87 #else 88 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 89 #endif 90 break; 91 default: 92 #if defined(PETSC_USE_COMPLEX) 93 iodims = fftw->iodims; 94 #if defined(PETSC_USE_64BIT_INDICES) 95 if (ndim) { 96 iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 97 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 98 for (i = ndim - 2; i >= 0; --i) { 99 iodims[i].n = (ptrdiff_t)dim[i]; 100 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 101 } 102 } 103 fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 104 #else 105 if (ndim) { 106 iodims[ndim - 1].n = (int)dim[ndim - 1]; 107 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 108 for (i = ndim - 2; i >= 0; --i) { 109 iodims[i].n = (int)dim[i]; 110 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 111 } 112 } 113 fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 114 #endif 115 116 #else 117 fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 118 #endif 119 break; 120 } 121 fftw->finarray = (PetscScalar *)x_array; 122 fftw->foutarray = y_array; 123 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 124 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 125 fftw_execute(fftw->p_forward); 126 } else { /* use existing plan */ 127 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 128 #if defined(PETSC_USE_COMPLEX) 129 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 130 #else 131 fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 132 #endif 133 } else { 134 fftw_execute(fftw->p_forward); 135 } 136 } 137 PetscCall(VecRestoreArray(y, &y_array)); 138 PetscCall(VecRestoreArrayRead(x, &x_array)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 /* MatMultTranspose_SeqFFTW performs serial backward DFT 143 Input parameter: 144 A - the matrix 145 x - the vector on which BDFT will be performed 146 147 Output parameter: 148 y - vector that stores result of BDFT 149 */ 150 151 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 152 { 153 Mat_FFT *fft = (Mat_FFT *)A->data; 154 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 155 const PetscScalar *x_array; 156 PetscScalar *y_array; 157 PetscInt ndim = fft->ndim, *dim = fft->dim; 158 #if defined(PETSC_USE_COMPLEX) 159 #if defined(PETSC_USE_64BIT_INDICES) 160 fftw_iodim64 *iodims = fftw->iodims; 161 #else 162 fftw_iodim *iodims = fftw->iodims; 163 #endif 164 #endif 165 166 PetscFunctionBegin; 167 PetscCall(VecGetArrayRead(x, &x_array)); 168 PetscCall(VecGetArray(y, &y_array)); 169 if (!fftw->p_backward) { /* create a plan, then execute it */ 170 switch (ndim) { 171 case 1: 172 #if defined(PETSC_USE_COMPLEX) 173 fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 174 #else 175 fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 176 #endif 177 break; 178 case 2: 179 #if defined(PETSC_USE_COMPLEX) 180 fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 181 #else 182 fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 183 #endif 184 break; 185 case 3: 186 #if defined(PETSC_USE_COMPLEX) 187 fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 188 #else 189 fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 190 #endif 191 break; 192 default: 193 #if defined(PETSC_USE_COMPLEX) 194 #if defined(PETSC_USE_64BIT_INDICES) 195 fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196 #else 197 fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 198 #endif 199 #else 200 fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 201 #endif 202 break; 203 } 204 fftw->binarray = (PetscScalar *)x_array; 205 fftw->boutarray = y_array; 206 fftw_execute(fftw->p_backward); 207 } else { /* use existing plan */ 208 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 209 #if defined(PETSC_USE_COMPLEX) 210 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 211 #else 212 fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 213 #endif 214 } else { 215 fftw_execute(fftw->p_backward); 216 } 217 } 218 PetscCall(VecRestoreArray(y, &y_array)); 219 PetscCall(VecRestoreArrayRead(x, &x_array)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 #if !PetscDefined(HAVE_MPIUNI) 224 /* MatMult_MPIFFTW performs forward DFT in parallel 225 Input parameter: 226 A - the matrix 227 x - the vector on which FDFT will be performed 228 229 Output parameter: 230 y - vector that stores result of FDFT 231 */ 232 PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 233 { 234 Mat_FFT *fft = (Mat_FFT *)A->data; 235 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 236 const PetscScalar *x_array; 237 PetscScalar *y_array; 238 PetscInt ndim = fft->ndim, *dim = fft->dim; 239 MPI_Comm comm; 240 241 PetscFunctionBegin; 242 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 243 PetscCall(VecGetArrayRead(x, &x_array)); 244 PetscCall(VecGetArray(y, &y_array)); 245 if (!fftw->p_forward) { /* create a plan, then execute it */ 246 switch (ndim) { 247 case 1: 248 #if defined(PETSC_USE_COMPLEX) 249 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 250 #else 251 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 252 #endif 253 break; 254 case 2: 255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 257 #else 258 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 259 #endif 260 break; 261 case 3: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 264 #else 265 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 266 #endif 267 break; 268 default: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 271 #else 272 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 273 #endif 274 break; 275 } 276 fftw->finarray = (PetscScalar *)x_array; 277 fftw->foutarray = y_array; 278 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280 fftw_execute(fftw->p_forward); 281 } else { /* use existing plan */ 282 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 284 } else { 285 fftw_execute(fftw->p_forward); 286 } 287 } 288 PetscCall(VecRestoreArray(y, &y_array)); 289 PetscCall(VecRestoreArrayRead(x, &x_array)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 /* 294 MatMultTranspose_MPIFFTW performs parallel backward DFT 295 Input parameter: 296 A - the matrix 297 x - the vector on which BDFT will be performed 298 299 Output parameter: 300 y - vector that stores result of BDFT 301 */ 302 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 303 { 304 Mat_FFT *fft = (Mat_FFT *)A->data; 305 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 306 const PetscScalar *x_array; 307 PetscScalar *y_array; 308 PetscInt ndim = fft->ndim, *dim = fft->dim; 309 MPI_Comm comm; 310 311 PetscFunctionBegin; 312 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 313 PetscCall(VecGetArrayRead(x, &x_array)); 314 PetscCall(VecGetArray(y, &y_array)); 315 if (!fftw->p_backward) { /* create a plan, then execute it */ 316 switch (ndim) { 317 case 1: 318 #if defined(PETSC_USE_COMPLEX) 319 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 320 #else 321 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 322 #endif 323 break; 324 case 2: 325 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 326 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 327 #else 328 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 329 #endif 330 break; 331 case 3: 332 #if defined(PETSC_USE_COMPLEX) 333 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 334 #else 335 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 336 #endif 337 break; 338 default: 339 #if defined(PETSC_USE_COMPLEX) 340 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 341 #else 342 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 343 #endif 344 break; 345 } 346 fftw->binarray = (PetscScalar *)x_array; 347 fftw->boutarray = y_array; 348 fftw_execute(fftw->p_backward); 349 } else { /* use existing plan */ 350 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 351 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 352 } else { 353 fftw_execute(fftw->p_backward); 354 } 355 } 356 PetscCall(VecRestoreArray(y, &y_array)); 357 PetscCall(VecRestoreArrayRead(x, &x_array)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 #endif 361 362 PetscErrorCode MatDestroy_FFTW(Mat A) 363 { 364 Mat_FFT *fft = (Mat_FFT *)A->data; 365 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 366 367 PetscFunctionBegin; 368 fftw_destroy_plan(fftw->p_forward); 369 fftw_destroy_plan(fftw->p_backward); 370 if (fftw->iodims) free(fftw->iodims); 371 PetscCall(PetscFree(fftw->dim_fftw)); 372 PetscCall(PetscFree(fft->data)); 373 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 376 #if !PetscDefined(HAVE_MPIUNI) 377 fftw_mpi_cleanup(); 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #if !PetscDefined(HAVE_MPIUNI) 383 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 385 { 386 PetscScalar *array; 387 388 PetscFunctionBegin; 389 PetscCall(VecGetArray(v, &array)); 390 fftw_free((fftw_complex *)array); 391 PetscCall(VecRestoreArray(v, &array)); 392 PetscCall(VecDestroy_MPI(v)); 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 #endif 396 397 #if !PetscDefined(HAVE_MPIUNI) 398 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 399 { 400 Mat A; 401 402 PetscFunctionBegin; 403 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 404 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 405 PetscFunctionReturn(PETSC_SUCCESS); 406 } 407 408 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 409 { 410 Mat A; 411 412 PetscFunctionBegin; 413 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 414 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 419 { 420 Mat A; 421 422 PetscFunctionBegin; 423 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 424 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 425 PetscFunctionReturn(PETSC_SUCCESS); 426 } 427 #endif 428 429 /*@ 430 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 431 parallel layout determined by `MATFFTW` 432 433 Collective 434 435 Input Parameter: 436 . A - the matrix 437 438 Output Parameters: 439 + x - (optional) input vector of forward FFTW 440 . y - (optional) output vector of forward FFTW 441 - z - (optional) output vector of backward FFTW 442 443 Options Database Key: 444 . -mat_fftw_plannerflags - set FFTW planner flags 445 446 Level: advanced 447 448 Notes: 449 The parallel layout of output of forward FFTW is always same as the input 450 of the backward FFTW. But parallel layout of the input vector of forward 451 FFTW might not be same as the output of backward FFTW. 452 453 We need to provide enough space while doing parallel real transform. 454 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 455 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 456 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 457 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 458 zeros if it is odd only one zero is needed. 459 460 Lastly one needs some scratch space at the end of data set in each process. alloc_local 461 figures out how much space is needed, i.e. it figures out the data+scratch space for 462 each processor and returns that. 463 464 Developer Note: 465 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 466 467 .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 468 @*/ 469 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 470 { 471 PetscFunctionBegin; 472 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 473 PetscFunctionReturn(PETSC_SUCCESS); 474 } 475 476 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 477 { 478 PetscMPIInt size, rank; 479 MPI_Comm comm; 480 Mat_FFT *fft = (Mat_FFT *)A->data; 481 482 PetscFunctionBegin; 483 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 484 PetscValidType(A, 1); 485 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 486 487 PetscCallMPI(MPI_Comm_size(comm, &size)); 488 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 489 if (size == 1) { /* sequential case */ 490 #if defined(PETSC_USE_COMPLEX) 491 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 492 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 493 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 494 #else 495 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 496 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 497 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 498 #endif 499 #if !PetscDefined(HAVE_MPIUNI) 500 } else { /* parallel cases */ 501 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 502 PetscInt ndim = fft->ndim, *dim = fft->dim; 503 ptrdiff_t alloc_local, local_n0, local_0_start; 504 ptrdiff_t local_n1; 505 fftw_complex *data_fout; 506 ptrdiff_t local_1_start; 507 #if defined(PETSC_USE_COMPLEX) 508 fftw_complex *data_fin, *data_bout; 509 #else 510 double *data_finr, *data_boutr; 511 PetscInt n1, N1; 512 ptrdiff_t temp; 513 #endif 514 515 switch (ndim) { 516 case 1: 517 #if !defined(PETSC_USE_COMPLEX) 518 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 519 #else 520 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 521 if (fin) { 522 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 523 PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 524 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 525 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 526 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 527 } 528 if (fout) { 529 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 530 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 531 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 532 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 533 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 536 if (bout) { 537 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 538 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 539 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 540 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 541 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 542 } 543 break; 544 #endif 545 case 2: 546 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 547 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 548 N1 = 2 * dim[0] * (dim[1] / 2 + 1); 549 n1 = 2 * local_n0 * (dim[1] / 2 + 1); 550 if (fin) { 551 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 552 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 553 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 554 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 555 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 556 } 557 if (fout) { 558 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 559 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 560 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 561 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 562 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 563 } 564 if (bout) { 565 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 566 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 567 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 568 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 569 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 570 } 571 #else 572 /* Get local size */ 573 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 574 if (fin) { 575 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 576 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 577 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 578 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 579 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 if (fout) { 582 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 583 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 584 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 585 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 586 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 587 } 588 if (bout) { 589 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 590 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 591 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 592 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 593 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 #endif 596 break; 597 case 3: 598 #if !defined(PETSC_USE_COMPLEX) 599 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 600 N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 601 n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 602 if (fin) { 603 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 604 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 605 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 606 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 607 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 if (fout) { 610 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 611 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 612 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 613 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 614 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 615 } 616 if (bout) { 617 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 618 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 619 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 620 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 621 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 622 } 623 #else 624 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 625 if (fin) { 626 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 627 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 628 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 629 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 630 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 if (fout) { 633 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 634 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 635 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 636 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 637 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 638 } 639 if (bout) { 640 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 641 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 642 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 643 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 644 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 645 } 646 #endif 647 break; 648 default: 649 #if !defined(PETSC_USE_COMPLEX) 650 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 651 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 652 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 653 N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 654 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 655 656 if (fin) { 657 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 658 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 659 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 660 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 661 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 662 } 663 if (fout) { 664 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 665 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 666 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 667 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 668 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 669 } 670 if (bout) { 671 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 672 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 673 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 674 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 675 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 676 } 677 #else 678 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 679 if (fin) { 680 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 681 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 682 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 683 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 684 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 685 } 686 if (fout) { 687 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 688 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 689 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 690 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 691 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 692 } 693 if (bout) { 694 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 695 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 696 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 697 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 698 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 699 } 700 #endif 701 break; 702 } 703 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 704 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 705 memory leaks. We void these pointers here to avoid accident uses. 706 */ 707 if (fin) (*fin)->ops->replacearray = NULL; 708 if (fout) (*fout)->ops->replacearray = NULL; 709 if (bout) (*bout)->ops->replacearray = NULL; 710 #endif 711 } 712 PetscFunctionReturn(PETSC_SUCCESS); 713 } 714 715 /*@ 716 VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 717 718 Collective 719 720 Input Parameters: 721 + A - FFTW matrix 722 - x - the PETSc vector 723 724 Output Parameters: 725 . y - the FFTW vector 726 727 Level: intermediate 728 729 Note: 730 For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 731 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 732 zeros. This routine does that job by scattering operation. 733 734 .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 735 @*/ 736 PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 737 { 738 PetscFunctionBegin; 739 PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 740 PetscFunctionReturn(PETSC_SUCCESS); 741 } 742 743 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 744 { 745 MPI_Comm comm; 746 Mat_FFT *fft = (Mat_FFT *)A->data; 747 PetscInt low; 748 PetscMPIInt rank, size; 749 PetscInt vsize, vsize1; 750 VecScatter vecscat; 751 IS list1; 752 753 PetscFunctionBegin; 754 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 755 PetscCallMPI(MPI_Comm_size(comm, &size)); 756 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 757 PetscCall(VecGetOwnershipRange(y, &low, NULL)); 758 759 if (size == 1) { 760 PetscCall(VecGetSize(x, &vsize)); 761 PetscCall(VecGetSize(y, &vsize1)); 762 PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 763 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 764 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 765 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 766 PetscCall(VecScatterDestroy(&vecscat)); 767 PetscCall(ISDestroy(&list1)); 768 #if !PetscDefined(HAVE_MPIUNI) 769 } else { 770 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 771 PetscInt ndim = fft->ndim, *dim = fft->dim; 772 ptrdiff_t local_n0, local_0_start; 773 ptrdiff_t local_n1, local_1_start; 774 IS list2; 775 #if !defined(PETSC_USE_COMPLEX) 776 PetscInt i, j, k, partial_dim; 777 PetscInt *indx1, *indx2, tempindx, tempindx1; 778 PetscInt NM; 779 ptrdiff_t temp; 780 #endif 781 782 switch (ndim) { 783 case 1: 784 #if defined(PETSC_USE_COMPLEX) 785 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 786 787 PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 788 PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 789 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 790 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 791 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 792 PetscCall(VecScatterDestroy(&vecscat)); 793 PetscCall(ISDestroy(&list1)); 794 PetscCall(ISDestroy(&list2)); 795 #else 796 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 797 #endif 798 break; 799 case 2: 800 #if defined(PETSC_USE_COMPLEX) 801 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 802 803 PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 804 PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 805 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 806 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 807 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 808 PetscCall(VecScatterDestroy(&vecscat)); 809 PetscCall(ISDestroy(&list1)); 810 PetscCall(ISDestroy(&list2)); 811 #else 812 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 813 814 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 815 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 816 817 if (dim[1] % 2 == 0) { 818 NM = dim[1] + 2; 819 } else { 820 NM = dim[1] + 1; 821 } 822 for (i = 0; i < local_n0; i++) { 823 for (j = 0; j < dim[1]; j++) { 824 tempindx = i * dim[1] + j; 825 tempindx1 = i * NM + j; 826 827 indx1[tempindx] = local_0_start * dim[1] + tempindx; 828 indx2[tempindx] = low + tempindx1; 829 } 830 } 831 832 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 833 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 834 835 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 836 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 837 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 838 PetscCall(VecScatterDestroy(&vecscat)); 839 PetscCall(ISDestroy(&list1)); 840 PetscCall(ISDestroy(&list2)); 841 PetscCall(PetscFree(indx1)); 842 PetscCall(PetscFree(indx2)); 843 #endif 844 break; 845 846 case 3: 847 #if defined(PETSC_USE_COMPLEX) 848 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 849 850 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 851 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 852 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 853 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 854 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 855 PetscCall(VecScatterDestroy(&vecscat)); 856 PetscCall(ISDestroy(&list1)); 857 PetscCall(ISDestroy(&list2)); 858 #else 859 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 860 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 861 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 862 863 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 864 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 865 866 if (dim[2] % 2 == 0) NM = dim[2] + 2; 867 else NM = dim[2] + 1; 868 869 for (i = 0; i < local_n0; i++) { 870 for (j = 0; j < dim[1]; j++) { 871 for (k = 0; k < dim[2]; k++) { 872 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 873 tempindx1 = i * dim[1] * NM + j * NM + k; 874 875 indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 876 indx2[tempindx] = low + tempindx1; 877 } 878 } 879 } 880 881 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 882 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 883 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 884 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 885 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 886 PetscCall(VecScatterDestroy(&vecscat)); 887 PetscCall(ISDestroy(&list1)); 888 PetscCall(ISDestroy(&list2)); 889 PetscCall(PetscFree(indx1)); 890 PetscCall(PetscFree(indx2)); 891 #endif 892 break; 893 894 default: 895 #if defined(PETSC_USE_COMPLEX) 896 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 897 898 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 899 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 900 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 901 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 902 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 903 PetscCall(VecScatterDestroy(&vecscat)); 904 PetscCall(ISDestroy(&list1)); 905 PetscCall(ISDestroy(&list2)); 906 #else 907 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 908 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 909 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 910 911 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 912 913 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 914 915 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 916 917 partial_dim = fftw->partial_dim; 918 919 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 920 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 921 922 if (dim[ndim - 1] % 2 == 0) NM = 2; 923 else NM = 1; 924 925 j = low; 926 for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 927 indx1[i] = local_0_start * partial_dim + i; 928 indx2[i] = j; 929 if (k % dim[ndim - 1] == 0) j += NM; 930 j++; 931 } 932 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 933 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 934 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 935 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 936 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 937 PetscCall(VecScatterDestroy(&vecscat)); 938 PetscCall(ISDestroy(&list1)); 939 PetscCall(ISDestroy(&list2)); 940 PetscCall(PetscFree(indx1)); 941 PetscCall(PetscFree(indx2)); 942 #endif 943 break; 944 } 945 #endif 946 } 947 PetscFunctionReturn(PETSC_SUCCESS); 948 } 949 950 /*@ 951 VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 952 953 Collective 954 955 Input Parameters: 956 + A - `MATFFTW` matrix 957 - x - FFTW vector 958 959 Output Parameters: 960 . y - PETSc vector 961 962 Level: intermediate 963 964 Note: 965 While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 966 `VecScatterFFTWToPetsc()` removes those extra zeros. 967 968 .seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 969 @*/ 970 PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 971 { 972 PetscFunctionBegin; 973 PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 974 PetscFunctionReturn(PETSC_SUCCESS); 975 } 976 977 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 978 { 979 MPI_Comm comm; 980 Mat_FFT *fft = (Mat_FFT *)A->data; 981 PetscInt low; 982 PetscMPIInt rank, size; 983 VecScatter vecscat; 984 IS list1; 985 986 PetscFunctionBegin; 987 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 988 PetscCallMPI(MPI_Comm_size(comm, &size)); 989 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 990 PetscCall(VecGetOwnershipRange(x, &low, NULL)); 991 992 if (size == 1) { 993 PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 994 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 995 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 996 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 997 PetscCall(VecScatterDestroy(&vecscat)); 998 PetscCall(ISDestroy(&list1)); 999 1000 #if !PetscDefined(HAVE_MPIUNI) 1001 } else { 1002 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 1003 PetscInt ndim = fft->ndim, *dim = fft->dim; 1004 ptrdiff_t local_n0, local_0_start; 1005 ptrdiff_t local_n1, local_1_start; 1006 IS list2; 1007 #if !defined(PETSC_USE_COMPLEX) 1008 PetscInt i, j, k, partial_dim; 1009 PetscInt *indx1, *indx2, tempindx, tempindx1; 1010 PetscInt NM; 1011 ptrdiff_t temp; 1012 #endif 1013 switch (ndim) { 1014 case 1: 1015 #if defined(PETSC_USE_COMPLEX) 1016 fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1017 1018 PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 1019 PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 1020 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1021 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1022 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1023 PetscCall(VecScatterDestroy(&vecscat)); 1024 PetscCall(ISDestroy(&list1)); 1025 PetscCall(ISDestroy(&list2)); 1026 #else 1027 SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 1028 #endif 1029 break; 1030 case 2: 1031 #if defined(PETSC_USE_COMPLEX) 1032 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1033 1034 PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 1035 PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 1036 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1037 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1038 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1039 PetscCall(VecScatterDestroy(&vecscat)); 1040 PetscCall(ISDestroy(&list1)); 1041 PetscCall(ISDestroy(&list2)); 1042 #else 1043 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1044 1045 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 1046 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 1047 1048 if (dim[1] % 2 == 0) NM = dim[1] + 2; 1049 else NM = dim[1] + 1; 1050 1051 for (i = 0; i < local_n0; i++) { 1052 for (j = 0; j < dim[1]; j++) { 1053 tempindx = i * dim[1] + j; 1054 tempindx1 = i * NM + j; 1055 1056 indx1[tempindx] = local_0_start * dim[1] + tempindx; 1057 indx2[tempindx] = low + tempindx1; 1058 } 1059 } 1060 1061 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 1062 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 1063 1064 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1065 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1067 PetscCall(VecScatterDestroy(&vecscat)); 1068 PetscCall(ISDestroy(&list1)); 1069 PetscCall(ISDestroy(&list2)); 1070 PetscCall(PetscFree(indx1)); 1071 PetscCall(PetscFree(indx2)); 1072 #endif 1073 break; 1074 case 3: 1075 #if defined(PETSC_USE_COMPLEX) 1076 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1077 1078 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 1079 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 1080 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1081 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1082 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1083 PetscCall(VecScatterDestroy(&vecscat)); 1084 PetscCall(ISDestroy(&list1)); 1085 PetscCall(ISDestroy(&list2)); 1086 #else 1087 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1088 1089 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 1090 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 1091 1092 if (dim[2] % 2 == 0) NM = dim[2] + 2; 1093 else NM = dim[2] + 1; 1094 1095 for (i = 0; i < local_n0; i++) { 1096 for (j = 0; j < dim[1]; j++) { 1097 for (k = 0; k < dim[2]; k++) { 1098 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 1099 tempindx1 = i * dim[1] * NM + j * NM + k; 1100 1101 indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 1102 indx2[tempindx] = low + tempindx1; 1103 } 1104 } 1105 } 1106 1107 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 1108 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 1109 1110 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1111 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1112 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1113 PetscCall(VecScatterDestroy(&vecscat)); 1114 PetscCall(ISDestroy(&list1)); 1115 PetscCall(ISDestroy(&list2)); 1116 PetscCall(PetscFree(indx1)); 1117 PetscCall(PetscFree(indx2)); 1118 #endif 1119 break; 1120 default: 1121 #if defined(PETSC_USE_COMPLEX) 1122 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 1123 1124 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 1125 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 1126 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1127 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1128 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1129 PetscCall(VecScatterDestroy(&vecscat)); 1130 PetscCall(ISDestroy(&list1)); 1131 PetscCall(ISDestroy(&list2)); 1132 #else 1133 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 1134 1135 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 1136 1137 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1138 1139 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1140 1141 partial_dim = fftw->partial_dim; 1142 1143 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 1144 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1145 1146 if (dim[ndim - 1] % 2 == 0) NM = 2; 1147 else NM = 1; 1148 1149 j = low; 1150 for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1151 indx1[i] = local_0_start * partial_dim + i; 1152 indx2[i] = j; 1153 if (k % dim[ndim - 1] == 0) j += NM; 1154 j++; 1155 } 1156 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 1157 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1158 1159 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1160 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1161 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1162 PetscCall(VecScatterDestroy(&vecscat)); 1163 PetscCall(ISDestroy(&list1)); 1164 PetscCall(ISDestroy(&list2)); 1165 PetscCall(PetscFree(indx1)); 1166 PetscCall(PetscFree(indx2)); 1167 #endif 1168 break; 1169 } 1170 #endif 1171 } 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1176 { 1177 MPI_Comm comm; 1178 Mat_FFT *fft = (Mat_FFT *)A->data; 1179 Mat_FFTW *fftw; 1180 PetscInt ndim = fft->ndim, *dim = fft->dim; 1181 const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 1182 unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1183 PetscBool flg; 1184 PetscInt p_flag, partial_dim = 1, ctr; 1185 PetscMPIInt size, rank; 1186 ptrdiff_t *pdim; 1187 #if !defined(PETSC_USE_COMPLEX) 1188 PetscInt tot_dim; 1189 #endif 1190 1191 PetscFunctionBegin; 1192 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1193 PetscCallMPI(MPI_Comm_size(comm, &size)); 1194 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1195 1196 #if !PetscDefined(HAVE_MPIUNI) 1197 fftw_mpi_init(); 1198 #endif 1199 pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 1200 pdim[0] = dim[0]; 1201 #if !defined(PETSC_USE_COMPLEX) 1202 tot_dim = 2 * dim[0]; 1203 #endif 1204 for (ctr = 1; ctr < ndim; ctr++) { 1205 partial_dim *= dim[ctr]; 1206 pdim[ctr] = dim[ctr]; 1207 #if !defined(PETSC_USE_COMPLEX) 1208 if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1209 else tot_dim *= dim[ctr]; 1210 #endif 1211 } 1212 1213 if (size == 1) { 1214 #if defined(PETSC_USE_COMPLEX) 1215 PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 1216 fft->n = fft->N; 1217 #else 1218 PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 1219 fft->n = tot_dim; 1220 #endif 1221 #if !PetscDefined(HAVE_MPIUNI) 1222 } else { 1223 ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 1224 #if !defined(PETSC_USE_COMPLEX) 1225 ptrdiff_t temp; 1226 PetscInt N1; 1227 #endif 1228 1229 switch (ndim) { 1230 case 1: 1231 #if !defined(PETSC_USE_COMPLEX) 1232 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1233 #else 1234 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1235 fft->n = (PetscInt)local_n0; 1236 PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1237 #endif 1238 break; 1239 case 2: 1240 #if defined(PETSC_USE_COMPLEX) 1241 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1242 fft->n = (PetscInt)local_n0 * dim[1]; 1243 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1244 #else 1245 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1246 1247 fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 1248 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 1249 #endif 1250 break; 1251 case 3: 1252 #if defined(PETSC_USE_COMPLEX) 1253 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1254 1255 fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 1256 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1257 #else 1258 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1259 1260 fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 1261 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 1262 #endif 1263 break; 1264 default: 1265 #if defined(PETSC_USE_COMPLEX) 1266 fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 1267 1268 fft->n = (PetscInt)local_n0 * partial_dim; 1269 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1270 #else 1271 temp = pdim[ndim - 1]; 1272 1273 pdim[ndim - 1] = temp / 2 + 1; 1274 1275 fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1276 1277 fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 1278 N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 1279 1280 pdim[ndim - 1] = temp; 1281 1282 PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1283 #endif 1284 break; 1285 } 1286 #endif 1287 } 1288 free(pdim); 1289 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 1290 PetscCall(PetscNew(&fftw)); 1291 fft->data = (void *)fftw; 1292 1293 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1294 fftw->partial_dim = partial_dim; 1295 1296 PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 1297 if (size == 1) { 1298 #if defined(PETSC_USE_64BIT_INDICES) 1299 fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1300 #else 1301 fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1302 #endif 1303 } 1304 1305 for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1306 1307 fftw->p_forward = NULL; 1308 fftw->p_backward = NULL; 1309 fftw->p_flag = FFTW_ESTIMATE; 1310 1311 if (size == 1) { 1312 A->ops->mult = MatMult_SeqFFTW; 1313 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1314 #if !PetscDefined(HAVE_MPIUNI) 1315 } else { 1316 A->ops->mult = MatMult_MPIFFTW; 1317 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1318 #endif 1319 } 1320 fft->matdestroy = MatDestroy_FFTW; 1321 A->assembled = PETSC_TRUE; 1322 A->preallocated = PETSC_TRUE; 1323 1324 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 1325 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 1326 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1327 1328 /* get runtime options */ 1329 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 1330 PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 1331 if (flg) fftw->p_flag = iplans[p_flag]; 1332 PetscOptionsEnd(); 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } 1335