1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #if !PetscDefined(HAVE_MPIUNI) 10 #include <fftw3-mpi.h> 11 #else 12 #include <fftw3.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 ptrdiff_t ndim_fftw, *dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward, p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be 27 executed for the arrays with which the plan was created */ 28 } Mat_FFTW; 29 30 extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 32 extern PetscErrorCode MatDestroy_FFTW(Mat); 33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 34 #if !PetscDefined(HAVE_MPIUNI) 35 extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38 #endif 39 40 /* 41 MatMult_SeqFFTW performs forward DFT 42 Input parameter: 43 A - the matrix 44 x - the vector on which FDFT will be performed 45 46 Output parameter: 47 y - vector that stores result of FDFT 48 */ 49 PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 50 { 51 Mat_FFT *fft = (Mat_FFT *)A->data; 52 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 53 const PetscScalar *x_array; 54 PetscScalar *y_array; 55 #if defined(PETSC_USE_COMPLEX) 56 #if defined(PETSC_USE_64BIT_INDICES) 57 fftw_iodim64 *iodims; 58 #else 59 fftw_iodim *iodims; 60 #endif 61 PetscInt i; 62 #endif 63 PetscInt ndim = fft->ndim, *dim = fft->dim; 64 65 PetscFunctionBegin; 66 PetscCall(VecGetArrayRead(x, &x_array)); 67 PetscCall(VecGetArray(y, 
&y_array)); 68 if (!fftw->p_forward) { /* create a plan, then execute it */ 69 switch (ndim) { 70 case 1: 71 #if defined(PETSC_USE_COMPLEX) 72 fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 73 #else 74 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 75 #endif 76 break; 77 case 2: 78 #if defined(PETSC_USE_COMPLEX) 79 fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 80 #else 81 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 82 #endif 83 break; 84 case 3: 85 #if defined(PETSC_USE_COMPLEX) 86 fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 87 #else 88 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 89 #endif 90 break; 91 default: 92 #if defined(PETSC_USE_COMPLEX) 93 iodims = fftw->iodims; 94 #if defined(PETSC_USE_64BIT_INDICES) 95 if (ndim) { 96 iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 97 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 98 for (i = ndim - 2; i >= 0; --i) { 99 iodims[i].n = (ptrdiff_t)dim[i]; 100 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 101 } 102 } 103 fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 104 #else 105 if (ndim) { 106 iodims[ndim - 1].n = (int)dim[ndim - 1]; 107 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 108 for (i = ndim - 2; i >= 0; --i) { 109 iodims[i].n = (int)dim[i]; 110 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 111 } 112 } 113 fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, 
(fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 114 #endif 115 116 #else 117 fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 118 #endif 119 break; 120 } 121 fftw->finarray = (PetscScalar *)x_array; 122 fftw->foutarray = y_array; 123 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 124 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 125 fftw_execute(fftw->p_forward); 126 } else { /* use existing plan */ 127 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 128 #if defined(PETSC_USE_COMPLEX) 129 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 130 #else 131 fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 132 #endif 133 } else { 134 fftw_execute(fftw->p_forward); 135 } 136 } 137 PetscCall(VecRestoreArray(y, &y_array)); 138 PetscCall(VecRestoreArrayRead(x, &x_array)); 139 PetscFunctionReturn(PETSC_SUCCESS); 140 } 141 142 /* MatMultTranspose_SeqFFTW performs serial backward DFT 143 Input parameter: 144 A - the matrix 145 x - the vector on which BDFT will be performed 146 147 Output parameter: 148 y - vector that stores result of BDFT 149 */ 150 151 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 152 { 153 Mat_FFT *fft = (Mat_FFT *)A->data; 154 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 155 const PetscScalar *x_array; 156 PetscScalar *y_array; 157 PetscInt ndim = fft->ndim, *dim = fft->dim; 158 #if defined(PETSC_USE_COMPLEX) 159 #if defined(PETSC_USE_64BIT_INDICES) 160 fftw_iodim64 *iodims = fftw->iodims; 161 #else 162 fftw_iodim *iodims = fftw->iodims; 163 #endif 164 #endif 165 166 PetscFunctionBegin; 167 PetscCall(VecGetArrayRead(x, &x_array)); 168 PetscCall(VecGetArray(y, &y_array)); 169 if (!fftw->p_backward) { /* create a plan, then execute it */ 170 switch (ndim) { 171 case 1: 172 #if 
defined(PETSC_USE_COMPLEX) 173 fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 174 #else 175 fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 176 #endif 177 break; 178 case 2: 179 #if defined(PETSC_USE_COMPLEX) 180 fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 181 #else 182 fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 183 #endif 184 break; 185 case 3: 186 #if defined(PETSC_USE_COMPLEX) 187 fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 188 #else 189 fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 190 #endif 191 break; 192 default: 193 #if defined(PETSC_USE_COMPLEX) 194 #if defined(PETSC_USE_64BIT_INDICES) 195 fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 196 #else 197 fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 198 #endif 199 #else 200 fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 201 #endif 202 break; 203 } 204 fftw->binarray = (PetscScalar *)x_array; 205 fftw->boutarray = y_array; 206 fftw_execute(fftw->p_backward); 207 } else { /* use existing plan */ 208 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 209 #if defined(PETSC_USE_COMPLEX) 210 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 211 #else 212 
fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 213 #endif 214 } else { 215 fftw_execute(fftw->p_backward); 216 } 217 } 218 PetscCall(VecRestoreArray(y, &y_array)); 219 PetscCall(VecRestoreArrayRead(x, &x_array)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 223 #if !PetscDefined(HAVE_MPIUNI) 224 /* MatMult_MPIFFTW performs forward DFT in parallel 225 Input parameter: 226 A - the matrix 227 x - the vector on which FDFT will be performed 228 229 Output parameter: 230 y - vector that stores result of FDFT 231 */ 232 PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 233 { 234 Mat_FFT *fft = (Mat_FFT *)A->data; 235 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 236 const PetscScalar *x_array; 237 PetscScalar *y_array; 238 PetscInt ndim = fft->ndim, *dim = fft->dim; 239 MPI_Comm comm; 240 241 PetscFunctionBegin; 242 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 243 PetscCall(VecGetArrayRead(x, &x_array)); 244 PetscCall(VecGetArray(y, &y_array)); 245 if (!fftw->p_forward) { /* create a plan, then execute it */ 246 switch (ndim) { 247 case 1: 248 #if defined(PETSC_USE_COMPLEX) 249 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 250 #else 251 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 252 #endif 253 break; 254 case 2: 255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 257 #else 258 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 259 #endif 260 break; 261 case 3: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, 
FFTW_FORWARD, fftw->p_flag); 264 #else 265 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 266 #endif 267 break; 268 default: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 271 #else 272 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 273 #endif 274 break; 275 } 276 fftw->finarray = (PetscScalar *)x_array; 277 fftw->foutarray = y_array; 278 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280 fftw_execute(fftw->p_forward); 281 } else { /* use existing plan */ 282 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 284 } else { 285 fftw_execute(fftw->p_forward); 286 } 287 } 288 PetscCall(VecRestoreArray(y, &y_array)); 289 PetscCall(VecRestoreArrayRead(x, &x_array)); 290 PetscFunctionReturn(PETSC_SUCCESS); 291 } 292 293 /* 294 MatMultTranspose_MPIFFTW performs parallel backward DFT 295 Input parameter: 296 A - the matrix 297 x - the vector on which BDFT will be performed 298 299 Output parameter: 300 y - vector that stores result of BDFT 301 */ 302 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 303 { 304 Mat_FFT *fft = (Mat_FFT *)A->data; 305 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 306 const PetscScalar *x_array; 307 PetscScalar *y_array; 308 PetscInt ndim = fft->ndim, *dim = fft->dim; 309 MPI_Comm comm; 310 311 PetscFunctionBegin; 312 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 313 PetscCall(VecGetArrayRead(x, &x_array)); 314 PetscCall(VecGetArray(y, &y_array)); 
315 if (!fftw->p_backward) { /* create a plan, then execute it */ 316 switch (ndim) { 317 case 1: 318 #if defined(PETSC_USE_COMPLEX) 319 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 320 #else 321 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 322 #endif 323 break; 324 case 2: 325 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 326 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 327 #else 328 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 329 #endif 330 break; 331 case 3: 332 #if defined(PETSC_USE_COMPLEX) 333 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 334 #else 335 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 336 #endif 337 break; 338 default: 339 #if defined(PETSC_USE_COMPLEX) 340 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 341 #else 342 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 343 #endif 344 break; 345 } 346 fftw->binarray = (PetscScalar *)x_array; 347 fftw->boutarray = y_array; 348 fftw_execute(fftw->p_backward); 349 } else { /* use existing plan */ 350 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 351 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 352 } else { 353 
fftw_execute(fftw->p_backward); 354 } 355 } 356 PetscCall(VecRestoreArray(y, &y_array)); 357 PetscCall(VecRestoreArrayRead(x, &x_array)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 #endif 361 362 PetscErrorCode MatDestroy_FFTW(Mat A) 363 { 364 Mat_FFT *fft = (Mat_FFT *)A->data; 365 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 366 367 PetscFunctionBegin; 368 fftw_destroy_plan(fftw->p_forward); 369 fftw_destroy_plan(fftw->p_backward); 370 if (fftw->iodims) free(fftw->iodims); 371 PetscCall(PetscFree(fftw->dim_fftw)); 372 PetscCall(PetscFree(fft->data)); 373 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 376 #if !PetscDefined(HAVE_MPIUNI) 377 fftw_mpi_cleanup(); 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #if !PetscDefined(HAVE_MPIUNI) 383 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 385 { 386 PetscScalar *array; 387 388 PetscFunctionBegin; 389 PetscCall(VecGetArray(v, &array)); 390 fftw_free((fftw_complex *)array); 391 PetscCall(VecRestoreArray(v, &array)); 392 PetscCall(VecDestroy_MPI(v)); 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 #endif 396 397 #if !PetscDefined(HAVE_MPIUNI) 398 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 399 { 400 Mat A; 401 402 PetscFunctionBegin; 403 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 404 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 405 PetscFunctionReturn(PETSC_SUCCESS); 406 } 407 408 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 409 { 410 Mat A; 411 412 PetscFunctionBegin; 413 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 414 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 415 
PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 419 { 420 Mat A; 421 422 PetscFunctionBegin; 423 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 424 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 425 PetscFunctionReturn(PETSC_SUCCESS); 426 } 427 #endif 428 429 /*@ 430 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 431 parallel layout determined by `MATFFTW` 432 433 Collective 434 435 Input Parameter: 436 . A - the matrix 437 438 Output Parameters: 439 + x - (optional) input vector of forward FFTW 440 . y - (optional) output vector of forward FFTW 441 - z - (optional) output vector of backward FFTW 442 443 Level: advanced 444 445 Notes: 446 The parallel layout of output of forward FFTW is always same as the input 447 of the backward FFTW. But parallel layout of the input vector of forward 448 FFTW might not be same as the output of backward FFTW. 449 450 We need to provide enough space while doing parallel real transform. 451 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 452 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 453 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 454 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 455 zeros if it is odd only one zero is needed. 456 457 Lastly one needs some scratch space at the end of data set in each process. alloc_local 458 figures out how much space is needed, i.e. it figures out the data+scratch space for 459 each processor and returns that. 460 461 Developer Note: 462 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 
463 464 .seealso: `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 465 @*/ 466 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 467 { 468 PetscFunctionBegin; 469 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 470 PetscFunctionReturn(PETSC_SUCCESS); 471 } 472 473 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 474 { 475 PetscMPIInt size, rank; 476 MPI_Comm comm; 477 Mat_FFT *fft = (Mat_FFT *)A->data; 478 479 PetscFunctionBegin; 480 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 481 PetscValidType(A, 1); 482 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 483 484 PetscCallMPI(MPI_Comm_size(comm, &size)); 485 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 486 if (size == 1) { /* sequential case */ 487 #if defined(PETSC_USE_COMPLEX) 488 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 489 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 490 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 491 #else 492 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 493 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 494 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 495 #endif 496 #if !PetscDefined(HAVE_MPIUNI) 497 } else { /* parallel cases */ 498 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 499 PetscInt ndim = fft->ndim, *dim = fft->dim; 500 ptrdiff_t alloc_local, local_n0, local_0_start; 501 ptrdiff_t local_n1; 502 fftw_complex *data_fout; 503 ptrdiff_t local_1_start; 504 #if defined(PETSC_USE_COMPLEX) 505 fftw_complex *data_fin, *data_bout; 506 #else 507 double *data_finr, *data_boutr; 508 PetscInt n1, N1; 509 ptrdiff_t temp; 510 #endif 511 512 switch (ndim) { 513 case 1: 514 #if !defined(PETSC_USE_COMPLEX) 515 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 516 #else 517 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, 
&local_n1, &local_1_start); 518 if (fin) { 519 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 520 PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 521 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 522 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 523 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 if (fout) { 526 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 527 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 528 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 529 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 530 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 531 } 532 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 533 if (bout) { 534 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 535 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 536 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 537 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 538 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 break; 541 #endif 542 case 2: 543 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 544 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 545 N1 = 2 * dim[0] * (dim[1] / 2 + 1); 546 n1 = 2 * local_n0 * (dim[1] / 2 + 1); 547 if (fin) { 548 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 549 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 550 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 551 (*fin)->ops->duplicate = 
VecDuplicate_FFTW_fin; 552 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 553 } 554 if (fout) { 555 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 556 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 557 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 558 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 559 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 560 } 561 if (bout) { 562 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 563 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 564 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 565 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 566 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 567 } 568 #else 569 /* Get local size */ 570 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 571 if (fin) { 572 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 573 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 574 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 575 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 576 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 if (fout) { 579 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 580 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 581 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 582 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 583 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 584 } 585 if (bout) { 586 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 587 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 588 PetscCall(PetscObjectCompose((PetscObject)*bout, 
"FFTmatrix", (PetscObject)A)); 589 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 590 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 591 } 592 #endif 593 break; 594 case 3: 595 #if !defined(PETSC_USE_COMPLEX) 596 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 597 N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 598 n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 599 if (fin) { 600 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 601 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 602 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 603 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 604 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 605 } 606 if (fout) { 607 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 608 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 609 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 610 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 611 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 612 } 613 if (bout) { 614 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 615 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 616 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 617 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 618 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 619 } 620 #else 621 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 622 if (fin) { 623 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 624 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 625 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 626 (*fin)->ops->duplicate = 
VecDuplicate_FFTW_fin; 627 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 628 } 629 if (fout) { 630 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 631 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 632 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 633 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 634 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 635 } 636 if (bout) { 637 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 638 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 639 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 640 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 641 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 642 } 643 #endif 644 break; 645 default: 646 #if !defined(PETSC_USE_COMPLEX) 647 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 648 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 649 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 650 N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 651 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 652 653 if (fin) { 654 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 655 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 656 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 657 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 658 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 659 } 660 if (fout) { 661 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 662 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 663 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 664 (*fout)->ops->duplicate = 
VecDuplicate_FFTW_fout; 665 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 666 } 667 if (bout) { 668 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 669 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 670 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 671 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 672 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 673 } 674 #else 675 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 676 if (fin) { 677 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 678 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 679 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 680 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 681 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 682 } 683 if (fout) { 684 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 685 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 686 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 687 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 688 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 689 } 690 if (bout) { 691 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 692 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 693 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 694 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 695 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 696 } 697 #endif 698 break; 699 } 700 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 701 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 702 memory leaks. 
     We void these pointers here to avoid accidental use.
  */
  if (fin) (*fin)->ops->replacearray = NULL;
  if (fout) (*fout)->ops->replacearray = NULL;
  if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

  Collective

  Input Parameters:
+ A - FFTW matrix
- x - the PETSc vector

  Output Parameters:
. y - the FFTW vector

  Level: intermediate

  Note:
  For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
  one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
  zeros. This routine does that job by a scattering operation.

.seealso: `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on A ("VecScatterPetscToFFTW_C") */
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecScatterPetscToFFTW_FFTW - MATFFTW implementation of VecScatterPetscToFFTW().

  Builds an index set pair mapping the natural PETSc layout of x onto the FFTW-required
  layout of y (which, for parallel real transforms, contains padding at the end of the
  last dimension), then performs the scatter.
*/
PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* low = first global index of this rank's portion of the FFTW vector y */
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    /* Sequential: no padding needed, copy all N entries with an identity scatter.
       NOTE(review): vsize/vsize1 are fetched but never used here — presumably leftover
       debugging; harmless. */
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* Query FFTW for this rank's slab of the 1D transform */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 2D: contiguous rows, no padding — stride ISs suffice */
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real 2D: last dimension is padded in y to 2*(dim[1]/2+1) entries per row */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      /* NM = padded row length 2*(dim[1]/2+1): dim[1]+2 for even dim[1], dim[1]+1 for odd */
      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      /* indx1: unpadded source index in x; indx2: padded destination index in y */
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      /* Code below is unreachable (kept pending the fix noted above) */
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      /* NM = padded length of the last dimension, 2*(dim[2]/2+1) */
      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* Arbitrary rank: local slab size computed from the cached fftw dims */
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      /* Code below is unreachable (kept pending the fix noted above).
         Temporarily replace the last dim by its half-complex size for the size query */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; /* restore */

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      /* NM = number of padding entries appended per last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      /* j walks the padded destination; skip NM pad slots at each row boundary */
      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

  Collective

  Input Parameters:
+ A - `MATFFTW` matrix
- x - FFTW vector

  Output Parameters:
.
 y - PETSc vector

  Level: intermediate

  Note:
  While doing a real transform the FFTW output of backward DFT contains extra zeros at the end of the last dimension.
  `VecScatterFFTWToPetsc()` removes those extra zeros.

.seealso: `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the type-specific implementation composed on A ("VecScatterFFTWToPetsc_C") */
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  VecScatterFFTWToPetsc_FFTW - MATFFTW implementation of VecScatterFFTWToPetsc().

  Inverse of VecScatterPetscToFFTW_FFTW(): strips the padding FFTW inserts at the end of
  the last dimension of x (for real parallel transforms) while scattering into the
  naturally laid-out PETSc vector y. Note the swapped IS roles in the real branches:
  here x (FFTW side) is indexed by the padded IS and y by the unpadded one.
*/
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  /* low = first global index of this rank's portion of the FFTW vector x */
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    /* Sequential: no padding, identity scatter of all N entries */
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* Backward 1D: the output distribution is given by local_n1/local_1_start */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      /* note: list2 indexes x and list1 indexes y (reverse of PetscToFFTW) */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      /* NM = padded row length 2*(dim[1]/2+1) */
      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      /* indx1: unpadded index into y; indx2: padded index into x */
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      /* NM = padded length of the last dimension, 2*(dim[2]/2+1) */
      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Temporarily replace the last dim by its half-complex size for the size query */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; /* restore */

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      /* NM = number of padding entries appended per last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      /* j walks the padded source; skip NM pad slots at each row boundary */
      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      /* note: list2 (padded) indexes x, list1 (unpadded) indexes y */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

  Options Database Keys:
+ -mat_fftw_plannerflags - set FFTW planner flags

  Level: intermediate
*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  Mat_FFTW   *fftw;
  PetscInt    ndim = fft->ndim, *dim = fft->dim;
  /* plans[] and iplans[] must stay in the same order: the option index selects both */
  const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t  *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  /* pdim: the dimensions in FFTW's ptrdiff_t type; allocated with the C allocator
     (matching the free(pdim) below) rather than PetscMalloc */
  pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  /* real transform: 2 scalars per complex output value, last dim padded to dim/2+1 */
  tot_dim = 2 * dim[0];
#endif
  /* partial_dim = product of all dimensions except the first (slab size per row) */
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    /* Parallel case: ask FFTW for this rank's local slab to size the Mat rows/cols */
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      /* 1D forward: input distribution local_n0, output distribution local_n1 */
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      /* real sizes include the last-dimension padding: 2*(dim[1]/2+1) per row */
      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      /* Temporarily replace the last dim by its half-complex size for the size query */
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp; /* restore */

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
#endif
      break;
    }
#endif
  }
  free(pdim); /* C allocator pairs with the calloc() above */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNew(&fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
  if (size == 1) {
    /* iodims are only used by the sequential guru interface; allocated with the
       C allocator — presumably freed with free() in MatDestroy_FFTW (not visible here) */
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  /* plans are created lazily on first MatMult/MatMultTranspose */
  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}