/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

/* Private data of a MATFFTW matrix; hangs off Mat_FFT->data */
typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw; /* dimensions in FFTW's native ptrdiff_t form (used by the MPI and guru interfaces) */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims; /* per-dimension size/stride descriptors for the guru planner (>3 dimensions) */
#else
  fftw_iodim *iodims;
#endif
  PetscInt  partial_dim;           /* product of all dimensions except the first; used for index computations in scatters */
  fftw_plan p_forward, p_backward; /* cached plans; created lazily on first MatMult/MatMultTranspose */
  unsigned  p_flag;                /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be
                                                               executed for the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

/*
  MatMult_SeqFFTW - sequential forward transform y = FFT(x).

  The plan is created lazily on the first call and cached together with the
  array addresses it was planned for. Subsequent calls with the same arrays
  reuse the plan via fftw_execute(); calls with different arrays use FFTW's
  new-array execute interface (fftw_execute_dft / fftw_execute_dft_r2c) on
  the cached plan.
*/
PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default: /* dimensions > 3: use the guru interface (complex) or the generic r2c planner (real) */
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      /* Fill row-major iodims: the last dimension is contiguous (unit stride),
         each earlier dimension strides over the product of the later ones */
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif

      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else {                                                       /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMultTranspose_SeqFFTW - sequential backward (inverse, unnormalized)
  transform y = IFFT(x). Mirrors MatMult_SeqFFTW with FFTW_BACKWARD / c2r
  planners; the backward plan is cached independently of the forward one.

  NOTE(review): the guru default case reuses fftw->iodims without filling it
  here; the fill happens in MatMult_SeqFFTW. If MatMultTranspose is ever
  called before MatMult for ndim > 3, iodims may be uninitialized -- confirm
  that creation code pre-fills it or that this ordering is guaranteed.
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
#else
  fftw_iodim *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                       /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
/*
  MatMult_MPIFFTW - parallel forward transform y = FFT(x) using fftw_mpi planners.

  Same lazy plan-creation/caching scheme as the sequential version. Note that
  the real (r2c) planners below are created with FFTW_ESTIMATE rather than
  fftw->p_flag, so the user's input data is never clobbered by measurement.

  NOTE(review): the plan-reuse branch calls fftw_execute_dft() for both complex
  and real builds; for a real build the cached plan is an r2c plan, and FFTW's
  MPI documentation prescribes fftw_mpi_execute_dft* for MPI plans -- confirm
  this path is exercised/correct for real scalars.
*/
PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default: /* arbitrary dimension: use the precomputed ptrdiff_t dimension array */
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar *)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else {                                                       /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMultTranspose_MPIFFTW - parallel backward (inverse, unnormalized)
  transform y = IFFT(x). Mirrors MatMult_MPIFFTW with FFTW_BACKWARD / c2r
  planners; the same NOTE about fftw_execute_dft on real/MPI plans applies.
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(VecGetArrayRead(x, &x_array));
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar *)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else {                                                       /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*
  MatDestroy_FFTW - releases the FFTW plans, the iodims/dim_fftw work arrays,
  the private context, and the composed method slots.

  NOTE(review): plans are created lazily, so p_forward/p_backward may still be
  NULL here if no mult was ever performed -- confirm fftw_destroy_plan(NULL)
  is acceptable for the supported FFTW versions.
*/
PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

  PetscFunctionBegin;
  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) free(fftw->iodims); /* allocated with malloc/calloc, not PetscMalloc, hence free() */
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 #if !PetscDefined(HAVE_MPIUNI) 348 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 349 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 350 { 351 PetscScalar *array; 352 353 PetscFunctionBegin; 354 PetscCall(VecGetArray(v, &array)); 355 fftw_free((fftw_complex *)array); 356 PetscCall(VecRestoreArray(v, &array)); 357 PetscCall(VecDestroy_MPI(v)); 358 PetscFunctionReturn(PETSC_SUCCESS); 359 } 360 #endif 361 362 #if !PetscDefined(HAVE_MPIUNI) 363 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 364 { 365 Mat A; 366 367 PetscFunctionBegin; 368 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 369 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 370 PetscFunctionReturn(PETSC_SUCCESS); 371 } 372 373 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 374 { 375 Mat A; 376 377 PetscFunctionBegin; 378 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 379 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 380 PetscFunctionReturn(PETSC_SUCCESS); 381 } 382 383 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 384 { 385 Mat A; 386 387 PetscFunctionBegin; 388 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 389 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 #endif 393 394 /*@ 395 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 396 parallel layout determined by `MATFFTW` 397 398 Collective 399 400 Input Parameter: 401 . A - the matrix 402 403 Output Parameters: 404 + x - (optional) input vector of forward FFTW 405 . y - (optional) output vector of forward FFTW 406 - z - (optional) output vector of backward FFTW 407 408 Options Database Key: 409 . 
-mat_fftw_plannerflags - set FFTW planner flags 410 411 Level: advanced 412 413 Notes: 414 The parallel layout of output of forward FFTW is always same as the input 415 of the backward FFTW. But parallel layout of the input vector of forward 416 FFTW might not be same as the output of backward FFTW. 417 418 We need to provide enough space while doing parallel real transform. 419 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 420 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 421 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 422 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 423 zeros if it is odd only one zero is needed. 424 425 Lastly one needs some scratch space at the end of data set in each process. alloc_local 426 figures out how much space is needed, i.e. it figures out the data+scratch space for 427 each processor and returns that. 428 429 Developer Note: 430 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 
431 432 .seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 433 @*/ 434 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 435 { 436 PetscFunctionBegin; 437 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 438 PetscFunctionReturn(PETSC_SUCCESS); 439 } 440 441 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 442 { 443 PetscMPIInt size, rank; 444 MPI_Comm comm; 445 Mat_FFT *fft = (Mat_FFT *)A->data; 446 447 PetscFunctionBegin; 448 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 449 PetscValidType(A, 1); 450 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 451 452 PetscCallMPI(MPI_Comm_size(comm, &size)); 453 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 454 if (size == 1) { /* sequential case */ 455 #if defined(PETSC_USE_COMPLEX) 456 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 457 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 458 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 459 #else 460 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 461 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 462 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 463 #endif 464 #if !PetscDefined(HAVE_MPIUNI) 465 } else { /* parallel cases */ 466 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 467 PetscInt ndim = fft->ndim, *dim = fft->dim; 468 ptrdiff_t alloc_local, local_n0, local_0_start; 469 ptrdiff_t local_n1; 470 fftw_complex *data_fout; 471 ptrdiff_t local_1_start; 472 #if defined(PETSC_USE_COMPLEX) 473 fftw_complex *data_fin, *data_bout; 474 #else 475 double *data_finr, *data_boutr; 476 PetscInt n1, N1; 477 ptrdiff_t temp; 478 #endif 479 480 switch (ndim) { 481 case 1: 482 #if !defined(PETSC_USE_COMPLEX) 483 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 484 #else 485 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, 
FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 486 if (fin) { 487 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 488 PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 489 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 490 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 491 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 492 } 493 if (fout) { 494 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 495 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 496 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 497 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 498 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 499 } 500 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 501 if (bout) { 502 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 503 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 504 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 505 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 506 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 break; 509 #endif 510 case 2: 511 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 512 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 513 N1 = 2 * dim[0] * (dim[1] / 2 + 1); 514 n1 = 2 * local_n0 * (dim[1] / 2 + 1); 515 if (fin) { 516 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 517 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 518 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", 
(PetscObject)A)); 519 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 520 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 521 } 522 if (fout) { 523 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 524 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 525 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 526 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 527 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (bout) { 530 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 531 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 532 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 533 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 534 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 535 } 536 #else 537 /* Get local size */ 538 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 539 if (fin) { 540 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 541 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 542 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 543 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 544 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 if (fout) { 547 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 548 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 549 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 550 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (bout) { 554 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 555 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 556 
PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 557 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 558 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 #endif 561 break; 562 case 3: 563 #if !defined(PETSC_USE_COMPLEX) 564 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 565 N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 566 n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 567 if (fin) { 568 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 569 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 570 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 571 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 572 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 573 } 574 if (fout) { 575 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 576 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 577 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 578 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 579 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 if (bout) { 582 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 583 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 584 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 585 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 586 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 587 } 588 #else 589 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 590 if (fin) { 591 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 592 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 593 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", 
(PetscObject)A)); 594 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 595 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 596 } 597 if (fout) { 598 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 599 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 600 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 601 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 602 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 603 } 604 if (bout) { 605 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 606 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 607 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 608 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 609 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 610 } 611 #endif 612 break; 613 default: 614 #if !defined(PETSC_USE_COMPLEX) 615 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 616 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 617 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 618 N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 619 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 620 621 if (fin) { 622 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 623 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 624 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 625 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 626 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 627 } 628 if (fout) { 629 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 630 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 631 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", 
(PetscObject)A)); 632 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 633 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 634 } 635 if (bout) { 636 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 637 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 638 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 639 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 640 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 641 } 642 #else 643 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 644 if (fin) { 645 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 646 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 647 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 648 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 649 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 650 } 651 if (fout) { 652 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 653 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 654 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 655 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 656 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 657 } 658 if (bout) { 659 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 660 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 661 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 662 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 663 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 664 } 665 #endif 666 break; 667 } 668 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 669 v->array_allocated=NULL. 
     A regular replacearray call won't free the memory and only causes memory leaks. We void these pointers here to
     avoid accidental use. */
  if (fin) (*fin)->ops->replacearray = NULL;
  if (fout) (*fout)->ops->replacearray = NULL;
  if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

  Collective

  Input Parameters:
+ A - FFTW matrix
- x - the PETSc vector

  Output Parameters:
. y - the FFTW vector

  Level: intermediate

  Note:
  For real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
  one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
  zeros. This routine does that job by a scattering operation.

.seealso: [](chapter_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A ("VecScatterPetscToFFTW_C", i.e. VecScatterPetscToFFTW_FFTW for MATFFTW) */
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation behind VecScatterPetscToFFTW(): scatter the natural-layout PETSc vector x
   into the FFTW-layout vector y. For parallel real (r2c) transforms y carries FFTW's padding
   at the end of the last dimension, so index sets must be built explicitly; in every other
   configuration a contiguous stride scatter suffices. */
PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low; /* first global index of y owned by this rank */
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1; /* NOTE(review): queried below but never used — dead locals */
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank)); /* NOTE(review): rank is unused in this routine */
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    /* Sequential: x and y use the same layout; copy all fft->N entries one-to-one */
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start; /* this rank's slab size/offset along dimension 0 */
    ptrdiff_t local_n1, local_1_start; /* counterparts for FFTW's transposed distribution */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM; /* padded length of the last dimension (in doubles): 2*(dim[last]/2+1) */
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* Complex layout has no padding: a contiguous block of local_n0*dim[1] entries moves as-is */
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real transform: each source row of length dim[1] lands in a padded row of length NM in y */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      /* NM = 2*(dim[1]/2+1): padded last-dimension length counted in real doubles */
      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j; /* offset in the unpadded (PETSc) layout */
          tempindx1 = i * NM + j;     /* offset in the padded (FFTW) layout */

          indx1[tempindx] = local_0_start * dim[1] + tempindx; /* global source index in x */
          indx2[tempindx] = low + tempindx1;                   /* global target index in y */
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* NOTE(review): the SETERRQ below returns immediately, so the remainder of this branch
         is intentionally unreachable until the known bug is fixed. */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k; /* unpadded offset */
            tempindx1 = i * dim[1] * NM + j * NM + k;         /* padded offset */

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* General ndim > 3: data still splits along dimension 0; partial_dim is the product of
         all remaining dimensions (precomputed in MatCreate_FFTW) */
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* NOTE(review): unreachable after the SETERRQ below; kept until the bug is fixed */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      /* Temporarily shrink the last dimension to its complex extent (dim/2+1) for the size
         query, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      /* NM = number of padding doubles appended after each last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM; /* skip the padding at the end of each row */
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector.

  Collective

  Input Parameters:
+ A - `MATFFTW` matrix
- x - FFTW vector

  Output Parameters:
.
y - PETSc vector

  Level: intermediate

  Note:
  While doing a real transform the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
  `VecScatterFFTWToPetsc()` removes those extra zeros.

.seealso: [](chapter_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A ("VecScatterFFTWToPetsc_C", i.e. VecScatterFFTWToPetsc_FFTW for MATFFTW) */
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation behind VecScatterFFTWToPetsc(): scatter the FFTW-layout vector x back into
   the natural-layout PETSc vector y, dropping the padding FFTW appends to the last dimension
   of parallel real-transform data. Mirror image of VecScatterPetscToFFTW_FFTW(). */
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low; /* first global index of x owned by this rank */
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank)); /* NOTE(review): rank is unused in this routine */
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    /* Sequential: no padding, copy all fft->N entries one-to-one */
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start; /* this rank's slab size/offset along dimension 0 */
    ptrdiff_t local_n1, local_1_start; /* counterparts for FFTW's transposed distribution */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM; /* padded length of the last dimension (in doubles): 2*(dim[last]/2+1) */
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* Backward 1D transform: output is distributed per (local_n1, local_1_start) */
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      /* reverse direction: the low-based indices (list2) address the FFTW vector x */
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real transform: read each padded row of length NM from x into an unpadded row of y */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2));

      /* NM = 2*(dim[1]/2+1): padded last-dimension length counted in real doubles */
      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j; /* offset in the unpadded (PETSc) layout */
          tempindx1 = i * NM + j;     /* offset in the padded (FFTW) layout */

          indx1[tempindx] = local_0_start * dim[1] + tempindx; /* global target index in y */
          indx2[tempindx] = low + tempindx1;                   /* global source index in x */
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      /* NOTE(review): list1/list2 order differs from the 2-D complex case above; for complex
         data both strides appear to describe the same contiguous range — confirm equivalence */
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2));

      /* NM = 2*(dim[2]/2+1): padded last-dimension length counted in real doubles */
      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k; /* unpadded offset */
            tempindx1 = i * dim[1] * NM + j * NM + k;         /* padded offset */

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; /* target in y */
            indx2[tempindx] = low + tempindx1;                            /* source in x */
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* General ndim > 3: data still splits along dimension 0; partial_dim is the product of
         all remaining dimensions (precomputed in MatCreate_FFTW) */
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Temporarily shrink the last dimension to its complex extent (dim/2+1) for the size
         query, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2));

      /* NM = number of padding doubles appended after each last-dimension row */
      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i; /* target in y (unpadded) */
        indx2[i] = j;                               /* source in x (padded) */
        if (k % dim[ndim - 1] == 0) j += NM;        /* skip the padding at the end of each row */
        j++;
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MATFFTW constructor: sizes the matrix according to FFTW's data distribution, allocates the
   Mat_FFTW context, installs the sequential or MPI MatMult/MatMultTranspose implementations,
   and composes the FFTW-specific vector-creation/scatter methods. */
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  Mat_FFTW   *fftw;
  PetscInt    ndim = fft->ndim, *dim = fft->dim;
  const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t  *pdim; /* dims converted to FFTW's ptrdiff_t for the MPI size queries */
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim; /* total sequential real-transform size counted in padded doubles */
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank)); /* NOTE(review): rank is unused in this routine */

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  /* NOTE(review): calloc result is unchecked, and pdim leaks on the SETERRQ error paths
     below (SETERRQ returns before free(pdim)) — TODO confirm/fix upstream */
  pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2 * dim[0];
#endif
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr]; /* product of all dimensions except dim[0] */
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    /* the last dimension is padded to (dim/2+1) complex values, i.e. 2*(dim/2+1) doubles */
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    /* Sequential complex transform: sizes equal the logical problem size N */
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    /* Sequential real transform: sizes counted in padded doubles (tot_dim) */
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    /* Ask FFTW for this rank's share of the distributed data and size the Mat to match */
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      /* rows follow the backward (transposed) distribution (local_n1), columns the forward one */
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      /* Real transform: sizes counted in doubles, last dimension padded to 2*(dim[1]/2+1) */
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      /* Real transform: sizes counted in doubles, last dimension padded to 2*(dim[2]/2+1) */
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      /* Temporarily replace the last dimension with its complex (padded) extent for the size
         query, then restore it */
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      /* Rescale the unpadded counts by (padded length)/(original length), counted in doubles */
      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp;

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
#endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNew(&fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* dimensionality of the transform */
  fftw->partial_dim = partial_dim;     /* product of dim[1..ndim-1]; used by the >3-D scatters */

  PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
  if (size == 1) {
    /* guru-interface dimension descriptors, used only by the sequential default (>3-D) plans;
       NOTE(review): malloc result is unchecked */
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  /* plans are created lazily on the first MatMult/MatMultTranspose with the actual arrays */
  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options (planner rigor: -mat_fftw_plannerflags) */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}