1 /* 2 Provides an interface to the FFTW package. 3 Testing examples can be found in ~src/mat/tests 4 */ 5 6 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 7 EXTERN_C_BEGIN 8 #if !PetscDefined(HAVE_MPIUNI) 9 #include <fftw3-mpi.h> 10 #else 11 #include <fftw3.h> 12 #endif 13 EXTERN_C_END 14 15 typedef struct { 16 ptrdiff_t ndim_fftw, *dim_fftw; 17 #if defined(PETSC_USE_64BIT_INDICES) 18 fftw_iodim64 *iodims; 19 #else 20 fftw_iodim *iodims; 21 #endif 22 PetscInt partial_dim; 23 fftw_plan p_forward, p_backward; 24 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 25 PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw plan should be 26 executed for the arrays with which the plan was created */ 27 } Mat_FFTW; 28 29 extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec); 30 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec); 31 extern PetscErrorCode MatDestroy_FFTW(Mat); 32 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 33 #if !PetscDefined(HAVE_MPIUNI) 34 extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec); 35 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec); 36 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 37 #endif 38 39 PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 40 { 41 Mat_FFT *fft = (Mat_FFT *)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 43 const PetscScalar *x_array; 44 PetscScalar *y_array; 45 #if defined(PETSC_USE_COMPLEX) 46 #if defined(PETSC_USE_64BIT_INDICES) 47 fftw_iodim64 *iodims; 48 #else 49 fftw_iodim *iodims; 50 #endif 51 PetscInt i; 52 #endif 53 PetscInt ndim = fft->ndim, *dim = fft->dim; 54 55 PetscFunctionBegin; 56 PetscCall(VecGetArrayRead(x, &x_array)); 57 PetscCall(VecGetArray(y, &y_array)); 58 if (!fftw->p_forward) { /* create a plan, then execute it */ 59 switch (ndim) { 60 case 1: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 65 #endif 66 break; 67 case 2: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 72 #endif 73 break; 74 case 3: 75 #if defined(PETSC_USE_COMPLEX) 76 fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 77 #else 78 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 79 #endif 80 break; 81 default: 82 #if defined(PETSC_USE_COMPLEX) 83 iodims = fftw->iodims; 84 #if defined(PETSC_USE_64BIT_INDICES) 85 if (ndim) { 86 iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 87 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 88 for (i = ndim - 2; i >= 0; --i) { 89 iodims[i].n = (ptrdiff_t)dim[i]; 90 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 91 } 92 } 93 fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 94 #else 95 if (ndim) { 96 iodims[ndim - 1].n = (int)dim[ndim - 1]; 97 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 98 for (i = ndim - 2; i >= 0; --i) { 99 iodims[i].n = (int)dim[i]; 100 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 101 } 102 } 103 fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 104 #endif 105 106 #else 107 fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 108 #endif 109 break; 110 } 111 fftw->finarray = (PetscScalar *)x_array; 112 fftw->foutarray = y_array; 113 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 114 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 115 fftw_execute(fftw->p_forward); 116 } else { /* use existing plan */ 117 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 118 #if defined(PETSC_USE_COMPLEX) 119 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 120 #else 121 fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 122 #endif 123 } else { 124 fftw_execute(fftw->p_forward); 125 } 126 } 127 PetscCall(VecRestoreArray(y, &y_array)); 128 PetscCall(VecRestoreArrayRead(x, &x_array)); 129 PetscFunctionReturn(PETSC_SUCCESS); 130 } 131 132 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 133 { 134 Mat_FFT *fft = (Mat_FFT *)A->data; 135 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 136 const PetscScalar *x_array; 137 PetscScalar *y_array; 138 PetscInt ndim = fft->ndim, *dim = fft->dim; 139 #if defined(PETSC_USE_COMPLEX) 140 #if defined(PETSC_USE_64BIT_INDICES) 141 fftw_iodim64 *iodims = fftw->iodims; 142 #else 143 fftw_iodim *iodims = fftw->iodims; 144 #endif 145 #endif 146 147 PetscFunctionBegin; 148 PetscCall(VecGetArrayRead(x, &x_array)); 149 PetscCall(VecGetArray(y, &y_array)); 150 if (!fftw->p_backward) { /* create a plan, then execute it */ 151 switch (ndim) { 152 case 1: 153 #if defined(PETSC_USE_COMPLEX) 154 fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 155 #else 156 fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 157 #endif 158 break; 159 case 2: 160 #if defined(PETSC_USE_COMPLEX) 161 fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 162 #else 163 fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 164 #endif 165 break; 166 case 3: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 169 #else 170 fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 171 #endif 172 break; 173 default: 174 #if defined(PETSC_USE_COMPLEX) 175 #if defined(PETSC_USE_64BIT_INDICES) 176 fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 177 #else 178 fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 179 #endif 180 #else 181 fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 182 #endif 183 break; 184 } 185 fftw->binarray = (PetscScalar *)x_array; 186 fftw->boutarray = y_array; 187 fftw_execute(fftw->p_backward); 188 } else { /* use existing plan */ 189 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 190 #if defined(PETSC_USE_COMPLEX) 191 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 192 #else 193 fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 194 #endif 195 } else { 196 fftw_execute(fftw->p_backward); 197 } 198 } 199 PetscCall(VecRestoreArray(y, &y_array)); 200 PetscCall(VecRestoreArrayRead(x, &x_array)); 201 PetscFunctionReturn(PETSC_SUCCESS); 202 } 203 204 #if !PetscDefined(HAVE_MPIUNI) 205 PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 206 { 207 Mat_FFT *fft = (Mat_FFT *)A->data; 208 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 209 const PetscScalar *x_array; 210 PetscScalar *y_array; 211 PetscInt ndim = fft->ndim, *dim = fft->dim; 212 MPI_Comm comm; 213 214 PetscFunctionBegin; 215 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 216 PetscCall(VecGetArrayRead(x, &x_array)); 217 PetscCall(VecGetArray(y, &y_array)); 218 if (!fftw->p_forward) { /* create a plan, then execute it */ 219 switch (ndim) { 220 case 1: 221 #if defined(PETSC_USE_COMPLEX) 222 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 223 #else 224 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 225 #endif 226 break; 227 case 2: 228 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 229 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 230 #else 231 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 232 #endif 233 break; 234 case 3: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 237 #else 238 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 239 #endif 240 break; 241 default: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 244 #else 245 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 246 #endif 247 break; 248 } 249 fftw->finarray = (PetscScalar *)x_array; 250 fftw->foutarray = y_array; 251 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 252 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 253 fftw_execute(fftw->p_forward); 254 } else { /* use existing plan */ 255 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 256 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 257 } else { 258 fftw_execute(fftw->p_forward); 259 } 260 } 261 PetscCall(VecRestoreArray(y, &y_array)); 262 PetscCall(VecRestoreArrayRead(x, &x_array)); 263 PetscFunctionReturn(PETSC_SUCCESS); 264 } 265 266 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 267 { 268 Mat_FFT *fft = (Mat_FFT *)A->data; 269 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 270 const PetscScalar *x_array; 271 PetscScalar *y_array; 272 PetscInt ndim = fft->ndim, *dim = fft->dim; 273 MPI_Comm comm; 274 275 PetscFunctionBegin; 276 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 277 PetscCall(VecGetArrayRead(x, &x_array)); 278 PetscCall(VecGetArray(y, &y_array)); 279 if (!fftw->p_backward) { /* create a plan, then execute it */ 280 switch (ndim) { 281 case 1: 282 #if defined(PETSC_USE_COMPLEX) 283 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 284 #else 285 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 286 #endif 287 break; 288 case 2: 289 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 290 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 291 #else 292 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 293 #endif 294 break; 295 case 3: 296 #if defined(PETSC_USE_COMPLEX) 297 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 298 #else 299 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 300 #endif 301 break; 302 default: 303 #if defined(PETSC_USE_COMPLEX) 304 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 305 #else 306 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 307 #endif 308 break; 309 } 310 fftw->binarray = (PetscScalar *)x_array; 311 fftw->boutarray = y_array; 312 fftw_execute(fftw->p_backward); 313 } else { /* use existing plan */ 314 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 315 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 316 } else { 317 fftw_execute(fftw->p_backward); 318 } 319 } 320 PetscCall(VecRestoreArray(y, &y_array)); 321 PetscCall(VecRestoreArrayRead(x, &x_array)); 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 #endif 325 326 PetscErrorCode MatDestroy_FFTW(Mat A) 327 { 328 Mat_FFT *fft = (Mat_FFT *)A->data; 329 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 330 331 PetscFunctionBegin; 332 fftw_destroy_plan(fftw->p_forward); 333 fftw_destroy_plan(fftw->p_backward); 334 if (fftw->iodims) free(fftw->iodims); 335 PetscCall(PetscFree(fftw->dim_fftw)); 336 PetscCall(PetscFree(fft->data)); 337 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 338 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 339 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 340 #if !PetscDefined(HAVE_MPIUNI) 341 fftw_mpi_cleanup(); 342 #endif 343 PetscFunctionReturn(PETSC_SUCCESS); 344 } 345 346 #if !PetscDefined(HAVE_MPIUNI) 347 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 348 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 349 { 350 PetscScalar *array; 351 352 PetscFunctionBegin; 353 PetscCall(VecGetArray(v, &array)); 354 fftw_free((fftw_complex *)array); 355 PetscCall(VecRestoreArray(v, &array)); 356 PetscCall(VecDestroy_MPI(v)); 357 PetscFunctionReturn(PETSC_SUCCESS); 358 } 359 #endif 360 361 #if !PetscDefined(HAVE_MPIUNI) 362 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 363 { 364 Mat A; 365 366 PetscFunctionBegin; 367 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 368 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 369 PetscFunctionReturn(PETSC_SUCCESS); 370 } 371 372 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 373 { 374 Mat A; 375 376 PetscFunctionBegin; 377 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 378 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 383 { 384 Mat A; 385 386 PetscFunctionBegin; 387 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 388 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 389 PetscFunctionReturn(PETSC_SUCCESS); 390 } 391 #endif 392 393 /*@ 394 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 395 parallel layout determined by `MATFFTW` 396 397 Collective 398 399 Input Parameter: 400 . A - the matrix 401 402 Output Parameters: 403 + x - (optional) input vector of forward FFTW 404 . y - (optional) output vector of forward FFTW 405 - z - (optional) output vector of backward FFTW 406 407 Options Database Key: 408 . -mat_fftw_plannerflags - set FFTW planner flags 409 410 Level: advanced 411 412 Notes: 413 The parallel layout of output of forward FFTW is always same as the input 414 of the backward FFTW. But parallel layout of the input vector of forward 415 FFTW might not be same as the output of backward FFTW. 416 417 We need to provide enough space while doing parallel real transform. 418 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 419 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 420 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 421 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 422 zeros if it is odd only one zero is needed. 423 424 Lastly one needs some scratch space at the end of data set in each process. alloc_local 425 figures out how much space is needed, i.e. it figures out the data+scratch space for 426 each processor and returns that. 427 428 Developer Notes: 429 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 430 431 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 432 @*/ 433 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 434 { 435 PetscFunctionBegin; 436 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 437 PetscFunctionReturn(PETSC_SUCCESS); 438 } 439 440 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 441 { 442 PetscMPIInt size, rank; 443 MPI_Comm comm; 444 Mat_FFT *fft = (Mat_FFT *)A->data; 445 446 PetscFunctionBegin; 447 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 448 PetscValidType(A, 1); 449 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 450 451 PetscCallMPI(MPI_Comm_size(comm, &size)); 452 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 453 if (size == 1) { /* sequential case */ 454 #if defined(PETSC_USE_COMPLEX) 455 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 456 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 457 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 458 #else 459 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 460 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 461 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 462 #endif 463 #if !PetscDefined(HAVE_MPIUNI) 464 } else { /* parallel cases */ 465 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 466 PetscInt ndim = fft->ndim, *dim = fft->dim; 467 ptrdiff_t alloc_local, local_n0, local_0_start; 468 ptrdiff_t local_n1; 469 fftw_complex *data_fout; 470 ptrdiff_t local_1_start; 471 #if defined(PETSC_USE_COMPLEX) 472 fftw_complex *data_fin, *data_bout; 473 #else 474 double *data_finr, *data_boutr; 475 PetscInt n1, N1; 476 ptrdiff_t temp; 477 #endif 478 479 switch (ndim) { 480 case 1: 481 #if !defined(PETSC_USE_COMPLEX) 482 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 483 #else 484 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 485 if (fin) { 486 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 487 PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin)); 488 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 489 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 490 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 if (fout) { 493 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 494 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout)); 495 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 496 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 497 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 500 if (bout) { 501 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 502 PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout)); 503 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 504 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 505 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 break; 508 #endif 509 case 2: 510 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 511 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 512 N1 = 2 * dim[0] * (dim[1] / 2 + 1); 513 n1 = 2 * local_n0 * (dim[1] / 2 + 1); 514 if (fin) { 515 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 516 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 517 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 518 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 519 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 520 } 521 if (fout) { 522 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 523 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 524 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 525 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 526 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 527 } 528 if (bout) { 529 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 530 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 531 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 532 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 533 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 #else 536 /* Get local size */ 537 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 538 if (fin) { 539 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 540 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 541 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 542 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 543 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 if (fout) { 546 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 547 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 548 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 549 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 550 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 551 } 552 if (bout) { 553 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 554 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 555 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 556 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 557 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 #endif 560 break; 561 case 3: 562 #if !defined(PETSC_USE_COMPLEX) 563 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 564 N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1); 565 n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1); 566 if (fin) { 567 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 568 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 569 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 570 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 571 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 572 } 573 if (fout) { 574 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 575 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 576 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 577 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 578 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 579 } 580 if (bout) { 581 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 582 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 583 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 584 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 585 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 586 } 587 #else 588 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 589 if (fin) { 590 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 591 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 592 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 593 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 594 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 595 } 596 if (fout) { 597 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 598 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 599 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 600 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 601 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 602 } 603 if (bout) { 604 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 605 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 606 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 607 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 608 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 609 } 610 #endif 611 break; 612 default: 613 #if !defined(PETSC_USE_COMPLEX) 614 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 615 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 616 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 617 N1 = 2 * fft->N * (PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 618 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 619 620 if (fin) { 621 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 622 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 623 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 624 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 625 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 626 } 627 if (fout) { 628 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 629 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 630 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 631 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 632 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 if (bout) { 635 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 636 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 637 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 638 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 639 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 640 } 641 #else 642 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 643 if (fin) { 644 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 645 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 646 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 647 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 648 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 if (fout) { 651 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 652 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 653 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 654 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 655 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 656 } 657 if (bout) { 658 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 659 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 660 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 661 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 662 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 663 } 664 #endif 665 break; 666 } 667 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 668 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 669 memory leaks. We void these pointers here to avoid accident uses. 670 */ 671 if (fin) (*fin)->ops->replacearray = NULL; 672 if (fout) (*fout)->ops->replacearray = NULL; 673 if (bout) (*bout)->ops->replacearray = NULL; 674 #endif 675 } 676 PetscFunctionReturn(PETSC_SUCCESS); 677 } 678 679 /*@ 680 VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 681 682 Collective 683 684 Input Parameters: 685 + A - FFTW matrix 686 - x - the PETSc vector 687 688 Output Parameter: 689 . y - the FFTW vector 690 691 Level: intermediate 692 693 Note: 694 For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 695 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 696 zeros. This routine does that job by scattering operation. 697 698 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 699 @*/ 700 PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 701 { 702 PetscFunctionBegin; 703 PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 704 PetscFunctionReturn(PETSC_SUCCESS); 705 } 706 707 static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 708 { 709 MPI_Comm comm; 710 Mat_FFT *fft = (Mat_FFT *)A->data; 711 PetscInt low; 712 PetscMPIInt rank, size; 713 PetscInt vsize, vsize1; 714 VecScatter vecscat; 715 IS list1; 716 717 PetscFunctionBegin; 718 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 719 PetscCallMPI(MPI_Comm_size(comm, &size)); 720 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 721 PetscCall(VecGetOwnershipRange(y, &low, NULL)); 722 723 if (size == 1) { 724 PetscCall(VecGetSize(x, &vsize)); 725 PetscCall(VecGetSize(y, &vsize1)); 726 PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 727 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 728 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 729 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 730 PetscCall(VecScatterDestroy(&vecscat)); 731 PetscCall(ISDestroy(&list1)); 732 #if !PetscDefined(HAVE_MPIUNI) 733 } else { 734 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 735 PetscInt ndim = fft->ndim, *dim = fft->dim; 736 ptrdiff_t local_n0, local_0_start; 737 ptrdiff_t local_n1, local_1_start; 738 IS list2; 739 #if !defined(PETSC_USE_COMPLEX) 740 PetscInt i, j, k, partial_dim; 741 PetscInt *indx1, *indx2, tempindx, tempindx1; 742 PetscInt NM; 743 ptrdiff_t temp; 744 #endif 745 746 switch (ndim) { 747 case 1: 748 #if defined(PETSC_USE_COMPLEX) 749 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 750 751 PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1)); 752 PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2)); 753 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 754 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 755 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 756 PetscCall(VecScatterDestroy(&vecscat)); 757 PetscCall(ISDestroy(&list1)); 758 PetscCall(ISDestroy(&list2)); 759 #else 760 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 761 #endif 762 break; 763 case 2: 764 #if defined(PETSC_USE_COMPLEX) 765 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 766 767 PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 768 PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 769 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 770 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 771 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 772 PetscCall(VecScatterDestroy(&vecscat)); 773 PetscCall(ISDestroy(&list1)); 774 PetscCall(ISDestroy(&list2)); 775 #else 776 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 777 778 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 779 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 780 781 if (dim[1] % 2 == 0) { 782 NM = dim[1] + 2; 783 } else { 784 NM = dim[1] + 1; 785 } 786 for (i = 0; i < local_n0; i++) { 787 for (j = 0; j < dim[1]; j++) { 788 tempindx = i * dim[1] + j; 789 tempindx1 = i * NM + j; 790 791 indx1[tempindx] = local_0_start * dim[1] + tempindx; 792 indx2[tempindx] = low + tempindx1; 793 } 794 } 795 796 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 797 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 798 799 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 800 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 801 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 802 PetscCall(VecScatterDestroy(&vecscat)); 803 PetscCall(ISDestroy(&list1)); 804 PetscCall(ISDestroy(&list2)); 805 PetscCall(PetscFree(indx1)); 806 PetscCall(PetscFree(indx2)); 807 #endif 808 break; 809 810 case 3: 811 #if defined(PETSC_USE_COMPLEX) 812 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 813 814 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 815 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 816 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 817 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 818 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 819 PetscCall(VecScatterDestroy(&vecscat)); 820 PetscCall(ISDestroy(&list1)); 821 PetscCall(ISDestroy(&list2)); 822 #else 823 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 824 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 825 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 826 827 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 828 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 829 830 if (dim[2] % 2 == 0) NM = dim[2] + 2; 831 else NM = dim[2] + 1; 832 833 for (i = 0; i < local_n0; i++) { 834 for (j = 0; j < dim[1]; j++) { 835 for (k = 0; k < dim[2]; k++) { 836 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 837 tempindx1 = i * dim[1] * NM + j * NM + k; 838 839 indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 840 indx2[tempindx] = low + tempindx1; 841 } 842 } 843 } 844 845 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 846 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 847 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 848 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 849 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 850 PetscCall(VecScatterDestroy(&vecscat)); 851 PetscCall(ISDestroy(&list1)); 852 PetscCall(ISDestroy(&list2)); 853 PetscCall(PetscFree(indx1)); 854 PetscCall(PetscFree(indx2)); 855 #endif 856 break; 857 858 default: 859 #if defined(PETSC_USE_COMPLEX) 860 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 861 862 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 863 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 864 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 865 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 866 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 867 PetscCall(VecScatterDestroy(&vecscat)); 868 PetscCall(ISDestroy(&list1)); 869 PetscCall(ISDestroy(&list2)); 870 #else 871 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 872 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 873 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 874 875 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 876 877 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 878 879 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 880 881 partial_dim = fftw->partial_dim; 882 883 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 884 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 885 886 if (dim[ndim - 1] % 2 == 0) NM = 2; 887 else NM = 1; 888 889 j = low; 890 for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 891 indx1[i] = local_0_start * partial_dim + i; 892 indx2[i] = j; 893 if (k % dim[ndim - 1] == 0) j += NM; 894 j++; 895 } 896 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 897 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 898 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 899 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 900 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 901 PetscCall(VecScatterDestroy(&vecscat)); 902 PetscCall(ISDestroy(&list1)); 903 PetscCall(ISDestroy(&list2)); 904 PetscCall(PetscFree(indx1)); 905 PetscCall(PetscFree(indx2)); 906 #endif 907 break; 908 } 909 #endif 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 /*@ 915 VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 916 917 Collective 918 919 Input Parameters: 920 + A - `MATFFTW` matrix 921 - x - FFTW vector 922 923 Output Parameter: 924 . y - PETSc vector 925 926 Level: intermediate 927 928 Note: 929 While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 930 `VecScatterFFTWToPetsc()` removes those extra zeros. 931 932 .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 933 @*/ 934 PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 935 { 936 PetscFunctionBegin; 937 PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 938 PetscFunctionReturn(PETSC_SUCCESS); 939 } 940 941 static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 942 { 943 MPI_Comm comm; 944 Mat_FFT *fft = (Mat_FFT *)A->data; 945 PetscInt low; 946 PetscMPIInt rank, size; 947 VecScatter vecscat; 948 IS list1; 949 950 PetscFunctionBegin; 951 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 952 PetscCallMPI(MPI_Comm_size(comm, &size)); 953 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 954 PetscCall(VecGetOwnershipRange(x, &low, NULL)); 955 956 if (size == 1) { 957 PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 958 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 959 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 960 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 961 PetscCall(VecScatterDestroy(&vecscat)); 962 PetscCall(ISDestroy(&list1)); 963 964 #if !PetscDefined(HAVE_MPIUNI) 965 } else { 966 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 967 PetscInt ndim = fft->ndim, *dim = fft->dim; 968 ptrdiff_t local_n0, local_0_start; 969 ptrdiff_t local_n1, local_1_start; 970 IS list2; 971 #if !defined(PETSC_USE_COMPLEX) 972 PetscInt i, j, k, partial_dim; 973 PetscInt *indx1, *indx2, tempindx, tempindx1; 974 PetscInt NM; 975 ptrdiff_t temp; 976 #endif 977 switch (ndim) { 978 case 1: 979 #if defined(PETSC_USE_COMPLEX) 980 fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 981 982 PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1)); 983 PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2)); 984 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 985 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 986 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 987 PetscCall(VecScatterDestroy(&vecscat)); 988 PetscCall(ISDestroy(&list1)); 989 PetscCall(ISDestroy(&list2)); 990 #else 991 SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 992 #endif 993 break; 994 case 2: 995 #if defined(PETSC_USE_COMPLEX) 996 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 997 998 PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1)); 999 PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2)); 1000 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1001 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1002 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1003 PetscCall(VecScatterDestroy(&vecscat)); 1004 PetscCall(ISDestroy(&list1)); 1005 PetscCall(ISDestroy(&list2)); 1006 #else 1007 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1008 1009 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx1)); 1010 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1], &indx2)); 1011 1012 if (dim[1] % 2 == 0) NM = dim[1] + 2; 1013 else NM = dim[1] + 1; 1014 1015 for (i = 0; i < local_n0; i++) { 1016 for (j = 0; j < dim[1]; j++) { 1017 tempindx = i * dim[1] + j; 1018 tempindx1 = i * NM + j; 1019 1020 indx1[tempindx] = local_0_start * dim[1] + tempindx; 1021 indx2[tempindx] = low + tempindx1; 1022 } 1023 } 1024 1025 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1)); 1026 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2)); 1027 1028 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1029 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1030 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1031 PetscCall(VecScatterDestroy(&vecscat)); 1032 PetscCall(ISDestroy(&list1)); 1033 PetscCall(ISDestroy(&list2)); 1034 PetscCall(PetscFree(indx1)); 1035 PetscCall(PetscFree(indx2)); 1036 #endif 1037 break; 1038 case 3: 1039 #if defined(PETSC_USE_COMPLEX) 1040 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1041 1042 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1)); 1043 PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2)); 1044 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1045 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1046 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1047 PetscCall(VecScatterDestroy(&vecscat)); 1048 PetscCall(ISDestroy(&list1)); 1049 PetscCall(ISDestroy(&list2)); 1050 #else 1051 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1052 1053 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx1)); 1054 PetscCall(PetscMalloc1(((PetscInt)local_n0) * dim[1] * dim[2], &indx2)); 1055 1056 if (dim[2] % 2 == 0) NM = dim[2] + 2; 1057 else NM = dim[2] + 1; 1058 1059 for (i = 0; i < local_n0; i++) { 1060 for (j = 0; j < dim[1]; j++) { 1061 for (k = 0; k < dim[2]; k++) { 1062 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 1063 tempindx1 = i * dim[1] * NM + j * NM + k; 1064 1065 indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx; 1066 indx2[tempindx] = low + tempindx1; 1067 } 1068 } 1069 } 1070 1071 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 1072 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 1073 1074 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1075 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1076 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1077 PetscCall(VecScatterDestroy(&vecscat)); 1078 PetscCall(ISDestroy(&list1)); 1079 PetscCall(ISDestroy(&list2)); 1080 PetscCall(PetscFree(indx1)); 1081 PetscCall(PetscFree(indx2)); 1082 #endif 1083 break; 1084 default: 1085 #if defined(PETSC_USE_COMPLEX) 1086 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 1087 1088 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1)); 1089 PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2)); 1090 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1091 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1092 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1093 PetscCall(VecScatterDestroy(&vecscat)); 1094 PetscCall(ISDestroy(&list1)); 1095 PetscCall(ISDestroy(&list2)); 1096 #else 1097 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 1098 1099 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 1100 1101 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1102 1103 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1104 1105 partial_dim = fftw->partial_dim; 1106 1107 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx1)); 1108 PetscCall(PetscMalloc1(((PetscInt)local_n0) * partial_dim, &indx2)); 1109 1110 if (dim[ndim - 1] % 2 == 0) NM = 2; 1111 else NM = 1; 1112 1113 j = low; 1114 for (i = 0, k = 1; i < ((PetscInt)local_n0) * partial_dim; i++, k++) { 1115 indx1[i] = local_0_start * partial_dim + i; 1116 indx2[i] = j; 1117 if (k % dim[ndim - 1] == 0) j += NM; 1118 j++; 1119 } 1120 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 1121 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 1122 1123 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1124 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1125 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1126 PetscCall(VecScatterDestroy(&vecscat)); 1127 PetscCall(ISDestroy(&list1)); 1128 PetscCall(ISDestroy(&list2)); 1129 PetscCall(PetscFree(indx1)); 1130 PetscCall(PetscFree(indx2)); 1131 #endif 1132 break; 1133 } 1134 #endif 1135 } 1136 PetscFunctionReturn(PETSC_SUCCESS); 1137 } 1138 1139 /*MC 1140 MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package. 1141 1142 Level: intermediate 1143 1144 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()` 1145 M*/ 1146 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1147 { 1148 MPI_Comm comm; 1149 Mat_FFT *fft = (Mat_FFT *)A->data; 1150 Mat_FFTW *fftw; 1151 PetscInt ndim = fft->ndim, *dim = fft->dim; 1152 const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 1153 unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1154 PetscBool flg; 1155 PetscInt p_flag, partial_dim = 1, ctr; 1156 PetscMPIInt size, rank; 1157 ptrdiff_t *pdim; 1158 #if !defined(PETSC_USE_COMPLEX) 1159 PetscInt tot_dim; 1160 #endif 1161 1162 PetscFunctionBegin; 1163 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1164 PetscCallMPI(MPI_Comm_size(comm, &size)); 1165 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1166 1167 #if !PetscDefined(HAVE_MPIUNI) 1168 fftw_mpi_init(); 1169 #endif 1170 pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 1171 pdim[0] = dim[0]; 1172 #if !defined(PETSC_USE_COMPLEX) 1173 tot_dim = 2 * dim[0]; 1174 #endif 1175 for (ctr = 1; ctr < ndim; ctr++) { 1176 partial_dim *= dim[ctr]; 1177 pdim[ctr] = dim[ctr]; 1178 #if !defined(PETSC_USE_COMPLEX) 1179 if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1180 else tot_dim *= dim[ctr]; 1181 #endif 1182 } 1183 1184 if (size == 1) { 1185 #if defined(PETSC_USE_COMPLEX) 1186 PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 1187 fft->n = fft->N; 1188 #else 1189 PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 1190 fft->n = tot_dim; 1191 #endif 1192 #if !PetscDefined(HAVE_MPIUNI) 1193 } else { 1194 ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 1195 #if !defined(PETSC_USE_COMPLEX) 1196 ptrdiff_t temp; 1197 PetscInt N1; 1198 #endif 1199 1200 switch (ndim) { 1201 case 1: 1202 #if !defined(PETSC_USE_COMPLEX) 1203 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1204 #else 1205 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1206 fft->n = (PetscInt)local_n0; 1207 PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N)); 1208 #endif 1209 break; 1210 case 2: 1211 #if defined(PETSC_USE_COMPLEX) 1212 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1213 fft->n = (PetscInt)local_n0 * dim[1]; 1214 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1215 #else 1216 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1217 1218 fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 1219 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 1220 #endif 1221 break; 1222 case 3: 1223 #if defined(PETSC_USE_COMPLEX) 1224 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1225 1226 fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 1227 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1228 #else 1229 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1230 1231 fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 1232 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 1233 #endif 1234 break; 1235 default: 1236 #if defined(PETSC_USE_COMPLEX) 1237 fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 1238 1239 fft->n = (PetscInt)local_n0 * partial_dim; 1240 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1241 #else 1242 temp = pdim[ndim - 1]; 1243 1244 pdim[ndim - 1] = temp / 2 + 1; 1245 1246 fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1247 1248 fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp; 1249 N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 1250 1251 pdim[ndim - 1] = temp; 1252 1253 PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1254 #endif 1255 break; 1256 } 1257 #endif 1258 } 1259 free(pdim); 1260 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 1261 PetscCall(PetscNew(&fftw)); 1262 fft->data = (void *)fftw; 1263 1264 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1265 fftw->partial_dim = partial_dim; 1266 1267 PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw))); 1268 if (size == 1) { 1269 #if defined(PETSC_USE_64BIT_INDICES) 1270 fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1271 #else 1272 fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1273 #endif 1274 } 1275 1276 for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1277 1278 fftw->p_forward = NULL; 1279 fftw->p_backward = NULL; 1280 fftw->p_flag = FFTW_ESTIMATE; 1281 1282 if (size == 1) { 1283 A->ops->mult = MatMult_SeqFFTW; 1284 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1285 #if !PetscDefined(HAVE_MPIUNI) 1286 } else { 1287 A->ops->mult = MatMult_MPIFFTW; 1288 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1289 #endif 1290 } 1291 fft->matdestroy = MatDestroy_FFTW; 1292 A->assembled = PETSC_TRUE; 1293 A->preallocated = PETSC_TRUE; 1294 1295 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 1296 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 1297 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1298 1299 /* get runtime options */ 1300 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 1301 PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 1302 if (flg) fftw->p_flag = iplans[p_flag]; 1303 PetscOptionsEnd(); 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306