/*
  Provides an interface to the FFTW package.
  Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward, p_backward;
  unsigned     p_flag;                                      /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because fftw_execute() can only be executed with the arrays with which the plan was created */
} Mat_FFTW;
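
/*
   A minimal sketch (illustrative names, not part of PETSc) of the FFTW convention
   the fields above support: a plan is tied to the arrays it was created with, and
   fftw_execute() is only valid for exactly those arrays; other arrays with the
   same layout must go through the new-array interface:

     fftw_plan p = fftw_plan_dft_1d(n, in, out, FFTW_FORWARD, FFTW_MEASURE);
     fftw_execute(p);                // only valid for (in, out)
     fftw_execute_dft(p, in2, out2); // same plan, different arrays

   Planning with FFTW_MEASURE or stronger flags also overwrites the in/out arrays,
   which is why the routines below plan on a scratch vector unless FFTW_ESTIMATE
   is in effect.
*/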

extern PetscErrorCode MatMult_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat, Vec, Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  Vec                xx;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* Planning overwrote the dummy array; release it and fetch the real input array; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }

  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use the existing plan on the new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
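
/*
   Note: as with FFTW itself, these transforms are unnormalized; applying
   MatMult() followed by MatMultTranspose() (the backward transform) scales
   the original data by N, the product of the dimensions.
*/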

PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  Vec                xx;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
#else
  fftw_iodim *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* Planning overwrote the dummy array; release it and fetch the real input array; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray  = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use the existing plan on the new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;
  Vec                xx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* Planning overwrote the dummy array; release it and fetch the real input array; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }
  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use the existing plan on the new arrays */
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT *)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt           ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm           comm;
  Vec                xx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* Planning overwrote the dummy array; release it and fetch the real input array; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray  = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use the existing plan on the new arrays */
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

  PetscFunctionBegin;
  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) free(fftw->iodims);
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &array));
  fftw_free((fftw_complex *)array);
  PetscCall(VecRestoreArray(v, &array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@
  MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
  parallel layout determined by `MATFFTW`

  Collective

  Input Parameter:
. A - the matrix

  Output Parameters:
+ x - (optional) input vector of forward FFTW
. y - (optional) output vector of forward FFTW
- z - (optional) output vector of backward FFTW

  Options Database Key:
. -mat_fftw_plannerflags - set FFTW planner flags

  Level: advanced

  Notes:
  The parallel layout of the output of the forward FFTW is always the same as the
  input of the backward FFTW, but the parallel layout of the input vector of the
  forward FFTW might not be the same as the output of the backward FFTW.

  Enough space must be provided for a parallel real transform: extra zeros are
  padded at the end of the last dimension. For this reason one needs to invoke the
  fftw_mpi_local_size_transposed() routine, remembering to change the last
  dimension from n to n/2+1 when doing so. The number of zeros to be padded
  depends on whether the last dimension is even or odd: two zeros are needed if it
  is even, one if it is odd.

  Lastly, some scratch space is needed at the end of the data set on each process.
  The returned alloc_local gives how much space is needed, i.e. it accounts for the
  data plus the scratch space on each process.
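
  Example Usage:
  A sketch of typical use (error handling and sizes omitted; `fin`, `fout`, `bout`
  are illustrative names):
.vb
  Mat A;
  Vec fin, fout, bout;
  MatCreateFFT(comm, ndim, dim, MATFFTW, &A);
  MatCreateVecsFFTW(A, &fin, &fout, &bout);
  // ... fill fin (for parallel real transforms, via VecScatterPetscToFFTW()) ...
  MatMult(A, fin, fout);           // forward transform
  MatMultTranspose(A, fout, bout); // backward transform
  VecDestroy(&fin);
  VecDestroy(&fout);
  VecDestroy(&bout);
  MatDestroy(&A);
.ve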

  Developer Notes:
  How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
{
  PetscMPIInt size, rank;
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
#else
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else { /* parallel cases */
    Mat_FFTW     *fftw = (Mat_FFTW *)fft->data;
    PetscInt      ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t     alloc_local, local_n0, local_0_start;
    ptrdiff_t     local_n1;
    fftw_complex *data_fout;
    ptrdiff_t     local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin, *data_bout;
#else
    double   *data_finr, *data_boutr;
    PetscInt  n1, N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * (dim[1] / 2 + 1);
      n1          = 2 * local_n0 * (dim[1] / 2 + 1);
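      /* For example (hypothetical sizes): dim = {4, 6} gives a true data size of
         N = 24 reals, but the padded r2c layout holds N1 = 2*4*(6/2 + 1) = 32
         doubles; n1 is the same count restricted to this process's local_n0 rows. */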
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1          = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
      n1          = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, so v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call would not free the memory and
       would only cause memory leaks. We set these pointers to NULL here to avoid accidental use.
    */
    if (fin) (*fin)->ops->replacearray = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

  Collective

  Input Parameters:
+ A - FFTW matrix
- x - the PETSc vector

  Output Parameter:
. y - the FFTW vector

  Level: intermediate

  Note:
  For parallel real FFTs, FFTW requires the insertion of extra space at the end of the last dimension. This is required even when
  one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
  zeros. This routine does that job with a scatter operation.
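
  Example Usage:
  A sketch of the intended call sequence (error handling omitted; `fin` and `fout`
  are illustrative names):
.vb
  Vec fin, fout;
  MatCreateVecsFFTW(A, &fin, &fout, NULL);
  VecScatterPetscToFFTW(A, x, fin); // copy x into the padded FFTW layout
  MatMult(A, fin, fout);            // forward transform
.ve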

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  PetscInt    vsize, vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
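      /* NM is the padded row length 2*(dim[1]/2 + 1): each row of dim[1] real
         entries is followed by two padding slots when dim[1] is even and one when
         it is odd. For example (hypothetical size dim[1] = 4): NM = 6, so row i
         starts at i*4 in the PETSc vector but at i*6 in the FFTW vector, which is
         the mapping built below. */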
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;
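
      /* Walk the unpadded data linearly; each time a row of the last dimension is
         complete (k is a multiple of dim[ndim-1]), skip the NM padding slots that
         the FFTW layout inserts at the end of that row. */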
      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterFFTWToPetsc - Converts a `MATFFTW` output vector to a PETSc vector.

  Collective

  Input Parameters:
+ A - `MATFFTW` matrix
- x - FFTW vector

  Output Parameter:
. y - PETSc vector

  Level: intermediate

  Note:
  For real transforms, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
  `VecScatterFFTWToPetsc()` removes those extra zeros.
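
  Example Usage:
  A sketch of recovering a PETSc vector after a backward transform (names are
  illustrative):
.vb
  MatMultTranspose(A, fout, bout);   // backward transform in the FFTW layout
  VecScatterFFTWToPetsc(A, bout, z); // strip the padding; z is a regular PETSc Vec
.ve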

.seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  PetscInt    low;
  PetscMPIInt rank, size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt  ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx  = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx  = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.

  Level: intermediate
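
  Options Database Key:
. -mat_fftw_plannerflags - set the FFTW planner flags (FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE)

  Example Usage:
  A minimal sketch (assuming a 2D complex transform of size n0 x n1; as in FFTW
  itself, the transforms are unnormalized):
.vb
  Mat      F;
  Vec      x, y, z;
  PetscInt dim[2] = {n0, n1};
  MatCreateFFT(PETSC_COMM_WORLD, 2, dim, MATFFTW, &F);
  MatCreateVecsFFTW(F, &x, &y, &z);
  // ... fill x ...
  MatMult(F, x, y);          // y = forward DFT of x
  MatMultTranspose(F, y, z); // z = backward DFT of y = (n0*n1) * x
  MatDestroy(&F);
.ve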

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
M*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT    *fft = (Mat_FFT *)A->data;
  Mat_FFTW   *fftw;
  PetscInt    ndim = fft->ndim, *dim = fft->dim;
  const char *plans[]  = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t  *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  pdim    = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2 * dim[0];
#endif
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }
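#if !defined(PETSC_USE_COMPLEX)
  /* For ndim >= 2, tot_dim is the padded r2c size 2*dim[0]*...*(dim[ndim-1]/2 + 1);
     e.g. (hypothetical sizes) dim = {3, 5} gives tot_dim = 2*3*(5/2 + 1) = 18 doubles
     for N = 15 real data points. */
#endif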

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1     = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp;

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
#endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNew(&fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}