1 /* 2 Provides an interface to the FFTW package. 3 Testing examples can be found in ~src/mat/tests 4 */ 5 6 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 7 EXTERN_C_BEGIN 8 #if !PetscDefined(HAVE_MPIUNI) 9 #include <fftw3-mpi.h> 10 #else 11 #include <fftw3.h> 12 #endif 13 EXTERN_C_END 14 15 typedef struct { 16 PetscInt ndim_fftw; 17 ptrdiff_t *dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward, p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of arrays because fftw_execute() can only be executed for the arrays with which the plan was created */ 27 } Mat_FFTW; 28 29 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *); 30 31 static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y) 32 { 33 Mat_FFT *fft = (Mat_FFT *)A->data; 34 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 35 const PetscScalar *x_array; 36 PetscScalar *y_array; 37 Vec xx; 38 #if defined(PETSC_USE_COMPLEX) 39 #if defined(PETSC_USE_64BIT_INDICES) 40 fftw_iodim64 *iodims; 41 #else 42 fftw_iodim *iodims; 43 #endif 44 PetscInt i; 45 #endif 46 PetscInt ndim = fft->ndim, *dim = fft->dim; 47 48 PetscFunctionBegin; 49 if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 50 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 51 PetscCall(VecDuplicate(x, &xx)); 52 PetscCall(VecGetArrayRead(xx, &x_array)); 53 } else { 54 PetscCall(VecGetArrayRead(x, &x_array)); 55 } 56 PetscCall(VecGetArray(y, &y_array)); 57 if (!fftw->p_forward) { /* create a plan, then execute it */ 58 switch (ndim) { 59 case 1: 60 #if defined(PETSC_USE_COMPLEX) 61 fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 62 #else 63 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 64 #endif 65 break; 66 case 2: 67 #if defined(PETSC_USE_COMPLEX) 68 fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 69 #else 70 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 71 #endif 72 break; 73 case 3: 74 #if defined(PETSC_USE_COMPLEX) 75 fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 76 #else 77 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 78 #endif 79 break; 80 default: 81 #if defined(PETSC_USE_COMPLEX) 82 iodims = fftw->iodims; 83 #if defined(PETSC_USE_64BIT_INDICES) 84 if (ndim) { 85 iodims[ndim - 1].n = (ptrdiff_t)dim[ndim - 1]; 86 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 87 for (i = ndim - 2; i >= 0; --i) { 88 iodims[i].n = (ptrdiff_t)dim[i]; 89 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 90 } 91 } 92 fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 93 #else 94 if (ndim) { 95 iodims[ndim - 1].n = (int)dim[ndim - 1]; 96 iodims[ndim - 1].is = iodims[ndim - 1].os = 1; 97 for (i = ndim - 2; i >= 0; --i) { 98 iodims[i].n = (int)dim[i]; 99 iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n; 100 } 101 } 102 fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag); 103 #endif 104 105 #else 106 fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag); 107 #endif 108 break; 109 } 110 if (fftw->p_flag != FFTW_ESTIMATE) { 111 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 112 PetscCall(VecRestoreArrayRead(xx, &x_array)); 113 PetscCall(VecDestroy(&xx)); 114 PetscCall(VecGetArrayRead(x, &x_array)); 115 } else { 116 fftw->finarray = (PetscScalar *)x_array; 117 fftw->foutarray = y_array; 118 } 119 } 120 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 #if defined(PETSC_USE_COMPLEX) 123 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 124 #else 125 fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array); 126 #endif 127 } else { 128 fftw_execute(fftw->p_forward); 129 } 130 PetscCall(VecRestoreArray(y, &y_array)); 131 PetscCall(VecRestoreArrayRead(x, &x_array)); 132 PetscFunctionReturn(PETSC_SUCCESS); 133 } 134 135 static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y) 136 { 137 Mat_FFT *fft = (Mat_FFT *)A->data; 138 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 139 const PetscScalar *x_array; 140 PetscScalar *y_array; 141 PetscInt ndim = fft->ndim, *dim = fft->dim; 142 Vec xx; 143 #if defined(PETSC_USE_COMPLEX) 144 #if defined(PETSC_USE_64BIT_INDICES) 145 fftw_iodim64 *iodims = fftw->iodims; 146 #else 147 fftw_iodim *iodims = fftw->iodims; 148 #endif 149 #endif 150 151 PetscFunctionBegin; 152 if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 153 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 154 PetscCall(VecDuplicate(x, &xx)); 155 PetscCall(VecGetArrayRead(xx, &x_array)); 156 } else { 157 PetscCall(VecGetArrayRead(x, &x_array)); 158 } 159 PetscCall(VecGetArray(y, &y_array)); 160 if (!fftw->p_backward) { /* create a plan, then execute it */ 161 switch (ndim) { 162 case 1: 163 #if defined(PETSC_USE_COMPLEX) 164 fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 165 #else 166 fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 167 #endif 168 break; 169 case 2: 170 #if defined(PETSC_USE_COMPLEX) 171 fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 172 #else 173 fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 174 #endif 175 break; 176 case 3: 177 #if defined(PETSC_USE_COMPLEX) 178 fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 179 #else 180 fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 181 #endif 182 break; 183 default: 184 #if defined(PETSC_USE_COMPLEX) 185 #if defined(PETSC_USE_64BIT_INDICES) 186 fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 187 #else 188 fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag); 189 #endif 190 #else 191 fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag); 192 #endif 193 break; 194 } 195 if (fftw->p_flag != FFTW_ESTIMATE) { 196 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 197 PetscCall(VecRestoreArrayRead(xx, &x_array)); 198 PetscCall(VecDestroy(&xx)); 199 PetscCall(VecGetArrayRead(x, &x_array)); 200 } else { 201 fftw->binarray = (PetscScalar *)x_array; 202 fftw->boutarray = y_array; 203 } 204 } 205 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 206 #if defined(PETSC_USE_COMPLEX) 207 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 208 #else 209 fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array); 210 #endif 211 } else { 212 fftw_execute(fftw->p_backward); 213 } 214 PetscCall(VecRestoreArray(y, &y_array)); 215 PetscCall(VecRestoreArrayRead(x, &x_array)); 216 PetscFunctionReturn(PETSC_SUCCESS); 217 } 218 219 #if !PetscDefined(HAVE_MPIUNI) 220 static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y) 221 { 222 Mat_FFT *fft = (Mat_FFT *)A->data; 223 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 224 const PetscScalar *x_array; 225 PetscScalar *y_array; 226 PetscInt ndim = fft->ndim, *dim = fft->dim; 227 MPI_Comm comm; 228 Vec xx; 229 230 PetscFunctionBegin; 231 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 232 if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) { 233 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 234 PetscCall(VecDuplicate(x, &xx)); 235 PetscCall(VecGetArrayRead(xx, &x_array)); 236 } else { 237 PetscCall(VecGetArrayRead(x, &x_array)); 238 } 239 PetscCall(VecGetArray(y, &y_array)); 240 if (!fftw->p_forward) { /* create a plan, then execute it */ 241 switch (ndim) { 242 case 1: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 245 #else 246 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 247 #endif 248 break; 249 case 2: 250 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 251 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 252 #else 253 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 254 #endif 255 break; 256 case 3: 257 #if defined(PETSC_USE_COMPLEX) 258 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 259 #else 260 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 261 #endif 262 break; 263 default: 264 #if defined(PETSC_USE_COMPLEX) 265 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag); 266 #else 267 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE); 268 #endif 269 break; 270 } 271 if (fftw->p_flag != FFTW_ESTIMATE) { 272 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 273 PetscCall(VecRestoreArrayRead(xx, &x_array)); 274 PetscCall(VecDestroy(&xx)); 275 PetscCall(VecGetArrayRead(x, &x_array)); 276 } else { 277 fftw->finarray = (PetscScalar *)x_array; 278 fftw->foutarray = y_array; 279 } 280 } 281 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 282 fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array); 283 } else { 284 fftw_execute(fftw->p_forward); 285 } 286 PetscCall(VecRestoreArray(y, &y_array)); 287 PetscCall(VecRestoreArrayRead(x, &x_array)); 288 PetscFunctionReturn(PETSC_SUCCESS); 289 } 290 291 static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y) 292 { 293 Mat_FFT *fft = (Mat_FFT *)A->data; 294 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 295 const PetscScalar *x_array; 296 PetscScalar *y_array; 297 PetscInt ndim = fft->ndim, *dim = fft->dim; 298 MPI_Comm comm; 299 Vec xx; 300 301 PetscFunctionBegin; 302 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 303 if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) { 304 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 305 PetscCall(VecDuplicate(x, &xx)); 306 PetscCall(VecGetArrayRead(xx, &x_array)); 307 } else { 308 PetscCall(VecGetArrayRead(x, &x_array)); 309 } 310 PetscCall(VecGetArray(y, &y_array)); 311 if (!fftw->p_backward) { /* create a plan, then execute it */ 312 switch (ndim) { 313 case 1: 314 #if defined(PETSC_USE_COMPLEX) 315 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 316 #else 317 SETERRQ(comm, PETSC_ERR_SUP, "not support for real numbers yet"); 318 #endif 319 break; 320 case 2: 321 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 322 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 323 #else 324 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 325 #endif 326 break; 327 case 3: 328 #if defined(PETSC_USE_COMPLEX) 329 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 330 #else 331 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 332 #endif 333 break; 334 default: 335 #if defined(PETSC_USE_COMPLEX) 336 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag); 337 #else 338 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE); 339 #endif 340 break; 341 } 342 if (fftw->p_flag != FFTW_ESTIMATE) { 343 /* The data in the in/out arrays is overwritten so need a dummy array for computation, see FFTW manual sec2.1 or sec4 */ 344 PetscCall(VecRestoreArrayRead(xx, &x_array)); 345 PetscCall(VecDestroy(&xx)); 346 PetscCall(VecGetArrayRead(x, &x_array)); 347 } else { 348 fftw->binarray = (PetscScalar *)x_array; 349 fftw->boutarray = y_array; 350 } 351 } 352 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 353 fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array); 354 } else { 355 fftw_execute(fftw->p_backward); 356 } 357 PetscCall(VecRestoreArray(y, &y_array)); 358 PetscCall(VecRestoreArrayRead(x, &x_array)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 #endif 362 363 static PetscErrorCode MatDestroy_FFTW(Mat A) 364 { 365 Mat_FFT *fft = (Mat_FFT *)A->data; 366 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 367 368 PetscFunctionBegin; 369 fftw_destroy_plan(fftw->p_forward); 370 fftw_destroy_plan(fftw->p_backward); 371 if (fftw->iodims) free(fftw->iodims); 372 PetscCall(PetscFree(fftw->dim_fftw)); 373 PetscCall(PetscFree(fft->data)); 374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL)); 375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL)); 376 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL)); 377 #if !PetscDefined(HAVE_MPIUNI) 378 fftw_mpi_cleanup(); 379 #endif 380 PetscFunctionReturn(PETSC_SUCCESS); 381 } 382 383 #if !PetscDefined(HAVE_MPIUNI) 384 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 385 static PetscErrorCode VecDestroy_MPIFFTW(Vec v) 386 { 387 PetscScalar *array; 388 389 PetscFunctionBegin; 390 PetscCall(VecGetArray(v, &array)); 391 fftw_free((fftw_complex *)array); 392 PetscCall(VecRestoreArray(v, &array)); 393 PetscCall(VecDestroy_MPI(v)); 394 PetscFunctionReturn(PETSC_SUCCESS); 395 } 396 #endif 397 398 #if !PetscDefined(HAVE_MPIUNI) 399 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new) 400 { 401 Mat A; 402 403 PetscFunctionBegin; 404 PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A)); 405 PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL)); 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new) 410 { 411 Mat A; 412 413 PetscFunctionBegin; 414 PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A)); 415 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL)); 416 PetscFunctionReturn(PETSC_SUCCESS); 417 } 418 419 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 420 { 421 Mat A; 422 423 PetscFunctionBegin; 424 PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A)); 425 PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new)); 426 PetscFunctionReturn(PETSC_SUCCESS); 427 } 428 #endif 429 430 /*@ 431 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 432 parallel layout determined by `MATFFTW` 433 434 Collective 435 436 Input Parameter: 437 . A - the matrix 438 439 Output Parameters: 440 + x - (optional) input vector of forward FFTW 441 . y - (optional) output vector of forward FFTW 442 - z - (optional) output vector of backward FFTW 443 444 Options Database Key: 445 . -mat_fftw_plannerflags - set FFTW planner flags 446 447 Level: advanced 448 449 Notes: 450 The parallel layout of output of forward FFTW is always same as the input 451 of the backward FFTW. But parallel layout of the input vector of forward 452 FFTW might not be same as the output of backward FFTW. 453 454 We need to provide enough space while doing parallel real transform. 455 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 456 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 457 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 458 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 459 zeros if it is odd only one zero is needed. 460 461 Lastly one needs some scratch space at the end of data set in each process. alloc_local 462 figures out how much space is needed, i.e. it figures out the data+scratch space for 463 each processor and returns that. 464 465 Developer Notes: 466 How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically? 467 468 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()` 469 @*/ 470 PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z) 471 { 472 PetscFunctionBegin; 473 PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z)); 474 PetscFunctionReturn(PETSC_SUCCESS); 475 } 476 477 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout) 478 { 479 PetscMPIInt size, rank; 480 MPI_Comm comm; 481 Mat_FFT *fft = (Mat_FFT *)A->data; 482 483 PetscFunctionBegin; 484 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 485 PetscValidType(A, 1); 486 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 487 488 PetscCallMPI(MPI_Comm_size(comm, &size)); 489 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 490 if (size == 1) { /* sequential case */ 491 #if defined(PETSC_USE_COMPLEX) 492 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin)); 493 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout)); 494 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout)); 495 #else 496 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin)); 497 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout)); 498 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout)); 499 #endif 500 #if !PetscDefined(HAVE_MPIUNI) 501 } else { /* parallel cases */ 502 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 503 PetscInt ndim = fft->ndim, *dim = fft->dim; 504 ptrdiff_t alloc_local, local_n0, local_0_start; 505 ptrdiff_t local_n1; 506 fftw_complex *data_fout; 507 ptrdiff_t local_1_start; 508 PetscInt n1, N1; 509 #if defined(PETSC_USE_COMPLEX) 510 fftw_complex *data_fin, *data_bout; 511 #else 512 double *data_finr, *data_boutr; 513 ptrdiff_t temp; 514 #endif 515 516 switch (ndim) { 517 case 1: 518 #if !defined(PETSC_USE_COMPLEX) 519 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform"); 520 #else 521 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 522 if (fin) { 523 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 524 PetscCall(PetscIntCast(local_n0, &n1)); 525 PetscCall(PetscIntCast(fft->N, &N1)); 526 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fin, fin)); 527 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 528 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 529 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 if (fout) { 532 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 533 PetscCall(PetscIntCast(local_n1, &n1)); 534 PetscCall(PetscIntCast(fft->N, &N1)); 535 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_fout, fout)); 536 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 537 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 538 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 541 if (bout) { 542 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 543 PetscCall(PetscIntCast(local_n1, &n1)); 544 PetscCall(PetscIntCast(fft->N, &N1)); 545 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (const PetscScalar *)data_bout, bout)); 546 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 547 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 548 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 549 } 550 break; 551 #endif 552 case 2: 553 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 554 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 555 PetscCall(PetscIntCast(2 * dim[0] * (dim[1] / 2 + 1), &N1)); 556 PetscCall(PetscIntCast(2 * local_n0 * (dim[1] / 2 + 1), &n1)); 557 if (fin) { 558 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 559 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 560 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 561 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 562 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 563 } 564 if (fout) { 565 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 566 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout)); 567 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 568 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 569 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 570 } 571 if (bout) { 572 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 573 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 574 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 575 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 576 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 #else 579 /* Get local size */ 580 alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 581 if (fin) { 582 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 583 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 584 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 585 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 586 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 587 } 588 if (fout) { 589 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 590 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 591 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 592 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 593 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 if (bout) { 596 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 597 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 598 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 599 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 600 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 601 } 602 #endif 603 break; 604 case 3: 605 #if !defined(PETSC_USE_COMPLEX) 606 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 607 PetscCall(PetscIntCast(2 * dim[0] * dim[1] * (dim[2] / 2 + 1), &N1)); 608 PetscCall(PetscIntCast(2 * local_n0 * dim[1] * (dim[2] / 2 + 1), &n1)); 609 if (fin) { 610 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 611 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin)); 612 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 613 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 614 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 615 } 616 if (fout) { 617 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 618 PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout)); 619 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 620 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 621 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 622 } 623 if (bout) { 624 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 625 PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout)); 626 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 627 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 628 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 629 } 630 #else 631 alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 632 if (fin) { 633 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 634 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 635 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 636 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 637 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 638 } 639 if (fout) { 640 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 641 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 642 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 643 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 644 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 645 } 646 if (bout) { 647 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 648 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 649 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 650 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 651 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 652 } 653 #endif 654 break; 655 default: 656 #if !defined(PETSC_USE_COMPLEX) 657 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 658 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 659 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 660 N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp); 661 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 662 663 if (fin) { 664 data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 665 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin)); 666 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 667 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 668 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 669 } 670 if (fout) { 671 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 672 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout)); 673 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 674 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 675 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 676 } 677 if (bout) { 678 data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2); 679 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout)); 680 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 681 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 682 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 683 } 684 #else 685 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 686 if (fin) { 687 data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 688 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin)); 689 PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A)); 690 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 691 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 692 } 693 if (fout) { 694 data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 695 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout)); 696 PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A)); 697 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 698 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 699 } 700 if (bout) { 701 data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local); 702 PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout)); 703 PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A)); 704 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 705 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 706 } 707 #endif 708 break; 709 } 710 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 711 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 712 memory leaks. We void these pointers here to avoid accident uses. 713 */ 714 if (fin) (*fin)->ops->replacearray = NULL; 715 if (fout) (*fout)->ops->replacearray = NULL; 716 if (bout) (*bout)->ops->replacearray = NULL; 717 #endif 718 } 719 PetscFunctionReturn(PETSC_SUCCESS); 720 } 721 722 /*@ 723 VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls. 724 725 Collective 726 727 Input Parameters: 728 + A - FFTW matrix 729 - x - the PETSc vector 730 731 Output Parameter: 732 . y - the FFTW vector 733 734 Level: intermediate 735 736 Note: 737 For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 738 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 739 zeros. This routine does that job by scattering operation. 740 741 .seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()` 742 @*/ 743 PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y) 744 { 745 PetscFunctionBegin; 746 PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y)); 747 PetscFunctionReturn(PETSC_SUCCESS); 748 } 749 750 static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y) 751 { 752 MPI_Comm comm; 753 Mat_FFT *fft = (Mat_FFT *)A->data; 754 PetscInt low; 755 PetscMPIInt rank, size; 756 PetscInt vsize, vsize1; 757 VecScatter vecscat; 758 IS list1; 759 760 PetscFunctionBegin; 761 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 762 PetscCallMPI(MPI_Comm_size(comm, &size)); 763 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 764 PetscCall(VecGetOwnershipRange(y, &low, NULL)); 765 766 if (size == 1) { 767 PetscCall(VecGetSize(x, &vsize)); 768 PetscCall(VecGetSize(y, &vsize1)); 769 PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1)); 770 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 771 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 772 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 773 PetscCall(VecScatterDestroy(&vecscat)); 774 PetscCall(ISDestroy(&list1)); 775 #if !PetscDefined(HAVE_MPIUNI) 776 } else { 777 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 778 PetscInt ndim = fft->ndim, *dim = fft->dim, n1; 779 ptrdiff_t local_n0, local_0_start; 780 ptrdiff_t local_n1, local_1_start; 781 IS list2; 782 #if !defined(PETSC_USE_COMPLEX) 783 PetscInt i, j, k, partial_dim; 784 PetscInt *indx1, *indx2, tempindx, tempindx1; 785 PetscInt NM; 786 ptrdiff_t temp; 787 #else 788 PetscInt nstart, nlow; 789 #endif 790 791 switch (ndim) { 792 case 1: 793 #if defined(PETSC_USE_COMPLEX) 794 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 795 796 PetscCall(PetscIntCast(local_n0, &n1)); 797 PetscCall(PetscIntCast(local_0_start, &nstart)); 798 PetscCall(PetscIntCast(low, &nlow)); 799 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 800 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 801 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 802 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 803 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 804 PetscCall(VecScatterDestroy(&vecscat)); 805 PetscCall(ISDestroy(&list1)); 806 PetscCall(ISDestroy(&list2)); 807 #else 808 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 809 #endif 810 break; 811 case 2: 812 #if defined(PETSC_USE_COMPLEX) 813 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 814 815 PetscCall(PetscIntCast(local_n0 * dim[1], &n1)); 816 PetscCall(PetscIntCast(local_0_start * dim[1], &nstart)); 817 PetscCall(PetscIntCast(low, &nlow)); 818 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 819 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 820 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 821 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 822 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 823 PetscCall(VecScatterDestroy(&vecscat)); 824 PetscCall(ISDestroy(&list1)); 825 PetscCall(ISDestroy(&list2)); 826 #else 827 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 828 829 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 830 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 831 832 if (dim[1] % 2 == 0) { 833 NM = dim[1] + 2; 834 } else { 835 NM = dim[1] + 1; 836 } 837 for (i = 0; i < local_n0; i++) { 838 for (j = 0; j < dim[1]; j++) { 839 tempindx = i * dim[1] + j; 840 tempindx1 = i * NM + j; 841 842 PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx])); 843 indx2[tempindx] = low + tempindx1; 844 } 845 } 846 847 PetscCall(PetscIntCast(local_n0 * dim[1], &n1)); 848 PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1)); 849 PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2)); 850 851 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 852 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 853 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 854 PetscCall(VecScatterDestroy(&vecscat)); 855 PetscCall(ISDestroy(&list1)); 856 PetscCall(ISDestroy(&list2)); 857 PetscCall(PetscFree(indx1)); 858 PetscCall(PetscFree(indx2)); 859 #endif 860 break; 861 862 case 3: 863 #if defined(PETSC_USE_COMPLEX) 864 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 865 866 PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1)); 867 PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart)); 868 PetscCall(PetscIntCast(low, &nlow)); 869 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 870 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 871 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 872 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 873 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 874 PetscCall(VecScatterDestroy(&vecscat)); 875 PetscCall(ISDestroy(&list1)); 876 PetscCall(ISDestroy(&list2)); 877 #else 878 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 879 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform"); 880 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 881 882 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 883 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 884 885 if (dim[2] % 2 == 0) NM = dim[2] + 2; 886 else NM = dim[2] + 1; 887 888 for (i = 0; i < local_n0; i++) { 889 for (j = 0; j < dim[1]; j++) { 890 for (k = 0; k < dim[2]; k++) { 891 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 892 tempindx1 = i * dim[1] * NM + j * NM + k; 893 894 PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx])); 895 PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx])); 896 } 897 } 898 } 899 900 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1)); 901 PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2)); 902 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 903 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 904 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 905 PetscCall(VecScatterDestroy(&vecscat)); 906 PetscCall(ISDestroy(&list1)); 907 PetscCall(ISDestroy(&list2)); 908 PetscCall(PetscFree(indx1)); 909 PetscCall(PetscFree(indx2)); 910 #endif 911 break; 912 913 default: 914 #if defined(PETSC_USE_COMPLEX) 915 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 916 917 PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1)); 918 PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart)); 919 PetscCall(PetscIntCast(low, &nlow)); 920 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 921 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 922 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 923 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 924 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 925 PetscCall(VecScatterDestroy(&vecscat)); 926 PetscCall(ISDestroy(&list1)); 927 PetscCall(ISDestroy(&list2)); 928 #else 929 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 930 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform"); 931 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 932 933 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 934 935 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 936 937 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 938 939 partial_dim = fftw->partial_dim; 940 941 PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 942 PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 943 944 if (dim[ndim - 1] % 2 == 0) NM = 2; 945 else NM = 1; 946 947 j = low; 948 for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 949 indx1[i] = local_0_start * partial_dim + i; 950 indx2[i] = j; 951 if (k % dim[ndim - 1] == 0) j += NM; 952 j++; 953 } 954 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1)); 955 PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2)); 956 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 957 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 958 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 959 PetscCall(VecScatterDestroy(&vecscat)); 960 PetscCall(ISDestroy(&list1)); 961 PetscCall(ISDestroy(&list2)); 962 PetscCall(PetscFree(indx1)); 963 PetscCall(PetscFree(indx2)); 964 #endif 965 break; 966 } 967 #endif 968 } 969 PetscFunctionReturn(PETSC_SUCCESS); 970 } 971 972 /*@ 973 VecScatterFFTWToPetsc - Converts `MATFFTW` output vector to a PETSc vector. 974 975 Collective 976 977 Input Parameters: 978 + A - `MATFFTW` matrix 979 - x - FFTW vector 980 981 Output Parameter: 982 . y - PETSc vector 983 984 Level: intermediate 985 986 Note: 987 While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 988 `VecScatterFFTWToPetsc()` removes those extra zeros. 989 990 .seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()` 991 @*/ 992 PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y) 993 { 994 PetscFunctionBegin; 995 PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y)); 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y) 1000 { 1001 MPI_Comm comm; 1002 Mat_FFT *fft = (Mat_FFT *)A->data; 1003 PetscInt low; 1004 PetscMPIInt rank, size; 1005 VecScatter vecscat; 1006 IS list1; 1007 1008 PetscFunctionBegin; 1009 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1010 PetscCallMPI(MPI_Comm_size(comm, &size)); 1011 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1012 PetscCall(VecGetOwnershipRange(x, &low, NULL)); 1013 1014 if (size == 1) { 1015 PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1)); 1016 PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat)); 1017 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1018 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1019 PetscCall(VecScatterDestroy(&vecscat)); 1020 PetscCall(ISDestroy(&list1)); 1021 1022 #if !PetscDefined(HAVE_MPIUNI) 1023 } else { 1024 Mat_FFTW *fftw = (Mat_FFTW *)fft->data; 1025 PetscInt ndim = fft->ndim, *dim = fft->dim; 1026 ptrdiff_t local_n0, local_0_start; 1027 ptrdiff_t local_n1, local_1_start; 1028 IS list2; 1029 #if !defined(PETSC_USE_COMPLEX) 1030 PetscInt i, j, k, partial_dim; 1031 PetscInt *indx1, *indx2, tempindx, tempindx1; 1032 PetscInt NM; 1033 ptrdiff_t temp; 1034 #else 1035 PetscInt nlow, nstart; 1036 #endif 1037 PetscInt n1; 1038 switch (ndim) { 1039 case 1: 1040 #if defined(PETSC_USE_COMPLEX) 1041 fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1042 1043 PetscCall(PetscIntCast(local_n1, &n1)); 1044 PetscCall(PetscIntCast(local_1_start, &nstart)); 1045 PetscCall(PetscIntCast(low, &nlow)); 1046 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 1047 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 1048 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1049 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1050 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1051 PetscCall(VecScatterDestroy(&vecscat)); 1052 PetscCall(ISDestroy(&list1)); 1053 PetscCall(ISDestroy(&list2)); 1054 #else 1055 SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT"); 1056 #endif 1057 break; 1058 case 2: 1059 #if defined(PETSC_USE_COMPLEX) 1060 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1061 1062 PetscCall(PetscIntCast(local_n0 * dim[1], &n1)); 1063 PetscCall(PetscIntCast(local_0_start * dim[1], &nstart)); 1064 PetscCall(PetscIntCast(low, &nlow)); 1065 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 1066 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 1067 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1068 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1069 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1070 PetscCall(VecScatterDestroy(&vecscat)); 1071 PetscCall(ISDestroy(&list1)); 1072 PetscCall(ISDestroy(&list2)); 1073 #else 1074 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1075 1076 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1)); 1077 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2)); 1078 1079 if (dim[1] % 2 == 0) NM = dim[1] + 2; 1080 else NM = dim[1] + 1; 1081 1082 for (i = 0; i < local_n0; i++) { 1083 for (j = 0; j < dim[1]; j++) { 1084 tempindx = i * dim[1] + j; 1085 tempindx1 = i * NM + j; 1086 1087 PetscCall(PetscIntCast(local_0_start * dim[1] + tempindx, &indx1[tempindx])); 1088 PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx])); 1089 } 1090 } 1091 1092 PetscCall(PetscIntCast(local_n0 * dim[1], &n1)); 1093 PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1)); 1094 PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2)); 1095 1096 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1097 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1098 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1099 PetscCall(VecScatterDestroy(&vecscat)); 1100 PetscCall(ISDestroy(&list1)); 1101 PetscCall(ISDestroy(&list2)); 1102 PetscCall(PetscFree(indx1)); 1103 PetscCall(PetscFree(indx2)); 1104 #endif 1105 break; 1106 case 3: 1107 #if defined(PETSC_USE_COMPLEX) 1108 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1109 1110 PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1)); 1111 PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2], &nstart)); 1112 PetscCall(PetscIntCast(low, &nlow)); 1113 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 1114 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 1115 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1116 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1117 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1118 PetscCall(VecScatterDestroy(&vecscat)); 1119 PetscCall(ISDestroy(&list1)); 1120 PetscCall(ISDestroy(&list2)); 1121 #else 1122 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1123 1124 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1)); 1125 PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2)); 1126 1127 if (dim[2] % 2 == 0) NM = dim[2] + 2; 1128 else NM = dim[2] + 1; 1129 1130 for (i = 0; i < local_n0; i++) { 1131 for (j = 0; j < dim[1]; j++) { 1132 for (k = 0; k < dim[2]; k++) { 1133 tempindx = i * dim[1] * dim[2] + j * dim[2] + k; 1134 tempindx1 = i * dim[1] * NM + j * NM + k; 1135 1136 PetscCall(PetscIntCast(local_0_start * dim[1] * dim[2] + tempindx, &indx1[tempindx])); 1137 PetscCall(PetscIntCast(low + tempindx1, &indx2[tempindx])); 1138 } 1139 } 1140 } 1141 1142 PetscCall(PetscIntCast(local_n0 * dim[1] * dim[2], &n1)); 1143 PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1)); 1144 PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2)); 1145 1146 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1147 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1148 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1149 PetscCall(VecScatterDestroy(&vecscat)); 1150 PetscCall(ISDestroy(&list1)); 1151 PetscCall(ISDestroy(&list2)); 1152 PetscCall(PetscFree(indx1)); 1153 PetscCall(PetscFree(indx2)); 1154 #endif 1155 break; 1156 default: 1157 #if defined(PETSC_USE_COMPLEX) 1158 fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start); 1159 1160 PetscCall(PetscIntCast(local_n0 * (fftw->partial_dim), &n1)); 1161 PetscCall(PetscIntCast(local_0_start * (fftw->partial_dim), &nstart)); 1162 PetscCall(PetscIntCast(low, &nlow)); 1163 PetscCall(ISCreateStride(comm, n1, nstart, 1, &list1)); 1164 PetscCall(ISCreateStride(comm, n1, nlow, 1, &list2)); 1165 PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat)); 1166 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1167 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1168 PetscCall(VecScatterDestroy(&vecscat)); 1169 PetscCall(ISDestroy(&list1)); 1170 PetscCall(ISDestroy(&list2)); 1171 #else 1172 temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1]; 1173 1174 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1; 1175 1176 fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1177 1178 (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp; 1179 1180 partial_dim = fftw->partial_dim; 1181 1182 PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1)); 1183 PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2)); 1184 1185 if (dim[ndim - 1] % 2 == 0) NM = 2; 1186 else NM = 1; 1187 1188 j = low; 1189 for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) { 1190 PetscCall(PetscIntCast(local_0_start * partial_dim + i, &indx1[i])); 1191 indx2[i] = j; 1192 if (k % dim[ndim - 1] == 0) j += NM; 1193 j++; 1194 } 1195 PetscCall(PetscIntCast(local_n0 * partial_dim, &n1)); 1196 PetscCall(ISCreateGeneral(comm, n1, indx1, PETSC_COPY_VALUES, &list1)); 1197 PetscCall(ISCreateGeneral(comm, n1, indx2, PETSC_COPY_VALUES, &list2)); 1198 1199 PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat)); 1200 PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1201 PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 1202 PetscCall(VecScatterDestroy(&vecscat)); 1203 PetscCall(ISDestroy(&list1)); 1204 PetscCall(ISDestroy(&list2)); 1205 PetscCall(PetscFree(indx1)); 1206 PetscCall(PetscFree(indx2)); 1207 #endif 1208 break; 1209 } 1210 #endif 1211 } 1212 PetscFunctionReturn(PETSC_SUCCESS); 1213 } 1214 1215 /*MC 1216 MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package. 1217 1218 Level: intermediate 1219 1220 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()` 1221 M*/ 1222 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1223 { 1224 MPI_Comm comm; 1225 Mat_FFT *fft = (Mat_FFT *)A->data; 1226 Mat_FFTW *fftw; 1227 PetscInt ndim = fft->ndim, *dim = fft->dim; 1228 const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"}; 1229 unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE}; 1230 PetscBool flg; 1231 PetscInt p_flag, partial_dim = 1, ctr; 1232 PetscMPIInt size, rank; 1233 ptrdiff_t *pdim; 1234 #if !defined(PETSC_USE_COMPLEX) 1235 PetscInt tot_dim; 1236 #endif 1237 1238 PetscFunctionBegin; 1239 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 1240 PetscCallMPI(MPI_Comm_size(comm, &size)); 1241 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1242 1243 #if !PetscDefined(HAVE_MPIUNI) 1244 fftw_mpi_init(); 1245 #endif 1246 pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t)); 1247 pdim[0] = dim[0]; 1248 #if !defined(PETSC_USE_COMPLEX) 1249 tot_dim = 2 * dim[0]; 1250 #endif 1251 for (ctr = 1; ctr < ndim; ctr++) { 1252 partial_dim *= dim[ctr]; 1253 pdim[ctr] = dim[ctr]; 1254 #if !defined(PETSC_USE_COMPLEX) 1255 if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1); 1256 else tot_dim *= dim[ctr]; 1257 #endif 1258 } 1259 1260 if (size == 1) { 1261 #if defined(PETSC_USE_COMPLEX) 1262 PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N)); 1263 fft->n = fft->N; 1264 #else 1265 PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim)); 1266 fft->n = tot_dim; 1267 #endif 1268 #if !PetscDefined(HAVE_MPIUNI) 1269 } else { 1270 ptrdiff_t local_n0, local_0_start, local_n1, local_1_start; 1271 #if !defined(PETSC_USE_COMPLEX) 1272 ptrdiff_t temp; 1273 PetscInt N1; 1274 #else 1275 PetscInt n1; 1276 #endif 1277 1278 switch (ndim) { 1279 case 1: 1280 #if !defined(PETSC_USE_COMPLEX) 1281 SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform"); 1282 #else 1283 fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start); 1284 PetscCall(PetscIntCast(local_n0, &fft->n)); 1285 PetscCall(PetscIntCast(local_n1, &n1)); 1286 PetscCall(MatSetSizes(A, n1, fft->n, fft->N, fft->N)); 1287 #endif 1288 break; 1289 case 2: 1290 #if defined(PETSC_USE_COMPLEX) 1291 fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start); 1292 fft->n = (PetscInt)local_n0 * dim[1]; 1293 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1294 #else 1295 fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1296 1297 fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1); 1298 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1))); 1299 #endif 1300 break; 1301 case 3: 1302 #if defined(PETSC_USE_COMPLEX) 1303 fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start); 1304 1305 fft->n = (PetscInt)local_n0 * dim[1] * dim[2]; 1306 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1307 #else 1308 fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1309 1310 fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1); 1311 PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1))); 1312 #endif 1313 break; 1314 default: 1315 #if defined(PETSC_USE_COMPLEX) 1316 fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start); 1317 1318 PetscCall(PetscIntCast(local_n0 * partial_dim, &fft->n)); 1319 PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N)); 1320 #else 1321 temp = pdim[ndim - 1]; 1322 1323 pdim[ndim - 1] = temp / 2 + 1; 1324 1325 fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start); 1326 1327 PetscCall(PetscIntCast(2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp, &fft->n)); 1328 N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp); 1329 1330 pdim[ndim - 1] = temp; 1331 1332 PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1)); 1333 #endif 1334 break; 1335 } 1336 #endif 1337 } 1338 free(pdim); 1339 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW)); 1340 PetscCall(PetscNew(&fftw)); 1341 fft->data = (void *)fftw; 1342 1343 fftw->ndim_fftw = ndim; /* This is dimension of fft */ 1344 fftw->partial_dim = partial_dim; 1345 1346 PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw)); 1347 if (size == 1) { 1348 #if defined(PETSC_USE_64BIT_INDICES) 1349 fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim); 1350 #else 1351 fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim); 1352 #endif 1353 } 1354 1355 for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr]; 1356 1357 fftw->p_forward = NULL; 1358 fftw->p_backward = NULL; 1359 fftw->p_flag = FFTW_ESTIMATE; 1360 1361 if (size == 1) { 1362 A->ops->mult = MatMult_SeqFFTW; 1363 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1364 #if !PetscDefined(HAVE_MPIUNI) 1365 } else { 1366 A->ops->mult = MatMult_MPIFFTW; 1367 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1368 #endif 1369 } 1370 fft->matdestroy = MatDestroy_FFTW; 1371 A->assembled = PETSC_TRUE; 1372 A->preallocated = PETSC_TRUE; 1373 1374 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW)); 1375 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW)); 1376 PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW)); 1377 1378 /* get runtime options */ 1379 PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat"); 1380 PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg)); 1381 if (flg) fftw->p_flag = iplans[p_flag]; 1382 PetscOptionsEnd(); 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385