/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t ndim_fftw, *dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt partial_dim;
  fftw_plan p_forward, p_backward;
  unsigned p_flag;                                          /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar *finarray, *foutarray, *binarray, *boutarray; /* keep track of the arrays because fftw_execute() can only be executed for the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat, Vec *, Vec *, Vec *);

static PetscErrorCode MatMult_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar *y_array;
  Vec xx;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim *iodims;
#endif
  PetscInt i;
#endif
  PetscInt ndim = fft->ndim, *dim = fft->dim;

  PetscFunctionBegin;
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim - 1].n  = (ptrdiff_t)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim - 1].n  = (int)dim[ndim - 1];
        iodims[ndim - 1].is = iodims[ndim - 1].os = 1;
        for (i = ndim - 2; i >= 0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i + 1].is * iodims[i + 1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim, (fftw_iodim *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_FORWARD, fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim, (int *)dim, (double *)x_array, (fftw_complex *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays was overwritten during planning, so restore the dummy array and use the real input; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }

  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use the existing plan on the new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_r2c(fftw->p_forward, (double *)x_array, (fftw_complex *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar *y_array;
  PetscInt ndim = fft->ndim, *dim = fft->dim;
  Vec xx;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims = fftw->iodims;
#else
  fftw_iodim *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim, (fftw_iodim64 *)iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim, iodims, 0, NULL, (fftw_complex *)x_array, (fftw_complex *)y_array, FFTW_BACKWARD, fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim, (int *)dim, (fftw_complex *)x_array, (double *)y_array, fftw->p_flag);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays was overwritten during planning, so restore the dummy array and use the real input; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray  = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use the existing plan on the new arrays */
#if defined(PETSC_USE_COMPLEX)
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
#else
    fftw_execute_dft_c2r(fftw->p_backward, (fftw_complex *)x_array, (double *)y_array);
#endif
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode MatMult_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar *y_array;
  PetscInt ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm comm;
  Vec xx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_forward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0], dim[1], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0], dim[1], dim[2], (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_FORWARD, fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw, fftw->dim_fftw, (double *)x_array, (fftw_complex *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays was overwritten during planning, so restore the dummy array and use the real input; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->finarray  = (PetscScalar *)x_array;
      fftw->foutarray = y_array;
    }
  }
  if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use the existing plan on the new arrays */
    fftw_execute_dft(fftw->p_forward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_forward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIFFTW(Mat A, Vec x, Vec y)
{
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
  const PetscScalar *x_array;
  PetscScalar *y_array;
  PetscInt ndim = fft->ndim, *dim = fft->dim;
  MPI_Comm comm;
  Vec xx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  if (!fftw->p_backward && fftw->p_flag != FFTW_ESTIMATE) {
    /* The data in the in/out arrays is overwritten during planning, so a dummy array is needed for the computation; see the FFTW manual, sec. 2.1 or sec. 4 */
    PetscCall(VecDuplicate(x, &xx));
    PetscCall(VecGetArrayRead(xx, &x_array));
  } else {
    PetscCall(VecGetArrayRead(x, &x_array));
  }
  PetscCall(VecGetArray(y, &y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0], dim[1], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0], dim[1], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0], dim[1], dim[2], (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (fftw_complex *)y_array, comm, FFTW_BACKWARD, fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw, fftw->dim_fftw, (fftw_complex *)x_array, (double *)y_array, comm, FFTW_ESTIMATE);
#endif
      break;
    }
    if (fftw->p_flag != FFTW_ESTIMATE) {
      /* The data in the in/out arrays was overwritten during planning, so restore the dummy array and use the real input; see the FFTW manual, sec. 2.1 or sec. 4 */
      PetscCall(VecRestoreArrayRead(xx, &x_array));
      PetscCall(VecDestroy(&xx));
      PetscCall(VecGetArrayRead(x, &x_array));
    } else {
      fftw->binarray = (PetscScalar *)x_array;
      fftw->boutarray = y_array;
    }
  }
  if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use the existing plan on the new arrays */
    fftw_execute_dft(fftw->p_backward, (fftw_complex *)x_array, (fftw_complex *)y_array);
  } else {
    fftw_execute(fftw->p_backward);
  }
  PetscCall(VecRestoreArray(y, &y_array));
  PetscCall(VecRestoreArrayRead(x, &x_array));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw = (Mat_FFTW *)fft->data;

  PetscFunctionBegin;
  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) free(fftw->iodims);
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !PetscDefined(HAVE_MPIUNI)
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
static PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v, &array));
  fftw_free((fftw_complex *)array);
  PetscCall(VecRestoreArray(v, &array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin, Vec *fin_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fin, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, fin_new, NULL, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout, Vec *fout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, fout_new, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)bout, "FFTmatrix", (PetscObject *)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A, NULL, NULL, bout_new));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

/*@
  MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
  parallel layout determined by `MATFFTW`

  Collective

  Input Parameter:
. A - the matrix

  Output Parameters:
+ x - (optional) input vector of forward FFTW
. y - (optional) output vector of forward FFTW
- z - (optional) output vector of backward FFTW

  Options Database Key:
. -mat_fftw_plannerflags - set FFTW planner flags

  Level: advanced

  Notes:
  The parallel layout of the output of the forward FFTW is always the same as the input of the
  backward FFTW. The parallel layout of the input vector of the forward FFTW, however, might not be
  the same as the output of the backward FFTW.

  Enough space must be provided when doing a parallel real transform: extra zeros are padded at the
  end of the last dimension. For this reason one needs to invoke the routine
  fftw_mpi_local_size_transposed(), changing the last dimension from n to n/2+1 in that call. The
  number of zeros to pad depends on whether the last dimension is even or odd: two zeros are needed
  if it is even, one if it is odd.

  Lastly, some scratch space is needed at the end of the data set on each process; alloc_local
  returns the combined data-plus-scratch space required on each process.
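
  Example Usage:
  A typical forward/backward sequence is sketched below (error checking omitted; `comm`, `ndim`, `dims`, `input`, and `output` are illustrative names, with `input`/`output` being PETSc vectors in the natural layout):
.vb
  Mat A;
  Vec x, y, z;
  MatCreateFFT(comm, ndim, dims, MATFFTW, &A);
  MatCreateVecsFFTW(A, &x, &y, &z);
  VecScatterPetscToFFTW(A, input, x);  // fill the (possibly padded) forward input vector
  MatMult(A, x, y);                    // forward transform
  MatMultTranspose(A, y, z);           // backward transform
  VecScatterFFTWToPetsc(A, z, output); // strip the padding again
.ve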

  Developer Notes:
  How come `MatCreateVecs()` doesn't produce the correctly padded vectors automatically?

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `MatCreateFFT()`, `MatCreateVecs()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A, Vec *x, Vec *y, Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "MatCreateVecsFFTW_C", (Mat, Vec *, Vec *, Vec *), (A, x, y, z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A, Vec *fin, Vec *fout, Vec *bout)
{
  PetscMPIInt size, rank;
  MPI_Comm comm;
  Mat_FFT *fft = (Mat_FFT *)A->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidType(A, 1);
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->N, bout));
#else
    if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF, fft->n, bout));
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else { /* parallel cases */
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t alloc_local, local_n0, local_0_start;
    ptrdiff_t local_n1;
    fftw_complex *data_fout;
    ptrdiff_t local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin, *data_bout;
#else
    double *data_finr, *data_boutr;
    PetscInt n1, N1;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n0, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, local_n1, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1 = 2 * dim[0] * (dim[1] / 2 + 1);
      n1 = 2 * local_n0 * (dim[1] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1 = 2 * dim[0] * dim[1] * (dim[2] / 2 + 1);
      n1 = 2 * local_n0 * dim[1] * (dim[2] / 2 + 1);
      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, n1, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, (PetscInt)n1, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);
      N1 = 2 * fft->N * (PetscInt)(fftw->dim_fftw[fftw->ndim_fftw - 1]) / ((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      if (fin) {
        data_finr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_finr, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double *)fftw_malloc(sizeof(double) * alloc_local * 2);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, N1, (PetscScalar *)data_boutr, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);
      if (fin) {
        data_fin = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fin, fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin, "FFTmatrix", (PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_fout, fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout, "FFTmatrix", (PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * alloc_local);
        PetscCall(VecCreateMPIWithArray(comm, 1, fft->n, fft->N, (const PetscScalar *)data_bout, bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout, "FFTmatrix", (PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, so v->array points to the FFTW buffer
       while v->array_allocated is NULL. A regular replacearray call would not free that memory and would
       only cause a memory leak. We void these pointers here to avoid accidental use.
    */
    if (fin) (*fin)->ops->replacearray = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into `MATFFTW` calls.

  Collective

  Input Parameters:
+ A - FFTW matrix
- x - the PETSc vector

  Output Parameter:
. y - the FFTW vector

  Level: intermediate

  Note:
  For a parallel real FFT, FFTW requires extra space to be inserted at the end of the last dimension. This
  is required even when one is not doing an in-place transform. The last dimension size must be changed to
  2*(dim[last]/2+1) to accommodate these extra zeros. This routine does that job with a scatter operation.
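
  Example Usage:
  A minimal sketch (`xin` is assumed to be a PETSc vector in the natural layout, and `x`, `y` were obtained with `MatCreateVecsFFTW()`):
.vb
  VecScatterPetscToFFTW(A, xin, x); // copy xin into the padded FFTW input vector
  MatMult(A, x, y);                 // forward transform
.ve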

.seealso: [](ch_matrices), `Mat`, `MATFFTW`, `VecScatterFFTWToPetsc()`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterPetscToFFTW_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm comm;
  Mat_FFT *fft = (Mat_FFT *)A->data;
  PetscInt low;
  PetscMPIInt rank, size;
  PetscInt vsize, vsize1;
  VecScatter vecscat;
  IS list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(y, &low, NULL));

  if (size == 1) {
    PetscCall(VecGetSize(x, &vsize));
    PetscCall(VecGetSize(y, &vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt NM;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n0, local_0_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      /* in the FFTW r2c layout each row of the last dimension is padded to NM = 2*(dim[1]/2+1) doubles */
      if (dim[1] % 2 == 0) {
        NM = dim[1] + 2;
      } else {
        NM = dim[1] + 1;
      }
      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  VecScatterFFTWToPetsc - Converts a `MATFFTW` output vector to a PETSc vector.

  Collective

  Input Parameters:
+ A - `MATFFTW` matrix
- x - FFTW vector

  Output Parameter:
. y - PETSc vector

  Level: intermediate

  Note:
  For a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
  `VecScatterFFTWToPetsc()` removes those extra zeros.
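
  Example Usage:
  A minimal sketch (`y`, `z` were obtained with `MatCreateVecsFFTW()`, `xout` is assumed to be a PETSc vector in the natural layout, and `N` is the total number of grid points; FFTW transforms are unnormalized, so a forward followed by a backward transform scales the data by `N`):
.vb
  MatMultTranspose(A, y, z);         // backward transform into the FFTW vector z
  VecScatterFFTWToPetsc(A, z, xout); // strip the padding zeros
  VecScale(xout, 1.0 / N);           // undo the factor of N introduced by FFTW
.ve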

.seealso: [](ch_matrices), `Mat`, `VecScatterPetscToFFTW()`, `MATFFTW`, `MatCreateVecsFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A, Vec x, Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A, "VecScatterFFTWToPetsc_C", (Mat, Vec, Vec), (A, x, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A, Vec x, Vec y)
{
  MPI_Comm comm;
  Mat_FFT *fft = (Mat_FFT *)A->data;
  PetscInt low;
  PetscMPIInt rank, size;
  VecScatter vecscat;
  IS list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x, &low, NULL));

  if (size == 1) {
    PetscCall(ISCreateStride(comm, fft->N, 0, 1, &list1));
    PetscCall(VecScatterCreate(x, list1, y, list1, &vecscat));
    PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW *fftw = (Mat_FFTW *)fft->data;
    PetscInt ndim = fft->ndim, *dim = fft->dim;
    ptrdiff_t local_n0, local_0_start;
    ptrdiff_t local_n1, local_1_start;
    IS list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt i, j, k, partial_dim;
    PetscInt *indx1, *indx2, tempindx, tempindx1;
    PetscInt NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_BACKWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(ISCreateStride(comm, local_n1, local_1_start, 1, &list1));
      PetscCall(ISCreateStride(comm, local_n1, low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm, PETSC_ERR_SUP, "No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1], local_0_start * dim[1], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1], &indx2));

      if (dim[1] % 2 == 0) NM = dim[1] + 2;
      else NM = dim[1] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          tempindx = i * dim[1] + j;
          tempindx1 = i * NM + j;

          indx1[tempindx] = local_0_start * dim[1] + tempindx;
          indx2[tempindx] = low + tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], local_0_start * dim[1] * dim[2], 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * dim[1] * dim[2], low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * dim[1] * dim[2], &indx2));

      if (dim[2] % 2 == 0) NM = dim[2] + 2;
      else NM = dim[2] + 1;

      for (i = 0; i < local_n0; i++) {
        for (j = 0; j < dim[1]; j++) {
          for (k = 0; k < dim[2]; k++) {
            tempindx = i * dim[1] * dim[2] + j * dim[2] + k;
            tempindx1 = i * dim[1] * NM + j * NM + k;

            indx1[tempindx] = local_0_start * dim[1] * dim[2] + tempindx;
            indx2[tempindx] = low + tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * dim[1] * dim[2], indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start);

      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), local_0_start * (fftw->partial_dim), 1, &list1));
      PetscCall(ISCreateStride(comm, local_n0 * (fftw->partial_dim), low, 1, &list2));
      PetscCall(VecScatterCreate(x, list1, y, list2, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw - 1];

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp / 2 + 1;

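      /* with the last dimension temporarily set to n/2+1 (the complex output size of the r2c transform),
         fftw_mpi_local_size_transposed() reports the local layout of the transposed complex data;
         the original value is restored right after the call */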
      fftw_mpi_local_size_transposed(fftw->ndim_fftw, fftw->dim_fftw, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw - 1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx1));
      PetscCall(PetscMalloc1((PetscInt)local_n0 * partial_dim, &indx2));

      if (dim[ndim - 1] % 2 == 0) NM = 2;
      else NM = 1;

      j = low;
      for (i = 0, k = 1; i < (PetscInt)local_n0 * partial_dim; i++, k++) {
        indx1[i] = local_0_start * partial_dim + i;
        indx2[i] = j;
        if (k % dim[ndim - 1] == 0) j += NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx1, PETSC_COPY_VALUES, &list1));
      PetscCall(ISCreateGeneral(comm, local_n0 * partial_dim, indx2, PETSC_COPY_VALUES, &list2));

      PetscCall(VecScatterCreate(x, list2, y, list1, &vecscat));
      PetscCall(VecScatterBegin(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat, x, y, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATFFTW - "fftw" - Matrix type that provides FFT via the FFTW external package.

  Level: intermediate

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatType`, `MatCreateFFT()`
M*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm comm;
  Mat_FFT *fft = (Mat_FFT *)A->data;
  Mat_FFTW *fftw;
  PetscInt ndim = fft->ndim, *dim = fft->dim;
  const char *plans[] = {"FFTW_ESTIMATE", "FFTW_MEASURE", "FFTW_PATIENT", "FFTW_EXHAUSTIVE"};
  unsigned iplans[] = {FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE};
  PetscBool flg;
  PetscInt p_flag, partial_dim = 1, ctr;
  PetscMPIInt size, rank;
  ptrdiff_t *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  pdim = (ptrdiff_t *)calloc(ndim, sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2 * dim[0];
#endif
  for (ctr = 1; ctr < ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr] = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr == ndim - 1) tot_dim *= (dim[ctr] / 2 + 1);
    else tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A, fft->N, fft->N, fft->N, fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A, tot_dim, tot_dim, tot_dim, tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0, local_0_start, local_n1, local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt N1;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm, PETSC_ERR_SUP, "FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0], comm, FFTW_FORWARD, FFTW_ESTIMATE, &local_n0, &local_0_start, &local_n1, &local_1_start);
      fft->n = (PetscInt)local_n0;
      PetscCall(MatSetSizes(A, local_n1, fft->n, fft->N, fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0], dim[1], comm, &local_n0, &local_0_start);
      fft->n = (PetscInt)local_n0 * dim[1];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0], dim[1] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * (dim[1] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * (dim[1] / 2 + 1), 2 * dim[0] * (dim[1] / 2 + 1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0], dim[1], dim[2], comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * dim[1] * dim[2];
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0], dim[1], dim[2] / 2 + 1, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * dim[1] * (dim[2] / 2 + 1);
      PetscCall(MatSetSizes(A, fft->n, fft->n, 2 * dim[0] * dim[1] * (dim[2] / 2 + 1), 2 * dim[0] * dim[1] * (dim[2] / 2 + 1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim, pdim, comm, &local_n0, &local_0_start);

      fft->n = (PetscInt)local_n0 * partial_dim;
      PetscCall(MatSetSizes(A, fft->n, fft->n, fft->N, fft->N));
#else
      temp = pdim[ndim - 1];

      pdim[ndim - 1] = temp / 2 + 1;

      fftw_mpi_local_size_transposed(ndim, pdim, comm, &local_n0, &local_0_start, &local_n1, &local_1_start);

      fft->n = 2 * (PetscInt)local_n0 * partial_dim * pdim[ndim - 1] / temp;
      N1 = 2 * fft->N * (PetscInt)pdim[ndim - 1] / ((PetscInt)temp);

      pdim[ndim - 1] = temp;

      PetscCall(MatSetSizes(A, fft->n, fft->n, N1, N1));
#endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATFFTW));
  PetscCall(PetscNew(&fftw));
  fft->data = (void *)fftw;

  fftw->ndim_fftw = (ptrdiff_t)ndim; /* this is the dimension of the FFT */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &fftw->dim_fftw));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *)malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *)malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr = 0; ctr < ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatCreateVecsFFTW_C", MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterPetscToFFTW_C", VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "VecScatterFFTWToPetsc_C", VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "FFTW Options", "Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags", "Planner Flags", "None", plans, 4, plans[0], &p_flag, &flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}