/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw;
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;
  fftw_plan    p_forward,p_backward;
  unsigned     p_flag; /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of the arrays because an FFTW plan should be
                                                             executed for the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

/*
   MatMult_SeqFFTW performs the serial forward DFT

   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims;
#else
  fftw_iodim        *iodims;
#endif
  PetscInt          i;
#endif
  PetscInt          ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        iodims[ndim-1].n  = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n  = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar*)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       Planning should therefore be done before x is initialized! See the FFTW manual, sec. 2.1 or sec. 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_SeqFFTW performs the serial backward DFT

   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims = fftw->iodims;
#else
  fftw_iodim        *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar*)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}

#if !PetscDefined(HAVE_MPIUNI)
/*
   MatMult_MPIFFTW performs the parallel forward DFT

   Input parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->finarray  = (PetscScalar*)x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       Planning should therefore be done before x is initialized! See the FFTW manual, sec. 2.1 or sec. 4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}

/*
   MatMultTranspose_MPIFFTW performs the parallel backward DFT

   Input parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCall(VecGetArrayRead(x,&x_array));
  PetscCall(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    fftw->binarray  = (PetscScalar*)x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  PetscCall(VecRestoreArray(y,&y_array));
  PetscCall(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
#endif

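/*
   Illustrative sketch (not compiled): the routines above wrap unnormalized FFTW transforms,
   so applying the forward transform (MatMult) followed by the backward transform
   (MatMultTranspose) returns the input scaled by the total number of grid points N; the
   caller rescales by 1/N. The helper name and the grid sizes below are hypothetical.
   A second MatMult on different vectors reuses the cached plan via fftw_execute_dft()
   instead of creating a new plan, as implemented above.
*/
#if 0
static PetscErrorCode ExampleFFTWRoundTrip(MPI_Comm comm)
{
  Mat      A;
  Vec      x,y,z;
  PetscInt dim[2] = {8,8},N = 64;

  PetscFunctionBegin;
  PetscCall(MatCreateFFT(comm,2,dim,MATFFTW,&A));
  PetscCall(MatCreateVecsFFTW(A,&x,&y,&z));
  /* ... fill x ... */
  PetscCall(MatMult(A,x,y));          /* forward DFT */
  PetscCall(MatMultTranspose(A,y,z)); /* backward DFT, z = N*x */
  PetscCall(VecScale(z,1.0/N));       /* recover x */
  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&y));
  PetscCall(VecDestroy(&z));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(0);
}
#endif
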
PetscErrorCode MatDestroy_FFTW(Mat A)
{
  Mat_FFT  *fft  = (Mat_FFT*)A->data;
  Mat_FFTW *fftw = (Mat_FFTW*)fft->data;

  PetscFunctionBegin;
  fftw_destroy_plan(fftw->p_forward);
  fftw_destroy_plan(fftw->p_backward);
  if (fftw->iodims) {
    free(fftw->iodims);
  }
  PetscCall(PetscFree(fftw->dim_fftw));
  PetscCall(PetscFree(fft->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",NULL));
#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_cleanup();
#endif
  PetscFunctionReturn(0);
}

#if !PetscDefined(HAVE_MPIUNI)
#include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/
PetscErrorCode VecDestroy_MPIFFTW(Vec v)
{
  PetscScalar *array;

  PetscFunctionBegin;
  PetscCall(VecGetArray(v,&array));
  fftw_free((fftw_complex*)array);
  PetscCall(VecRestoreArray(v,&array));
  PetscCall(VecDestroy_MPI(v));
  PetscFunctionReturn(0);
}
#endif

#if !PetscDefined(HAVE_MPIUNI)
static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL));
  PetscFunctionReturn(0);
}

static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL));
  PetscFunctionReturn(0);
}

static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout,Vec *bout_new)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A));
  PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new));
  PetscFunctionReturn(0);
}
#endif

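/*
   Illustrative sketch (not compiled): because the duplicate and destroy ops of the FFTW
   work vectors are overridden in MatCreateVecsFFTW_FFTW() below, a plain VecDuplicate()
   on such a vector creates another vector with the FFTW layout by querying the composed
   "FFTmatrix" object. The helper name below is hypothetical.
*/
#if 0
static PetscErrorCode ExampleDuplicateFFTWVec(Mat A)
{
  Vec fin,fout,bout,fin2;

  PetscFunctionBegin;
  PetscCall(MatCreateVecsFFTW(A,&fin,&fout,&bout));
  PetscCall(VecDuplicate(fin,&fin2)); /* dispatches to VecDuplicate_FFTW_fin() above */
  PetscCall(VecDestroy(&fin2));
  PetscCall(VecDestroy(&fin));
  PetscCall(VecDestroy(&fout));
  PetscCall(VecDestroy(&bout));
  PetscFunctionReturn(0);
}
#endif
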
/*@
   MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the
   parallel layout determined by FFTW

   Collective on Mat

   Input Parameter:
.  A - the matrix

   Output Parameters:
+  x - (optional) input vector of forward FFTW
.  y - (optional) output vector of forward FFTW
-  z - (optional) output vector of backward FFTW

   Level: advanced

   Notes:
   The parallel layout of the output of the forward FFTW is always the same as the input
   of the backward FFTW. However, the parallel layout of the input vector of the forward
   FFTW need not be the same as the output of the backward FFTW.

   Also note that enough space must be provided when doing a parallel real transform:
   extra zeros are padded at the end of the last dimension. For this reason one needs to
   call fftw_mpi_local_size_transposed(), with the last dimension changed from n to n/2+1.
   The number of zeros to be padded depends on whether the last dimension is even or odd:
   two zeros are needed if it is even, one if it is odd.

   Lastly, some scratch space is needed at the end of the data set on each process. The
   alloc_local value returned by FFTW accounts for both the data and this scratch space
   on each process.

.seealso: `MatCreateFFT()`
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscFunctionBegin;
  PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscMPIInt size,rank;
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));

  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin)  PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout));
#else
    if (fin)  PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin));
    if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout));
    if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout));
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else { /* parallel cases */
    Mat_FFTW     *fftw = (Mat_FFTW*)fft->data;
    PetscInt     ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no longer the product of the individual dimensions */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt)temp);
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin));
        PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A));
        (*fin)->ops->duplicate = VecDuplicate_FFTW_fin;
        (*fin)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout));
        PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A));
        (*fout)->ops->duplicate = VecDuplicate_FFTW_fout;
        (*fout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout));
        PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A));
        (*bout)->ops->duplicate = VecDuplicate_FFTW_bout;
        (*bout)->ops->destroy   = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
    /* FFTW vectors have their data array allocated by fftw_malloc, so that v->array=xxx but
       v->array_allocated=NULL. A regular replacearray call would not free that memory and would
       only cause memory leaks. We void these pointers here to avoid accidental use.
    */
    if (fin)  (*fin)->ops->replacearray  = NULL;
    if (fout) (*fout)->ops->replacearray = NULL;
    if (bout) (*bout)->ops->replacearray = NULL;
#endif
  }
  PetscFunctionReturn(0);
}

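/*
   Illustrative sketch (not compiled): typical use of the scatters defined below for a
   parallel transform, where the PETSc-ordered vector must first be copied into the
   FFTW-layout work vector (which, for real transforms, contains padding). The helper
   name and grid sizes are hypothetical; compare the tests in src/mat/tests.
*/
#if 0
static PetscErrorCode ExampleScatteredTransform(MPI_Comm comm,Vec xpetsc,Vec ypetsc)
{
  Mat      A;
  Vec      fin,fout,bout;
  PetscInt dim[3] = {4,4,4};

  PetscFunctionBegin;
  PetscCall(MatCreateFFT(comm,3,dim,MATFFTW,&A));
  PetscCall(MatCreateVecsFFTW(A,&fin,&fout,&bout));
  PetscCall(VecScatterPetscToFFTW(A,xpetsc,fin));  /* PETSc layout -> FFTW layout (adds padding) */
  PetscCall(MatMult(A,fin,fout));                  /* forward transform */
  PetscCall(MatMultTranspose(A,fout,bout));        /* backward transform (unnormalized) */
  PetscCall(VecScatterFFTWToPetsc(A,bout,ypetsc)); /* FFTW layout -> PETSc layout (drops padding) */
  PetscCall(VecDestroy(&fin));
  PetscCall(VecDestroy(&fout));
  PetscCall(VecDestroy(&bout));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(0);
}
#endif
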
/*@
   VecScatterPetscToFFTW - Copies a PETSc vector to the vector that goes into the FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameter:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note:
   For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
   one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job by a scattering operation.

.seealso: `VecScatterFFTWToPetsc()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}

PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  PetscInt    low;
  PetscMPIInt rank,size;
  PetscInt    vsize,vsize1;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(VecGetOwnershipRange(y,&low,NULL));

  if (size==1) {
    PetscCall(VecGetSize(x,&vsize));
    PetscCall(VecGetSize(y,&vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1));
    PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
    PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW  *fftw = (Mat_FFTW*)fft->data;
    PetscInt  ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t local_n0,local_0_start;
    ptrdiff_t local_n1,local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i,j,k,partial_dim;
    PetscInt  *indx1,*indx2,tempindx,tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1));
      PetscCall(ISCreateStride(comm,local_n0,low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));

      if (dim[1]%2==0) {
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

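/*
   Worked example of the padding that the scatters above and below account for (illustrative
   numbers only): for a real 2D transform with dim = {4,5}, the padded last dimension is
   2*(5/2+1) = 6, so the FFTW-layout array holds 4*6 = 24 doubles per plane while the PETSc
   vector holds 4*5 = 20 entries; the index maps built above skip one trailing pad entry per
   row (odd last dimension) or two (even last dimension).
*/
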
/*@
   VecScatterFFTWToPetsc - Converts the FFTW output to a PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameter:
.  y - PETSc vector

   Level: intermediate

   Note:
   When doing a real transform, the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
   VecScatterFFTWToPetsc() removes those extra zeros.

.seealso: `VecScatterPetscToFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}

PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  PetscInt    low;
  PetscMPIInt rank,size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCall(VecGetOwnershipRange(x,&low,NULL));

  if (size==1) {
    PetscCall(ISCreateStride(comm,fft->N,0,1,&list1));
    PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
    PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW  *fftw = (Mat_FFTW*)fft->data;
    PetscInt  ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t local_n0,local_0_start;
    ptrdiff_t local_n1,local_1_start;
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i,j,k,partial_dim;
    PetscInt  *indx1,*indx2,tempindx,tempindx1;
    PetscInt  NM;
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1));
      PetscCall(ISCreateStride(comm,local_n1,low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));

      if (dim[1]%2==0) NM = dim[1]+2;
      else             NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;
          tempindx1 = i*NM + j;

          indx1[tempindx] = local_0_start*dim[1]+tempindx;
          indx2[tempindx] = low+tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx] = local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx] = low+tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));

      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

/*
   MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate
*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw;
  PetscInt    ndim = fft->ndim,*dim = fft->dim;
  const char  *plans[] = {"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  unsigned    iplans[] = {FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag,partial_dim=1,ctr;
  PetscMPIInt size,rank;
  ptrdiff_t   *pdim;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt    tot_dim;
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2*dim[0];
#endif
  for (ctr=1; ctr<ndim; ctr++) {
    partial_dim *= dim[ctr];
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
    else             tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N));
    fft->n = fft->N;
#else
    PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      fft->n = (PetscInt)local_n0;
      PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      fft->n = (PetscInt)local_n0*dim[1];
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
      PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      fft->n = (PetscInt)local_n0*dim[1]*dim[2];
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);

      fft->n = (PetscInt)local_n0*partial_dim;
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      temp = pdim[ndim-1];

      pdim[ndim-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1     = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt)temp);

      pdim[ndim-1] = temp;

      PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1));
#endif
      break;
    }
#endif
  }
  free(pdim);
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW));
  PetscCall(PetscNewLog(A,&fftw));
  fft->data = (void*)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is the dimension of the FFT */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim,&(fftw->dim_fftw)));
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64*)malloc(sizeof(fftw_iodim64)*ndim);
#else
    fftw->iodims = (fftw_iodim*)malloc(sizeof(fftw_iodim)*ndim);
#endif
  }

  for (ctr=0; ctr<ndim; ctr++) (fftw->dim_fftw)[ctr] = dim[ctr];

  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW));

  /* get runtime options */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
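/*
   Example (illustrative, command name hypothetical): the planner flag is selected at runtime
   from the options database set up above, e.g.

     ./app -mat_fftw_plannerflags FFTW_MEASURE

   With FFTW_MEASURE or the more expensive flags, the first MatMult()/MatMultTranspose() call
   overwrites the arrays used for planning, as warned in the plan-creation code above, so the
   plan should be created before the input vector holds data that must be preserved.
*/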