1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #if !PetscDefined(HAVE_MPIUNI) 10 #include <fftw3-mpi.h> 11 #else 12 #include <fftw3.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 ptrdiff_t ndim_fftw,*dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward,p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 27 executed for the arrays with which the plan was created */ 28 } Mat_FFTW; 29 30 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 32 extern PetscErrorCode MatDestroy_FFTW(Mat); 33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 34 #if !PetscDefined(HAVE_MPIUNI) 35 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38 #endif 39 40 /* 41 MatMult_SeqFFTW performs forward DFT 42 Input parameter: 43 A - the matrix 44 x - the vector on which FDFT will be performed 45 46 Output parameter: 47 y - vector that stores result of FDFT 48 */ 49 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 50 { 51 Mat_FFT *fft = (Mat_FFT*)A->data; 52 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 53 const PetscScalar *x_array; 54 PetscScalar *y_array; 55 #if defined(PETSC_USE_COMPLEX) 56 #if defined(PETSC_USE_64BIT_INDICES) 57 fftw_iodim64 *iodims; 58 #else 59 fftw_iodim *iodims; 60 #endif 61 PetscInt i; 62 #endif 63 PetscInt ndim = fft->ndim,*dim = fft->dim; 64 65 PetscFunctionBegin; 66 PetscCall(VecGetArrayRead(x,&x_array)); 67 PetscCall(VecGetArray(y,&y_array)); 68 if 
(!fftw->p_forward) { /* create a plan, then execute it */ 69 switch (ndim) { 70 case 1: 71 #if defined(PETSC_USE_COMPLEX) 72 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 73 #else 74 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 75 #endif 76 break; 77 case 2: 78 #if defined(PETSC_USE_COMPLEX) 79 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 80 #else 81 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 82 #endif 83 break; 84 case 3: 85 #if defined(PETSC_USE_COMPLEX) 86 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 87 #else 88 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 89 #endif 90 break; 91 default: 92 #if defined(PETSC_USE_COMPLEX) 93 iodims = fftw->iodims; 94 #if defined(PETSC_USE_64BIT_INDICES) 95 if (ndim) { 96 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 97 iodims[ndim-1].is = iodims[ndim-1].os = 1; 98 for (i=ndim-2; i>=0; --i) { 99 iodims[i].n = (ptrdiff_t)dim[i]; 100 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 101 } 102 } 103 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 104 #else 105 if (ndim) { 106 iodims[ndim-1].n = (int)dim[ndim-1]; 107 iodims[ndim-1].is = iodims[ndim-1].os = 1; 108 for (i=ndim-2; i>=0; --i) { 109 iodims[i].n = (int)dim[i]; 110 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 111 } 112 } 113 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 114 #endif 115 116 #else 117 fftw->p_forward = 
fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 118 #endif 119 break; 120 } 121 fftw->finarray = (PetscScalar *) x_array; 122 fftw->foutarray = y_array; 123 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 124 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 125 fftw_execute(fftw->p_forward); 126 } else { /* use existing plan */ 127 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 128 #if defined(PETSC_USE_COMPLEX) 129 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 130 #else 131 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 132 #endif 133 } else { 134 fftw_execute(fftw->p_forward); 135 } 136 } 137 PetscCall(VecRestoreArray(y,&y_array)); 138 PetscCall(VecRestoreArrayRead(x,&x_array)); 139 PetscFunctionReturn(0); 140 } 141 142 /* MatMultTranspose_SeqFFTW performs serial backward DFT 143 Input parameter: 144 A - the matrix 145 x - the vector on which BDFT will be performed 146 147 Output parameter: 148 y - vector that stores result of BDFT 149 */ 150 151 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 152 { 153 Mat_FFT *fft = (Mat_FFT*)A->data; 154 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 155 const PetscScalar *x_array; 156 PetscScalar *y_array; 157 PetscInt ndim = fft->ndim,*dim = fft->dim; 158 #if defined(PETSC_USE_COMPLEX) 159 #if defined(PETSC_USE_64BIT_INDICES) 160 fftw_iodim64 *iodims = fftw->iodims; 161 #else 162 fftw_iodim *iodims = fftw->iodims; 163 #endif 164 #endif 165 166 PetscFunctionBegin; 167 PetscCall(VecGetArrayRead(x,&x_array)); 168 PetscCall(VecGetArray(y,&y_array)); 169 if (!fftw->p_backward) { /* create a plan, then execute it */ 170 switch (ndim) { 171 case 1: 172 #if defined(PETSC_USE_COMPLEX) 173 fftw->p_backward = 
fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 174 #else 175 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 176 #endif 177 break; 178 case 2: 179 #if defined(PETSC_USE_COMPLEX) 180 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 181 #else 182 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 183 #endif 184 break; 185 case 3: 186 #if defined(PETSC_USE_COMPLEX) 187 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 188 #else 189 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 190 #endif 191 break; 192 default: 193 #if defined(PETSC_USE_COMPLEX) 194 #if defined(PETSC_USE_64BIT_INDICES) 195 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 196 #else 197 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 198 #endif 199 #else 200 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 201 #endif 202 break; 203 } 204 fftw->binarray = (PetscScalar *) x_array; 205 fftw->boutarray = y_array; 206 fftw_execute(fftw->p_backward); 207 } else { /* use existing plan */ 208 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 209 #if defined(PETSC_USE_COMPLEX) 210 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 211 #else 212 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 213 #endif 214 } else { 215 fftw_execute(fftw->p_backward); 216 } 217 } 218 
PetscCall(VecRestoreArray(y,&y_array)); 219 PetscCall(VecRestoreArrayRead(x,&x_array)); 220 PetscFunctionReturn(0); 221 } 222 223 #if !PetscDefined(HAVE_MPIUNI) 224 /* MatMult_MPIFFTW performs forward DFT in parallel 225 Input parameter: 226 A - the matrix 227 x - the vector on which FDFT will be performed 228 229 Output parameter: 230 y - vector that stores result of FDFT 231 */ 232 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 233 { 234 Mat_FFT *fft = (Mat_FFT*)A->data; 235 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 236 const PetscScalar *x_array; 237 PetscScalar *y_array; 238 PetscInt ndim = fft->ndim,*dim = fft->dim; 239 MPI_Comm comm; 240 241 PetscFunctionBegin; 242 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 243 PetscCall(VecGetArrayRead(x,&x_array)); 244 PetscCall(VecGetArray(y,&y_array)); 245 if (!fftw->p_forward) { /* create a plan, then execute it */ 246 switch (ndim) { 247 case 1: 248 #if defined(PETSC_USE_COMPLEX) 249 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 250 #else 251 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 252 #endif 253 break; 254 case 2: 255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 257 #else 258 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 259 #endif 260 break; 261 case 3: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 264 #else 265 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 266 #endif 267 break; 268 default: 269 #if 
defined(PETSC_USE_COMPLEX) 270 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 271 #else 272 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 273 #endif 274 break; 275 } 276 fftw->finarray = (PetscScalar *) x_array; 277 fftw->foutarray = y_array; 278 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280 fftw_execute(fftw->p_forward); 281 } else { /* use existing plan */ 282 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 284 } else { 285 fftw_execute(fftw->p_forward); 286 } 287 } 288 PetscCall(VecRestoreArray(y,&y_array)); 289 PetscCall(VecRestoreArrayRead(x,&x_array)); 290 PetscFunctionReturn(0); 291 } 292 293 /* 294 MatMultTranspose_MPIFFTW performs parallel backward DFT 295 Input parameter: 296 A - the matrix 297 x - the vector on which BDFT will be performed 298 299 Output parameter: 300 y - vector that stores result of BDFT 301 */ 302 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 303 { 304 Mat_FFT *fft = (Mat_FFT*)A->data; 305 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 306 const PetscScalar *x_array; 307 PetscScalar *y_array; 308 PetscInt ndim = fft->ndim,*dim = fft->dim; 309 MPI_Comm comm; 310 311 PetscFunctionBegin; 312 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 313 PetscCall(VecGetArrayRead(x,&x_array)); 314 PetscCall(VecGetArray(y,&y_array)); 315 if (!fftw->p_backward) { /* create a plan, then execute it */ 316 switch (ndim) { 317 case 1: 318 #if defined(PETSC_USE_COMPLEX) 319 fftw->p_backward = 
fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 320 #else 321 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 322 #endif 323 break; 324 case 2: 325 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 326 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 327 #else 328 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 329 #endif 330 break; 331 case 3: 332 #if defined(PETSC_USE_COMPLEX) 333 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 334 #else 335 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 336 #endif 337 break; 338 default: 339 #if defined(PETSC_USE_COMPLEX) 340 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 341 #else 342 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 343 #endif 344 break; 345 } 346 fftw->binarray = (PetscScalar *) x_array; 347 fftw->boutarray = y_array; 348 fftw_execute(fftw->p_backward); 349 } else { /* use existing plan */ 350 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 351 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 352 } else { 353 fftw_execute(fftw->p_backward); 354 } 355 } 356 PetscCall(VecRestoreArray(y,&y_array)); 357 PetscCall(VecRestoreArrayRead(x,&x_array)); 358 PetscFunctionReturn(0); 359 } 360 #endif 361 362 PetscErrorCode MatDestroy_FFTW(Mat A) 363 { 364 
Mat_FFT *fft = (Mat_FFT*)A->data; 365 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 366 367 PetscFunctionBegin; 368 fftw_destroy_plan(fftw->p_forward); 369 fftw_destroy_plan(fftw->p_backward); 370 if (fftw->iodims) { 371 free(fftw->iodims); 372 } 373 PetscCall(PetscFree(fftw->dim_fftw)); 374 PetscCall(PetscFree(fft->data)); 375 #if !PetscDefined(HAVE_MPIUNI) 376 fftw_mpi_cleanup(); 377 #endif 378 PetscFunctionReturn(0); 379 } 380 381 #if !PetscDefined(HAVE_MPIUNI) 382 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 383 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 384 { 385 PetscScalar *array; 386 387 PetscFunctionBegin; 388 PetscCall(VecGetArray(v,&array)); 389 fftw_free((fftw_complex*)array); 390 PetscCall(VecRestoreArray(v,&array)); 391 PetscCall(VecDestroy_MPI(v)); 392 PetscFunctionReturn(0); 393 } 394 #endif 395 396 #if !PetscDefined(HAVE_MPIUNI) 397 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 398 { 399 Mat A; 400 401 PetscFunctionBegin; 402 PetscCall(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A)); 403 PetscCall(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL)); 404 PetscFunctionReturn(0); 405 } 406 407 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 408 { 409 Mat A; 410 411 PetscFunctionBegin; 412 PetscCall(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A)); 413 PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL)); 414 PetscFunctionReturn(0); 415 } 416 417 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 418 { 419 Mat A; 420 421 PetscFunctionBegin; 422 PetscCall(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A)); 423 PetscCall(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new)); 424 PetscFunctionReturn(0); 425 } 426 #endif 427 428 /*@ 429 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 430 parallel layout determined by FFTW 431 432 Collective on Mat 433 434 Input Parameter: 435 . 
A - the matrix 436 437 Output Parameters: 438 + x - (optional) input vector of forward FFTW 439 . y - (optional) output vector of forward FFTW 440 - z - (optional) output vector of backward FFTW 441 442 Level: advanced 443 444 Note: The parallel layout of output of forward FFTW is always same as the input 445 of the backward FFTW. But parallel layout of the input vector of forward 446 FFTW might not be same as the output of backward FFTW. 447 Also note that we need to provide enough space while doing parallel real transform. 448 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 449 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 450 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 451 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 452 zeros if it is odd only one zero is needed. 453 Lastly one needs some scratch space at the end of data set in each process. alloc_local 454 figures out how much space is needed, i.e. it figures out the data+scratch space for 455 each processor and returns that. 
456 457 .seealso: `MatCreateFFT()` 458 @*/ 459 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 460 { 461 PetscFunctionBegin; 462 PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z)); 463 PetscFunctionReturn(0); 464 } 465 466 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 467 { 468 PetscMPIInt size,rank; 469 MPI_Comm comm; 470 Mat_FFT *fft = (Mat_FFT*)A->data; 471 472 PetscFunctionBegin; 473 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 474 PetscValidType(A,1); 475 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 476 477 PetscCallMPI(MPI_Comm_size(comm, &size)); 478 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 479 if (size == 1) { /* sequential case */ 480 #if defined(PETSC_USE_COMPLEX) 481 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin)); 482 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout)); 483 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout)); 484 #else 485 if (fin) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin)); 486 if (fout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout)); 487 if (bout) PetscCall(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout)); 488 #endif 489 #if !PetscDefined(HAVE_MPIUNI) 490 } else { /* parallel cases */ 491 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 492 PetscInt ndim = fft->ndim,*dim = fft->dim; 493 ptrdiff_t alloc_local,local_n0,local_0_start; 494 ptrdiff_t local_n1; 495 fftw_complex *data_fout; 496 ptrdiff_t local_1_start; 497 #if defined(PETSC_USE_COMPLEX) 498 fftw_complex *data_fin,*data_bout; 499 #else 500 double *data_finr,*data_boutr; 501 PetscInt n1,N1; 502 ptrdiff_t temp; 503 #endif 504 505 switch (ndim) { 506 case 1: 507 #if !defined(PETSC_USE_COMPLEX) 508 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 509 #else 510 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 511 if (fin) { 512 data_fin = 
(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 513 PetscCall(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin)); 514 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 515 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 516 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 517 } 518 if (fout) { 519 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520 PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout)); 521 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 522 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 523 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 526 if (bout) { 527 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 PetscCall(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout)); 529 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 530 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 531 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 532 } 533 break; 534 #endif 535 case 2: 536 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 537 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 538 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 539 if (fin) { 540 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 541 PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 542 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 543 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 544 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 if (fout) { 547 data_fout = 
(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 548 PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout)); 549 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 550 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (bout) { 554 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 555 PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 556 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 557 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 558 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 #else 561 /* Get local size */ 562 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 563 if (fin) { 564 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 566 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 567 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 568 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 if (fout) { 571 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 572 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 573 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 574 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 575 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 576 } 577 if (bout) { 578 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 579 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 580 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 581 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 582 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 583 } 584 #endif 585 break; 586 case 3: 587 
#if !defined(PETSC_USE_COMPLEX) 588 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 589 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 590 if (fin) { 591 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 592 PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 593 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 594 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 595 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 596 } 597 if (fout) { 598 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 599 PetscCall(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout)); 600 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 601 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 602 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 603 } 604 if (bout) { 605 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 606 PetscCall(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 607 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 608 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 609 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 610 } 611 #else 612 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 613 if (fin) { 614 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 615 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 616 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 617 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 618 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 619 } 620 if (fout) { 621 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 622 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 623 
PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 624 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 625 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 626 } 627 if (bout) { 628 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 629 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 630 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 631 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 632 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 #endif 635 break; 636 default: 637 #if !defined(PETSC_USE_COMPLEX) 638 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 639 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 640 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 641 N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 642 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 643 644 if (fin) { 645 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 646 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin)); 647 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 648 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 649 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 650 } 651 if (fout) { 652 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 653 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout)); 654 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 655 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 656 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 657 } 658 if (bout) { 659 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 660 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout)); 661 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 662 
(*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 663 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 664 } 665 #else 666 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 667 if (fin) { 668 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 669 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 670 PetscCall(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 671 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 672 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 673 } 674 if (fout) { 675 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 676 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 677 PetscCall(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 678 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 679 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 680 } 681 if (bout) { 682 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 683 PetscCall(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 684 PetscCall(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 685 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 686 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 687 } 688 #endif 689 break; 690 } 691 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 692 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 693 memory leaks. We void these pointers here to avoid acident uses. 694 */ 695 if (fin) (*fin)->ops->replacearray = NULL; 696 if (fout) (*fout)->ops->replacearray = NULL; 697 if (bout) (*bout)->ops->replacearray = NULL; 698 #endif 699 } 700 PetscFunctionReturn(0); 701 } 702 703 /*@ 704 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 
   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
.  -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For a real parallel FFT, FFTW requires insertion of extra space at the end of the last dimension. This is required even when
   one is not doing an in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
   zeros. This routine does that job via a scattering operation.

.seealso: `VecScatterFFTWToPetsc()`
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A under "VecScatterPetscToFFTW_C"
     (VecScatterPetscToFFTW_FFTW below); PetscUseMethod errors if A lacks it */
  PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}

/* Implementation of VecScatterPetscToFFTW() for MATFFTW.

   Copies the naturally-ordered PETSc vector x into the FFTW-layout vector y.
   In the sequential case this is an identity copy; in the parallel real case each
   row of the last dimension is stretched from dim[last] to NM = 2*(dim[last]/2+1)
   entries, leaving the FFTW-required padding at the end of every row untouched. */
PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm          comm;
  Mat_FFT           *fft = (Mat_FFT*)A->data;
  PetscInt          low;             /* first global index of this rank's slice of y */
  PetscMPIInt       rank,size;
  PetscInt          vsize,vsize1;    /* NOTE(review): queried below but never used afterwards */
  VecScatter        vecscat;
  IS                list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(y,&low,NULL));

  if (size==1) {
    /* Sequential: x and y have identical layout, copy all fft->N entries one-to-one */
    PetscCall(VecGetSize(x,&vsize));
    PetscCall(VecGetSize(y,&vsize1));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1));
    PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
    PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW  *fftw = (Mat_FFTW*)fft->data;
    PetscInt  ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t local_n0,local_0_start; /* this rank's slab size/offset in FFTW's input distribution */
    ptrdiff_t local_n1,local_1_start; /* transposed-output distribution (1D and real transposed cases) */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i,j,k,partial_dim;
    PetscInt  *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;                     /* padded length of the last dimension */
    ptrdiff_t temp;
#endif

    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* Query FFTW's distribution for the forward 1D transform so the source stride
         matches the slab this rank owns in x's natural ordering */
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(ISCreateStride(comm,local_n0,local_0_start,1,&list1));
      PetscCall(ISCreateStride(comm,local_n0,low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 2D: FFTW layout equals the natural layout, so a contiguous stride suffices */
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real 2D: FFTW stores each row padded to NM = 2*(dim[1]/2+1) reals, so build
         explicit index lists mapping natural (unpadded) positions to padded ones */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));

      /* NM = 2*(dim[1]/2+1): +2 when dim[1] is even, +1 when odd */
      if (dim[1]%2==0) {
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;  /* position in the unpadded (natural) row-major layout */
          tempindx1 = i*NM + j;      /* position in the padded FFTW layout, relative to this slab */

          indx1[tempindx]=local_0_start*dim[1]+tempindx;
          indx2[tempindx]=low+tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 3D: same contiguous-slab pattern as complex 2D */
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* The SETERRQ returns immediately; everything after it in this branch is
         unreachable and kept only as a starting point for a future fix */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3, complex: use cached ptrdiff_t dims; partial_dim is the product of
         all dimensions after the first (set up in MatCreate_FFTW) */
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      /* Unreachable after the SETERRQ; retained as reference for a future fix */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      /* Temporarily replace the last dimension with its r2c complex size to query
         FFTW's layout, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));

      /* NM is the per-row padding added by FFTW: 2 extra reals when the last
         dimension is even, 1 when odd */
      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      /* j walks the padded destination; skip NM slots at the end of each last-dim row */
      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing a real transform the FFTW output of the backward DFT contains extra zeros at the end of the last dimension.
   VecScatterFFTWToPetsc removes those extra zeros.

.seealso: `VecScatterPetscToFFTW()`
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A under "VecScatterFFTWToPetsc_C"
     (VecScatterFFTWToPetsc_FFTW below); PetscUseMethod errors if A lacks it */
  PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));
  PetscFunctionReturn(0);
}

/* Implementation of VecScatterFFTWToPetsc() for MATFFTW.

   Inverse of VecScatterPetscToFFTW_FFTW: copies the FFTW-layout vector x back into
   the naturally-ordered PETSc vector y, skipping the per-row padding that FFTW
   inserts at the end of the last dimension for parallel real transforms. */
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  PetscInt    low;            /* first global index of this rank's slice of x (the FFTW vector) */
  PetscMPIInt rank,size;
  VecScatter  vecscat;
  IS          list1;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(VecGetOwnershipRange(x,&low,NULL));

  if (size==1) {
    /* Sequential: identical layouts, identity copy of all fft->N entries */
    PetscCall(ISCreateStride(comm,fft->N,0,1,&list1));
    PetscCall(VecScatterCreate(x,list1,y,list1,&vecscat));
    PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterDestroy(&vecscat));
    PetscCall(ISDestroy(&list1));

#if !PetscDefined(HAVE_MPIUNI)
  } else {
    Mat_FFTW  *fftw = (Mat_FFTW*)fft->data;
    PetscInt  ndim = fft->ndim,*dim = fft->dim;
    ptrdiff_t local_n0,local_0_start; /* this rank's slab in FFTW's input distribution */
    ptrdiff_t local_n1,local_1_start; /* transposed-output distribution */
    IS        list2;
#if !defined(PETSC_USE_COMPLEX)
    PetscInt  i,j,k,partial_dim;
    PetscInt  *indx1, *indx2, tempindx, tempindx1;
    PetscInt  NM;                     /* padded length of the last dimension */
    ptrdiff_t temp;
#endif
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D backward transform output is distributed by (local_n1, local_1_start) */
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(ISCreateStride(comm,local_n1,local_1_start,1,&list1));
      PetscCall(ISCreateStride(comm,local_n1,low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 2D: contiguous slabs; list2 (at low) indexes x, list1 indexes y.
         NOTE(review): case 1 and case 3 pass (x,list1,y,list2) instead — for complex
         transforms the two orderings appear to coincide since the FFTW layout matches
         the natural layout; confirm before unifying. */
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1],low,1,&list2));
      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real 2D: same index mapping as the PetscToFFTW direction, but scattered
         from the padded source (indx2) to the natural destination (indx1) */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2));

      /* NM = 2*(dim[1]/2+1): +2 when dim[1] is even, +1 when odd */
      if (dim[1]%2==0) NM = dim[1]+2;
      else             NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j;  /* unpadded (natural) position */
          tempindx1 = i*NM + j;      /* padded FFTW position, relative to this slab */

          indx1[tempindx]=local_0_start*dim[1]+tempindx;
          indx2[tempindx]=low+tempindx1;
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 3D: contiguous slabs, same pattern as complex 2D */
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* Real 3D: rows of the last dimension are padded to NM = 2*(dim[2]/2+1);
         map padded source positions (indx2) to natural ones (indx1).
         NOTE(review): the PetscToFFTW direction rejects parallel 3D real transforms
         as unsupported, while this direction does not — verify intentional. */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2));

      if (dim[2]%2==0) NM = dim[2]+2;
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;
            tempindx1 = i*dim[1]*NM + j*NM + k;

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3, complex: cached ptrdiff_t dims; partial_dim = product of dims after the first */
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1));
      PetscCall(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2));
      PetscCall(VecScatterCreate(x,list1,y,list2,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
#else
      /* ndim > 3, real: temporarily swap in the r2c complex size of the last
         dimension to query FFTW's layout, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1));
      PetscCall(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2));

      /* Padding per last-dimension row: 2 extra reals if even length, 1 if odd */
      if (dim[ndim-1]%2==0) NM = 2;
      else                  NM = 1;

      /* j walks the padded source; skip NM slots at the end of each last-dim row */
      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;
        j++;
      }
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1));
      PetscCall(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2));

      PetscCall(VecScatterCreate(x,list2,y,list1,&vecscat));
      PetscCall(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterDestroy(&vecscat));
      PetscCall(ISDestroy(&list1));
      PetscCall(ISDestroy(&list2));
      PetscCall(PetscFree(indx1));
      PetscCall(PetscFree(indx2));
#endif
      break;
    }
#endif
  }
  PetscFunctionReturn(0);
}

/*
    MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

  Options Database Keys:
+ -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  MPI_Comm    comm;
  Mat_FFT     *fft = (Mat_FFT*)A->data;
  Mat_FFTW    *fftw;
  PetscInt    ndim = fft->ndim,*dim = fft->dim;
  const char  *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  unsigned    iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
  PetscBool   flg;
  PetscInt    p_flag,partial_dim=1,ctr;
  PetscMPIInt size,rank;
  ptrdiff_t   *pdim;          /* dims copied to FFTW's preferred integer type */
#if !defined(PETSC_USE_COMPLEX)
  PetscInt    tot_dim;        /* padded local storage size for a sequential real transform */
#endif

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

#if !PetscDefined(HAVE_MPIUNI)
  fftw_mpi_init();
#endif
  /* NOTE(review): calloc result is not checked before the pdim[0] write below */
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  /* Real transform stores 2 reals per complex output; the last dimension is
     padded to dim[last]/2+1 complex values in the loop below */
  tot_dim = 2*dim[0];
#endif
  for (ctr=1; ctr<ndim; ctr++) {
    partial_dim *= dim[ctr];  /* product of all dimensions after the first */
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1);
    else             tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    PetscCall(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N));
    fft->n = fft->N;
#else
    /* Sequential real transform: sizes reflect the padded r2c storage, not fft->N */
    PetscCall(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim));
    fft->n = tot_dim;
#endif
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    ptrdiff_t local_n0,local_0_start,local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
    ptrdiff_t temp;
    PetscInt  N1;
#endif

    /* Ask FFTW how it distributes the data so the Mat's local sizes match */
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      fft->n = (PetscInt)local_n0;
      /* 1D is special: output rows are distributed by local_n1, input columns by local_n0 */
      PetscCall(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N));
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      fft->n = (PetscInt)local_n0*dim[1];
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      /* 2 reals per complex; last dimension padded to dim[1]/2+1 complex values */
      fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1);
      PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)));
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      fft->n = (PetscInt)local_n0*dim[1]*dim[2];
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      PetscCall(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)));
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);

      fft->n = (PetscInt)local_n0*partial_dim;
      PetscCall(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N));
#else
      /* Temporarily swap in the r2c complex size of the last dimension to query
         the layout, then restore it; N1 is the padded global size */
      temp = pdim[ndim-1];

      pdim[ndim-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1     = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);

      pdim[ndim-1] = temp;

      PetscCall(MatSetSizes(A,fft->n,fft->n,N1,N1));
#endif
      break;
    }
#endif
  }
  free(pdim);  /* matches the calloc above; only used for the layout queries */
  PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATFFTW));
  PetscCall(PetscNewLog(A,&fftw));
  fft->data = (void*)fftw;

  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
  fftw->partial_dim = partial_dim;

  PetscCall(PetscMalloc1(ndim, &(fftw->dim_fftw)));
  /* iodims are only needed by the sequential guru planner (see MatMult_SeqFFTW);
     NOTE(review): malloc result unchecked, and freeing is presumably conditional
     on size==1 in MatDestroy_FFTW — verify there */
  if (size == 1) {
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];

  /* Plans are created lazily on first MatMult/MatMultTranspose with the arrays in use */
  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
#if !PetscDefined(HAVE_MPIUNI)
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
#endif
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW));

  /* get runtime options: p_flag indexes iplans[] in parallel with the plans[] names */
  PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");
  PetscCall(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg));
  if (flg) fftw->p_flag = iplans[p_flag];
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}