1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatMult_SeqFFTW" 43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44 { 45 PetscErrorCode ierr; 46 Mat_FFT *fft = (Mat_FFT*)A->data; 47 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48 const PetscScalar *x_array; 49 PetscScalar *y_array; 50 #if defined(PETSC_USE_COMPLEX) 51 #if defined(PETSC_USE_64BIT_INDICES) 52 fftw_iodim64 *iodims; 53 #else 54 fftw_iodim *iodims; 55 #endif 56 PetscInt i; 57 #endif 58 PetscInt ndim = fft->ndim,*dim = fft->dim; 59 60 PetscFunctionBegin; 61 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 62 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 63 if (!fftw->p_forward) { /* create a plan, then excute it */ 64 switch (ndim) { 65 case 1: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 case 2: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 case 3: 80 #if defined(PETSC_USE_COMPLEX) 81 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 82 #else 83 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 84 #endif 85 break; 86 default: 87 #if defined(PETSC_USE_COMPLEX) 88 iodims = fftw->iodims; 89 #if defined(PETSC_USE_64BIT_INDICES) 90 if (ndim) { 91 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 92 iodims[ndim-1].is = iodims[ndim-1].os = 1; 93 for (i=ndim-2; i>=0; --i) { 94 iodims[i].n = (ptrdiff_t)dim[i]; 95 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 96 } 97 } 98 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 99 #else 100 if (ndim) { 101 iodims[ndim-1].n = (int)dim[ndim-1]; 102 iodims[ndim-1].is = iodims[ndim-1].os = 1; 103 for (i=ndim-2; i>=0; --i) { 104 iodims[i].n = (int)dim[i]; 105 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 106 } 107 } 108 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 109 #endif 110 111 #else 112 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 113 #endif 114 break; 115 } 116 fftw->finarray = (PetscScalar *) x_array; 117 fftw->foutarray = y_array; 118 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 119 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 120 fftw_execute(fftw->p_forward); 121 } else { /* use existing plan */ 122 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 123 #if defined(PETSC_USE_COMPLEX) 124 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 125 #else 126 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 127 #endif 128 } else { 129 fftw_execute(fftw->p_forward); 130 } 131 } 132 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 133 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 /* MatMultTranspose_SeqFFTW performs serial backward DFT 138 Input parameter: 139 A - the matrix 140 x - the vector on which BDFT will be performed 141 142 Output parameter: 143 y - vector that stores result of BDFT 144 */ 145 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 148 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 149 { 150 PetscErrorCode ierr; 151 Mat_FFT *fft = (Mat_FFT*)A->data; 152 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 153 const PetscScalar *x_array; 154 PetscScalar *y_array; 155 PetscInt ndim=fft->ndim,*dim=fft->dim; 156 #if defined(PETSC_USE_COMPLEX) 157 #if defined(PETSC_USE_64BIT_INDICES) 158 fftw_iodim64 *iodims=fftw->iodims; 159 #else 160 fftw_iodim *iodims=fftw->iodims; 161 #endif 162 #endif 163 164 PetscFunctionBegin; 165 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 166 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 167 if (!fftw->p_backward) { /* create a plan, then excute it */ 168 switch (ndim) { 169 case 1: 170 #if defined(PETSC_USE_COMPLEX) 171 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 172 #else 173 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 174 #endif 175 break; 176 case 2: 177 #if defined(PETSC_USE_COMPLEX) 178 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 179 #else 180 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 181 #endif 182 break; 183 case 3: 184 #if defined(PETSC_USE_COMPLEX) 185 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 186 #else 187 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 188 #endif 189 break; 190 default: 191 #if defined(PETSC_USE_COMPLEX) 192 #if defined(PETSC_USE_64BIT_INDICES) 193 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 194 #else 195 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 196 #endif 197 #else 198 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 199 #endif 200 break; 201 } 202 fftw->binarray = (PetscScalar *) x_array; 203 fftw->boutarray = y_array; 204 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 205 } else { /* use existing plan */ 206 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 207 #if defined(PETSC_USE_COMPLEX) 208 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 209 #else 210 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 211 #endif 212 } else { 213 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 214 } 215 } 216 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 217 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 218 PetscFunctionReturn(0); 219 } 220 221 /* MatMult_MPIFFTW performs forward DFT in parallel 222 Input parameter: 223 A - the matrix 224 x - the vector on which FDFT will be performed 225 226 Output parameter: 227 y - vector that stores result of FDFT 228 */ 229 #undef __FUNCT__ 230 #define __FUNCT__ "MatMult_MPIFFTW" 231 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 232 { 233 PetscErrorCode ierr; 234 Mat_FFT *fft = (Mat_FFT*)A->data; 235 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 236 const PetscScalar *x_array; 237 PetscScalar *y_array; 238 PetscInt ndim=fft->ndim,*dim=fft->dim; 239 MPI_Comm comm; 240 241 PetscFunctionBegin; 242 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 243 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 244 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 245 if (!fftw->p_forward) { /* create a plan, then excute it */ 246 switch (ndim) { 247 case 1: 248 #if defined(PETSC_USE_COMPLEX) 249 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 250 #else 251 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 252 #endif 253 break; 254 case 2: 255 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 256 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 257 #else 258 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 259 #endif 260 break; 261 case 3: 262 #if defined(PETSC_USE_COMPLEX) 263 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 264 #else 265 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 266 #endif 267 break; 268 default: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 271 #else 272 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 273 #endif 274 break; 275 } 276 fftw->finarray = (PetscScalar *) x_array; 277 fftw->foutarray = y_array; 278 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 279 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 280 fftw_execute(fftw->p_forward); 281 } else { /* use existing plan */ 282 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 283 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 284 } else { 285 fftw_execute(fftw->p_forward); 286 } 287 } 288 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 289 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 294 Input parameter: 295 A - the matrix 296 x - the vector on which BDFT will be performed 297 298 Output parameter: 299 y - vector that stores result of BDFT 300 */ 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 303 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 304 { 305 PetscErrorCode ierr; 306 Mat_FFT *fft = (Mat_FFT*)A->data; 307 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 308 const PetscScalar *x_array; 309 PetscScalar *y_array; 310 PetscInt ndim=fft->ndim,*dim=fft->dim; 311 MPI_Comm comm; 312 313 PetscFunctionBegin; 314 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 315 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 316 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 317 if (!fftw->p_backward) { /* create a plan, then excute it */ 318 switch (ndim) { 319 case 1: 320 #if defined(PETSC_USE_COMPLEX) 321 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 322 #else 323 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 324 #endif 325 break; 326 case 2: 327 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 328 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 329 #else 330 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 331 #endif 332 break; 333 case 3: 334 #if defined(PETSC_USE_COMPLEX) 335 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 336 #else 337 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 338 #endif 339 break; 340 default: 341 #if defined(PETSC_USE_COMPLEX) 342 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 343 #else 344 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 345 #endif 346 break; 347 } 348 fftw->binarray = (PetscScalar *) x_array; 349 fftw->boutarray = y_array; 350 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 351 } else { /* use existing plan */ 352 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 353 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 354 } else { 355 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 356 } 357 } 358 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 359 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatDestroy_FFTW" 365 PetscErrorCode MatDestroy_FFTW(Mat A) 366 { 367 Mat_FFT *fft = (Mat_FFT*)A->data; 368 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 369 PetscErrorCode ierr; 370 371 PetscFunctionBegin; 372 fftw_destroy_plan(fftw->p_forward); 373 fftw_destroy_plan(fftw->p_backward); 374 if (fftw->iodims) { 375 free(fftw->iodims); 376 } 377 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 378 ierr = PetscFree(fft->data);CHKERRQ(ierr); 379 fftw_mpi_cleanup(); 380 PetscFunctionReturn(0); 381 } 382 383 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 384 #undef __FUNCT__ 385 #define __FUNCT__ "VecDestroy_MPIFFTW" 386 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 387 { 388 PetscErrorCode ierr; 389 PetscScalar *array; 390 391 PetscFunctionBegin; 392 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 393 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 394 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 395 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 #undef __FUNCT__ 400 #define __FUNCT__ "MatCreateVecsFFTW" 401 /*@ 402 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 403 parallel layout determined by FFTW 404 405 Collective on Mat 406 407 Input Parameter: 408 . A - the matrix 409 410 Output Parameter: 411 + x - (optional) input vector of forward FFTW 412 - y - (optional) output vector of forward FFTW 413 - z - (optional) output vector of backward FFTW 414 415 Level: advanced 416 417 Note: The parallel layout of output of forward FFTW is always same as the input 418 of the backward FFTW. But parallel layout of the input vector of forward 419 FFTW might not be same as the output of backward FFTW. 420 Also note that we need to provide enough space while doing parallel real transform. 421 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 422 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 423 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 424 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 425 zeros if it is odd only one zero is needed. 426 Lastly one needs some scratch space at the end of data set in each process. alloc_local 427 figures out how much space is needed, i.e. it figures out the data+scratch space for 428 each processor and returns that. 429 430 .seealso: MatCreateFFTW() 431 @*/ 432 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 433 { 434 PetscErrorCode ierr; 435 436 PetscFunctionBegin; 437 ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatCreateVecsFFTW_FFTW" 443 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 444 { 445 PetscErrorCode ierr; 446 PetscMPIInt size,rank; 447 MPI_Comm comm; 448 Mat_FFT *fft = (Mat_FFT*)A->data; 449 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 450 PetscInt N = fft->N; 451 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 452 453 PetscFunctionBegin; 454 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 455 PetscValidType(A,1); 456 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 457 458 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 459 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 460 if (size == 1) { /* sequential case */ 461 #if defined(PETSC_USE_COMPLEX) 462 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 463 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 464 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 465 #else 466 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 467 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 468 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 469 #endif 470 } else { /* parallel cases */ 471 ptrdiff_t alloc_local,local_n0,local_0_start; 472 ptrdiff_t local_n1; 473 fftw_complex *data_fout; 474 ptrdiff_t local_1_start; 475 #if defined(PETSC_USE_COMPLEX) 476 fftw_complex *data_fin,*data_bout; 477 #else 478 double *data_finr,*data_boutr; 479 PetscInt n1,N1; 480 ptrdiff_t temp; 481 #endif 482 483 switch (ndim) { 484 case 1: 485 #if !defined(PETSC_USE_COMPLEX) 486 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 487 #else 488 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 489 if (fin) { 490 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 491 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 492 493 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 494 } 495 if (fout) { 496 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 497 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 498 499 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 500 } 501 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 502 if (bout) { 503 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 504 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 505 506 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 break; 509 #endif 510 case 2: 511 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 512 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 513 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 514 if (fin) { 515 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 516 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 517 518 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 if (bout) { 521 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 522 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 523 524 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 525 } 526 if (fout) { 527 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 529 530 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 531 } 532 #else 533 /* Get local size */ 534 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 535 if (fin) { 536 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 538 539 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 540 } 541 if (fout) { 542 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 544 545 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 546 } 547 if (bout) { 548 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 549 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 550 551 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 #endif 554 break; 555 case 3: 556 #if !defined(PETSC_USE_COMPLEX) 557 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 558 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 559 if (fin) { 560 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 561 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 562 563 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (bout) { 566 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 567 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 568 569 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 570 } 571 572 if (fout) { 573 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 574 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 575 576 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 #else 579 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 580 if (fin) { 581 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 583 584 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 585 } 586 if (fout) { 587 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 588 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 589 590 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 591 } 592 if (bout) { 593 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 594 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 595 596 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 597 } 598 #endif 599 break; 600 default: 601 #if !defined(PETSC_USE_COMPLEX) 602 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 603 604 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 605 606 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 607 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 608 609 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 610 611 if (fin) { 612 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 613 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 614 615 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 616 } 617 if (bout) { 618 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 619 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 620 621 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 622 } 623 624 if (fout) { 625 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 626 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 627 628 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 629 } 630 #else 631 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 632 if (fin) { 633 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 635 636 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 637 } 638 if (fout) { 639 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 640 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 641 642 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 643 } 644 if (bout) { 645 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 646 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 647 648 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 #endif 651 break; 652 } 653 } 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "VecScatterPetscToFFTW" 659 /*@ 660 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 661 662 Collective on Mat 663 664 Input Parameters: 665 + A - FFTW matrix 666 - x - the PETSc vector 667 668 Output Parameters: 669 . y - the FFTW vector 670 671 Options Database Keys: 672 . -mat_fftw_plannerflags - set FFTW planner flags 673 674 Level: intermediate 675 676 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 677 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 678 zeros. This routine does that job by scattering operation. 679 680 .seealso: VecScatterFFTWToPetsc() 681 @*/ 682 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 683 { 684 PetscErrorCode ierr; 685 686 PetscFunctionBegin; 687 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 693 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 694 { 695 PetscErrorCode ierr; 696 MPI_Comm comm; 697 Mat_FFT *fft = (Mat_FFT*)A->data; 698 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 699 PetscInt N =fft->N; 700 PetscInt ndim =fft->ndim,*dim=fft->dim; 701 PetscInt low; 702 PetscMPIInt rank,size; 703 PetscInt vsize,vsize1; 704 ptrdiff_t local_n0,local_0_start; 705 ptrdiff_t local_n1,local_1_start; 706 VecScatter vecscat; 707 IS list1,list2; 708 #if !defined(PETSC_USE_COMPLEX) 709 PetscInt i,j,k,partial_dim; 710 PetscInt *indx1, *indx2, tempindx, tempindx1; 711 PetscInt NM; 712 ptrdiff_t temp; 713 #endif 714 715 PetscFunctionBegin; 716 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 717 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 718 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 719 ierr = VecGetOwnershipRange(y,&low,NULL); 720 721 if (size==1) { 722 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 723 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 724 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 725 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 726 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 727 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 729 ierr = ISDestroy(&list1);CHKERRQ(ierr); 730 } else { 731 switch (ndim) { 732 case 1: 733 #if defined(PETSC_USE_COMPLEX) 734 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 735 736 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 737 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 738 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 739 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 741 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 742 ierr = ISDestroy(&list1);CHKERRQ(ierr); 743 ierr = ISDestroy(&list2);CHKERRQ(ierr); 744 #else 745 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 746 #endif 747 break; 748 case 2: 749 #if defined(PETSC_USE_COMPLEX) 750 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 751 752 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 753 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 754 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 755 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 756 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 758 ierr = ISDestroy(&list1);CHKERRQ(ierr); 759 ierr = ISDestroy(&list2);CHKERRQ(ierr); 760 #else 761 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 762 763 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 764 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 765 766 if (dim[1]%2==0) { 767 NM = dim[1]+2; 768 } else { 769 NM = dim[1]+1; 770 } 771 for (i=0; i<local_n0; i++) { 772 for (j=0; j<dim[1]; j++) { 773 tempindx = i*dim[1] + j; 774 tempindx1 = i*NM + j; 775 776 indx1[tempindx]=local_0_start*dim[1]+tempindx; 777 indx2[tempindx]=low+tempindx1; 778 } 779 } 780 781 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 782 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 783 784 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 785 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 786 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 788 ierr = ISDestroy(&list1);CHKERRQ(ierr); 789 ierr = ISDestroy(&list2);CHKERRQ(ierr); 790 ierr = PetscFree(indx1);CHKERRQ(ierr); 791 ierr = PetscFree(indx2);CHKERRQ(ierr); 792 #endif 793 break; 794 795 case 3: 796 #if defined(PETSC_USE_COMPLEX) 797 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 798 799 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 800 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 801 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 802 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 803 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 804 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 805 ierr = ISDestroy(&list1);CHKERRQ(ierr); 806 ierr = ISDestroy(&list2);CHKERRQ(ierr); 807 #else 808 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 809 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 810 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 811 812 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 813 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 814 815 if (dim[2]%2==0) NM = dim[2]+2; 816 else NM = dim[2]+1; 817 818 for (i=0; i<local_n0; i++) { 819 for (j=0; j<dim[1]; j++) { 820 for (k=0; k<dim[2]; k++) { 821 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 822 tempindx1 = i*dim[1]*NM + j*NM + k; 823 824 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 825 indx2[tempindx]=low+tempindx1; 826 } 827 } 828 } 829 830 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 831 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 832 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 833 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 834 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 836 ierr = ISDestroy(&list1);CHKERRQ(ierr); 837 ierr = ISDestroy(&list2);CHKERRQ(ierr); 838 ierr = PetscFree(indx1);CHKERRQ(ierr); 839 ierr = PetscFree(indx2);CHKERRQ(ierr); 840 #endif 841 break; 842 843 default: 844 #if defined(PETSC_USE_COMPLEX) 845 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 846 847 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 848 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 849 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 850 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 851 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 852 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 853 ierr = ISDestroy(&list1);CHKERRQ(ierr); 854 ierr = ISDestroy(&list2);CHKERRQ(ierr); 855 #else 856 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 857 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 858 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 859 860 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 861 862 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 863 864 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 865 866 partial_dim = fftw->partial_dim; 867 868 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 869 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 870 871 if (dim[ndim-1]%2==0) NM = 2; 872 else NM = 1; 873 874 j = low; 875 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 876 indx1[i] = local_0_start*partial_dim + i; 877 indx2[i] = j; 878 if (k%dim[ndim-1]==0) j+=NM; 879 j++; 880 } 881 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 882 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 883 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 884 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 887 ierr = ISDestroy(&list1);CHKERRQ(ierr); 888 ierr = ISDestroy(&list2);CHKERRQ(ierr); 889 ierr = PetscFree(indx1);CHKERRQ(ierr); 890 ierr = PetscFree(indx2);CHKERRQ(ierr); 891 #endif 892 break; 893 } 894 } 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "VecScatterFFTWToPetsc" 900 /*@ 901 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 902 903 Collective on Mat 904 905 Input Parameters: 906 + A - FFTW matrix 907 - x - FFTW vector 908 909 Output Parameters: 910 . y - PETSc vector 911 912 Level: intermediate 913 914 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 915 VecScatterFFTWToPetsc removes those extra zeros. 916 917 .seealso: VecScatterPetscToFFTW() 918 @*/ 919 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 920 { 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 930 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 931 { 932 PetscErrorCode ierr; 933 MPI_Comm comm; 934 Mat_FFT *fft = (Mat_FFT*)A->data; 935 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 936 PetscInt N = fft->N; 937 PetscInt ndim = fft->ndim,*dim=fft->dim; 938 PetscInt low; 939 PetscMPIInt rank,size; 940 ptrdiff_t local_n0,local_0_start; 941 ptrdiff_t local_n1,local_1_start; 942 VecScatter vecscat; 943 IS list1,list2; 944 #if !defined(PETSC_USE_COMPLEX) 945 PetscInt i,j,k,partial_dim; 946 PetscInt *indx1, *indx2, tempindx, tempindx1; 947 PetscInt NM; 948 ptrdiff_t temp; 949 #endif 950 951 PetscFunctionBegin; 952 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 953 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 954 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 955 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 956 957 if (size==1) { 958 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 959 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 960 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 963 ierr = ISDestroy(&list1);CHKERRQ(ierr); 964 965 } else { 966 switch (ndim) { 967 case 1: 968 #if defined(PETSC_USE_COMPLEX) 969 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 970 971 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 972 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 973 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 974 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977 ierr = ISDestroy(&list1);CHKERRQ(ierr); 978 ierr = ISDestroy(&list2);CHKERRQ(ierr); 979 #else 980 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 981 #endif 982 break; 983 case 2: 984 #if defined(PETSC_USE_COMPLEX) 985 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 986 987 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 988 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 989 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 990 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993 ierr = ISDestroy(&list1);CHKERRQ(ierr); 994 ierr = ISDestroy(&list2);CHKERRQ(ierr); 995 #else 996 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 997 998 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 999 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1000 1001 if (dim[1]%2==0) NM = dim[1]+2; 1002 else NM = dim[1]+1; 1003 1004 for (i=0; i<local_n0; i++) { 1005 for (j=0; j<dim[1]; j++) { 1006 tempindx = i*dim[1] + j; 1007 tempindx1 = i*NM + j; 1008 1009 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1010 indx2[tempindx]=low+tempindx1; 1011 } 1012 } 1013 1014 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1015 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1016 1017 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1018 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1020 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1021 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1022 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1023 ierr = PetscFree(indx1);CHKERRQ(ierr); 1024 ierr = PetscFree(indx2);CHKERRQ(ierr); 1025 #endif 1026 break; 1027 case 3: 1028 #if defined(PETSC_USE_COMPLEX) 1029 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1030 1031 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1032 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1033 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1034 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1037 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1038 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1039 #else 1040 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1041 1042 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1043 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1044 1045 if (dim[2]%2==0) NM = dim[2]+2; 1046 else NM = dim[2]+1; 1047 1048 for (i=0; i<local_n0; i++) { 1049 for (j=0; j<dim[1]; j++) { 1050 for (k=0; k<dim[2]; k++) { 1051 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1052 tempindx1 = i*dim[1]*NM + j*NM + k; 1053 1054 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1055 indx2[tempindx]=low+tempindx1; 1056 } 1057 } 1058 } 1059 1060 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1061 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1062 1063 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1064 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1065 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1066 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1067 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1068 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1069 ierr = PetscFree(indx1);CHKERRQ(ierr); 1070 ierr = PetscFree(indx2);CHKERRQ(ierr); 1071 #endif 1072 break; 1073 default: 1074 #if defined(PETSC_USE_COMPLEX) 1075 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1076 1077 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1078 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1079 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1080 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1082 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1083 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1084 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1085 #else 1086 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1087 1088 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1089 1090 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1091 1092 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1093 1094 partial_dim = fftw->partial_dim; 1095 1096 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1097 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1098 1099 if (dim[ndim-1]%2==0) NM = 2; 1100 else NM = 1; 1101 1102 j = low; 1103 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1104 indx1[i] = local_0_start*partial_dim + i; 1105 indx2[i] = j; 1106 if (k%dim[ndim-1]==0) j+=NM; 1107 j++; 1108 } 1109 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1110 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1111 1112 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1113 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1116 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1117 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1118 ierr = PetscFree(indx1);CHKERRQ(ierr); 1119 ierr = PetscFree(indx2);CHKERRQ(ierr); 1120 #endif 1121 break; 1122 } 1123 } 1124 PetscFunctionReturn(0); 1125 } 1126 1127 #undef __FUNCT__ 1128 #define __FUNCT__ "MatCreate_FFTW" 1129 /* 1130 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1131 1132 Options Database Keys: 1133 + -mat_fftw_plannerflags - set FFTW planner flags 1134 1135 Level: intermediate 1136 1137 */ 1138 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1139 { 1140 PetscErrorCode ierr; 1141 MPI_Comm comm; 1142 Mat_FFT *fft=(Mat_FFT*)A->data; 1143 Mat_FFTW *fftw; 1144 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1145 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1146 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1147 PetscBool flg; 1148 PetscInt p_flag,partial_dim=1,ctr; 1149 PetscMPIInt size,rank; 1150 ptrdiff_t *pdim; 1151 ptrdiff_t local_n1,local_1_start; 1152 #if !defined(PETSC_USE_COMPLEX) 1153 ptrdiff_t temp; 1154 PetscInt N1,tot_dim; 1155 #else 1156 PetscInt n1; 1157 #endif 1158 1159 PetscFunctionBegin; 1160 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1161 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1162 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1163 1164 fftw_mpi_init(); 1165 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1166 pdim[0] = dim[0]; 1167 #if !defined(PETSC_USE_COMPLEX) 1168 tot_dim = 2*dim[0]; 1169 #endif 1170 for (ctr=1; ctr<ndim; ctr++) { 1171 partial_dim *= dim[ctr]; 1172 pdim[ctr] = dim[ctr]; 1173 #if !defined(PETSC_USE_COMPLEX) 1174 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1175 else tot_dim *= dim[ctr]; 1176 #endif 1177 } 1178 1179 if (size == 1) { 1180 #if defined(PETSC_USE_COMPLEX) 1181 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1182 n = N; 1183 #else 1184 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1185 n = tot_dim; 1186 #endif 1187 1188 } else { 1189 ptrdiff_t local_n0,local_0_start; 1190 switch (ndim) { 1191 case 1: 1192 #if !defined(PETSC_USE_COMPLEX) 1193 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1194 #else 1195 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1196 1197 n = (PetscInt)local_n0; 1198 n1 = (PetscInt)local_n1; 1199 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1200 #endif 1201 break; 1202 case 2: 1203 #if defined(PETSC_USE_COMPLEX) 1204 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1205 /* 1206 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1207 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1208 */ 1209 n = (PetscInt)local_n0*dim[1]; 1210 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1211 #else 1212 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1213 1214 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1215 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1216 #endif 1217 break; 1218 case 3: 1219 #if defined(PETSC_USE_COMPLEX) 1220 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1221 1222 n = (PetscInt)local_n0*dim[1]*dim[2]; 1223 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1224 #else 1225 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1226 1227 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1228 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1229 #endif 1230 break; 1231 default: 1232 #if defined(PETSC_USE_COMPLEX) 1233 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1234 1235 n = (PetscInt)local_n0*partial_dim; 1236 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1237 #else 1238 temp = pdim[ndim-1]; 1239 1240 pdim[ndim-1] = temp/2 + 1; 1241 1242 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1243 1244 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1245 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1246 1247 pdim[ndim-1] = temp; 1248 1249 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1250 #endif 1251 break; 1252 } 1253 } 1254 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1255 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1256 fft->data = (void*)fftw; 1257 1258 fft->n = n; 1259 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1260 fftw->partial_dim = partial_dim; 1261 1262 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1263 if (size == 1) { 1264 #if defined(PETSC_USE_64BIT_INDICES) 1265 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1266 #else 1267 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1268 #endif 1269 } 1270 1271 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1272 1273 fftw->p_forward = 0; 1274 fftw->p_backward = 0; 1275 fftw->p_flag = FFTW_ESTIMATE; 1276 1277 if (size == 1) { 1278 A->ops->mult = MatMult_SeqFFTW; 1279 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1280 } else { 1281 A->ops->mult = MatMult_MPIFFTW; 1282 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1283 } 1284 fft->matdestroy = MatDestroy_FFTW; 1285 A->assembled = PETSC_TRUE; 1286 A->preallocated = PETSC_TRUE; 1287 1288 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1291 1292 /* get runtime options */ 1293 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1294 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1295 if (flg) { 1296 fftw->p_flag = iplans[p_flag]; 1297 } 1298 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1299 PetscFunctionReturn(0); 1300 } 1301 1302 1303 1304 1305