1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatMult_SeqFFTW" 43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44 { 45 PetscErrorCode ierr; 46 Mat_FFT *fft = (Mat_FFT*)A->data; 47 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48 const PetscScalar *x_array; 49 PetscScalar *y_array; 50 #if defined(PETSC_USE_COMPLEX) 51 #if defined(PETSC_USE_64BIT_INDICES) 52 fftw_iodim64 *iodims; 53 #else 54 fftw_iodim *iodims; 55 #endif 56 PetscInt i; 57 #endif 58 PetscInt ndim = fft->ndim,*dim = fft->dim; 59 60 PetscFunctionBegin; 61 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 62 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 63 if (!fftw->p_forward) { /* create a plan, then excute it */ 64 switch (ndim) { 
65 case 1: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 case 2: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 case 3: 80 #if defined(PETSC_USE_COMPLEX) 81 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 82 #else 83 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 84 #endif 85 break; 86 default: 87 #if defined(PETSC_USE_COMPLEX) 88 iodims = fftw->iodims; 89 #if defined(PETSC_USE_64BIT_INDICES) 90 if (ndim) { 91 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 92 iodims[ndim-1].is = iodims[ndim-1].os = 1; 93 for (i=ndim-2; i>=0; --i) { 94 iodims[i].n = (ptrdiff_t)dim[i]; 95 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 96 } 97 } 98 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 99 #else 100 if (ndim) { 101 iodims[ndim-1].n = (int)dim[ndim-1]; 102 iodims[ndim-1].is = iodims[ndim-1].os = 1; 103 for (i=ndim-2; i>=0; --i) { 104 iodims[i].n = (int)dim[i]; 105 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 106 } 107 } 108 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 109 #endif 110 111 #else 112 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 113 
#endif 114 break; 115 } 116 fftw->finarray = (PetscScalar *) x_array; 117 fftw->foutarray = y_array; 118 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 119 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 120 fftw_execute(fftw->p_forward); 121 } else { /* use existing plan */ 122 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 123 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 124 } else { 125 fftw_execute(fftw->p_forward); 126 } 127 } 128 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 129 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* MatMultTranspose_SeqFFTW performs serial backward DFT 134 Input parameter: 135 A - the matrix 136 x - the vector on which BDFT will be performed 137 138 Output parameter: 139 y - vector that stores result of BDFT 140 */ 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 144 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 145 { 146 PetscErrorCode ierr; 147 Mat_FFT *fft = (Mat_FFT*)A->data; 148 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 149 const PetscScalar *x_array; 150 PetscScalar *y_array; 151 PetscInt ndim=fft->ndim,*dim=fft->dim; 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_64BIT_INDICES) 154 fftw_iodim64 *iodims=fftw->iodims; 155 #else 156 fftw_iodim *iodims=fftw->iodims; 157 #endif 158 #endif 159 160 PetscFunctionBegin; 161 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_backward) { /* create a plan, then excute it */ 164 switch (ndim) { 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 168 #else 169 fftw->p_backward= 
fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 175 #else 176 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 182 #else 183 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 #if defined(PETSC_USE_64BIT_INDICES) 189 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190 #else 191 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 192 #endif 193 #else 194 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 195 #endif 196 break; 197 } 198 fftw->binarray = (PetscScalar *) x_array; 199 fftw->boutarray = y_array; 200 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 201 } else { /* use existing plan */ 202 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 203 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 204 } else { 205 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 206 } 207 } 208 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 209 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 210 PetscFunctionReturn(0); 211 } 212 213 /* MatMult_MPIFFTW performs forward DFT in parallel 214 Input parameter: 215 A - the matrix 
216 x - the vector on which FDFT will be performed 217 218 Output parameter: 219 y - vector that stores result of FDFT 220 */ 221 #undef __FUNCT__ 222 #define __FUNCT__ "MatMult_MPIFFTW" 223 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 224 { 225 PetscErrorCode ierr; 226 Mat_FFT *fft = (Mat_FFT*)A->data; 227 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 228 const PetscScalar *x_array; 229 PetscScalar *y_array; 230 PetscInt ndim=fft->ndim,*dim=fft->dim; 231 MPI_Comm comm; 232 233 PetscFunctionBegin; 234 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 235 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 236 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 237 if (!fftw->p_forward) { /* create a plan, then excute it */ 238 switch (ndim) { 239 case 1: 240 #if defined(PETSC_USE_COMPLEX) 241 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 242 #else 243 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 244 #endif 245 break; 246 case 2: 247 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 248 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 249 #else 250 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 251 #endif 252 break; 253 case 3: 254 #if defined(PETSC_USE_COMPLEX) 255 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 256 #else 257 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 258 #endif 259 break; 260 default: 261 #if defined(PETSC_USE_COMPLEX) 262 fftw->p_forward = 
fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 263 #else 264 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 265 #endif 266 break; 267 } 268 fftw->finarray = (PetscScalar *) x_array; 269 fftw->foutarray = y_array; 270 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 271 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 272 fftw_execute(fftw->p_forward); 273 } else { /* use existing plan */ 274 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 275 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 276 } else { 277 fftw_execute(fftw->p_forward); 278 } 279 } 280 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 281 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 286 Input parameter: 287 A - the matrix 288 x - the vector on which BDFT will be performed 289 290 Output parameter: 291 y - vector that stores result of BDFT 292 */ 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 295 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 296 { 297 PetscErrorCode ierr; 298 Mat_FFT *fft = (Mat_FFT*)A->data; 299 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 300 const PetscScalar *x_array; 301 PetscScalar *y_array; 302 PetscInt ndim=fft->ndim,*dim=fft->dim; 303 MPI_Comm comm; 304 305 PetscFunctionBegin; 306 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 307 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 308 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 309 if (!fftw->p_backward) { /* create a plan, then excute it */ 310 switch (ndim) { 311 case 1: 312 #if defined(PETSC_USE_COMPLEX) 313 fftw->p_backward = 
fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 314 #else 315 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 316 #endif 317 break; 318 case 2: 319 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 320 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 321 #else 322 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 323 #endif 324 break; 325 case 3: 326 #if defined(PETSC_USE_COMPLEX) 327 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 328 #else 329 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 330 #endif 331 break; 332 default: 333 #if defined(PETSC_USE_COMPLEX) 334 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 335 #else 336 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 337 #endif 338 break; 339 } 340 fftw->binarray = (PetscScalar *) x_array; 341 fftw->boutarray = y_array; 342 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 343 } else { /* use existing plan */ 344 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 345 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 346 } else { 347 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 348 } 349 } 350 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 #undef 
__FUNCT__ 356 #define __FUNCT__ "MatDestroy_FFTW" 357 PetscErrorCode MatDestroy_FFTW(Mat A) 358 { 359 Mat_FFT *fft = (Mat_FFT*)A->data; 360 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 361 PetscErrorCode ierr; 362 363 PetscFunctionBegin; 364 fftw_destroy_plan(fftw->p_forward); 365 fftw_destroy_plan(fftw->p_backward); 366 if (fftw->iodims) { 367 free(fftw->iodims); 368 } 369 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 370 ierr = PetscFree(fft->data);CHKERRQ(ierr); 371 fftw_mpi_cleanup(); 372 PetscFunctionReturn(0); 373 } 374 375 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 376 #undef __FUNCT__ 377 #define __FUNCT__ "VecDestroy_MPIFFTW" 378 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 379 { 380 PetscErrorCode ierr; 381 PetscScalar *array; 382 383 PetscFunctionBegin; 384 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 385 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 386 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 387 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatCreateVecsFFTW" 393 /*@ 394 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 395 parallel layout determined by FFTW 396 397 Collective on Mat 398 399 Input Parameter: 400 . A - the matrix 401 402 Output Parameter: 403 + x - (optional) input vector of forward FFTW 404 - y - (optional) output vector of forward FFTW 405 - z - (optional) output vector of backward FFTW 406 407 Level: advanced 408 409 Note: The parallel layout of output of forward FFTW is always same as the input 410 of the backward FFTW. But parallel layout of the input vector of forward 411 FFTW might not be same as the output of backward FFTW. 412 Also note that we need to provide enough space while doing parallel real transform. 413 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 414 invoke the routine fftw_mpi_local_size_transposed routines. 
         Remember one has to change the
         last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded
         depends on if the last dimension is even or odd. If the last dimension is even need to pad two
         zeros if it is odd only one zero is needed.
         Lastly one needs some scratch space at the end of data set in each process. alloc_local
         figures out how much space is needed, i.e. it figures out the data+scratch space for
         each processor and returns that.

.seealso: MatCreateFFTW()
@*/
PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to MatCreateVecsFFTW_FFTW() via the composed function, if present */
  ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateVecsFFTW_FFTW"
/* MatCreateVecsFFTW_FFTW - creates work vectors whose local sizes/arrays match what FFTW
   expects for this transform.  All three output vectors are optional (may be NULL).
   In the parallel case the arrays come from fftw_malloc() (sized by the fftw_mpi_local_size_*
   routines, which include scratch/padding space) and the vectors get a custom destroy
   routine (VecDestroy_MPIFFTW) so the arrays are released with fftw_free(). */
PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N    = fft->N;
  PetscInt       ndim = fft->ndim,*dim = fft->dim,n = fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case: plain Seq vectors, no FFTW-managed storage needed */
#if defined(PETSC_USE_COMPLEX)
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    /* real transform: fft->n already accounts for the r2c padded layout */
    if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* 1D MPI layouts differ between forward and backward; query each direction separately */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real layout: last dimension padded to 2*(dim[1]/2+1) doubles */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last FFTW dimension by n/2+1 (r2c layout) for the size query,
         then restore it afterwards */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension.
This required even when 669 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 670 zeros. This routine does that job by scattering operation. 671 672 .seealso: VecScatterFFTWToPetsc() 673 @*/ 674 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 675 { 676 PetscErrorCode ierr; 677 678 PetscFunctionBegin; 679 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 680 PetscFunctionReturn(0); 681 } 682 683 #undef __FUNCT__ 684 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 685 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 686 { 687 PetscErrorCode ierr; 688 MPI_Comm comm; 689 Mat_FFT *fft = (Mat_FFT*)A->data; 690 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 691 PetscInt N =fft->N; 692 PetscInt ndim =fft->ndim,*dim=fft->dim; 693 PetscInt low; 694 PetscMPIInt rank,size; 695 PetscInt vsize,vsize1; 696 ptrdiff_t local_n0,local_0_start; 697 ptrdiff_t local_n1,local_1_start; 698 VecScatter vecscat; 699 IS list1,list2; 700 #if !defined(PETSC_USE_COMPLEX) 701 PetscInt i,j,k,partial_dim; 702 PetscInt *indx1, *indx2, tempindx, tempindx1; 703 PetscInt NM; 704 ptrdiff_t temp; 705 #endif 706 707 PetscFunctionBegin; 708 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 709 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 710 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 711 ierr = VecGetOwnershipRange(y,&low,NULL); 712 713 if (size==1) { 714 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 715 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 716 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 717 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 718 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 719 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 721 ierr = ISDestroy(&list1);CHKERRQ(ierr); 722 } else { 723 
switch (ndim) { 724 case 1: 725 #if defined(PETSC_USE_COMPLEX) 726 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 727 728 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 729 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 730 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 731 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 732 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 733 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 734 ierr = ISDestroy(&list1);CHKERRQ(ierr); 735 ierr = ISDestroy(&list2);CHKERRQ(ierr); 736 #else 737 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 738 #endif 739 break; 740 case 2: 741 #if defined(PETSC_USE_COMPLEX) 742 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 743 744 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 745 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 746 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 747 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 750 ierr = ISDestroy(&list1);CHKERRQ(ierr); 751 ierr = ISDestroy(&list2);CHKERRQ(ierr); 752 #else 753 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 754 755 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 756 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 757 758 if (dim[1]%2==0) { 759 NM = dim[1]+2; 760 } else { 761 NM = dim[1]+1; 762 } 763 for (i=0; i<local_n0; i++) { 764 for (j=0; j<dim[1]; j++) { 765 tempindx = i*dim[1] + j; 766 tempindx1 = i*NM + j; 767 768 indx1[tempindx]=local_0_start*dim[1]+tempindx; 769 
indx2[tempindx]=low+tempindx1; 770 } 771 } 772 773 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 774 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 775 776 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 777 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 778 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 779 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 780 ierr = ISDestroy(&list1);CHKERRQ(ierr); 781 ierr = ISDestroy(&list2);CHKERRQ(ierr); 782 ierr = PetscFree(indx1);CHKERRQ(ierr); 783 ierr = PetscFree(indx2);CHKERRQ(ierr); 784 #endif 785 break; 786 787 case 3: 788 #if defined(PETSC_USE_COMPLEX) 789 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 790 791 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 792 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 793 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 794 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 795 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 797 ierr = ISDestroy(&list1);CHKERRQ(ierr); 798 ierr = ISDestroy(&list2);CHKERRQ(ierr); 799 #else 800 /* buggy, needs to be fixed. 
See src/mat/examples/tests/ex158.c */ 801 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 802 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 803 804 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 805 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 806 807 if (dim[2]%2==0) NM = dim[2]+2; 808 else NM = dim[2]+1; 809 810 for (i=0; i<local_n0; i++) { 811 for (j=0; j<dim[1]; j++) { 812 for (k=0; k<dim[2]; k++) { 813 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 814 tempindx1 = i*dim[1]*NM + j*NM + k; 815 816 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 817 indx2[tempindx]=low+tempindx1; 818 } 819 } 820 } 821 822 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 823 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 824 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 825 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 826 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 828 ierr = ISDestroy(&list1);CHKERRQ(ierr); 829 ierr = ISDestroy(&list2);CHKERRQ(ierr); 830 ierr = PetscFree(indx1);CHKERRQ(ierr); 831 ierr = PetscFree(indx2);CHKERRQ(ierr); 832 #endif 833 break; 834 835 default: 836 #if defined(PETSC_USE_COMPLEX) 837 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 838 839 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 840 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 841 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 842 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = 
VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);

      /* NM = number of padding entries FFTW appends per row of the last dimension of a real transform */
      if (dim[ndim-1]%2==0) NM = 2;
      else NM = 1;

      /* build index maps: indx1 = natural PETSc ordering, indx2 = FFTW padded ordering */
      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM; /* skip FFTW padding at the end of each last-dimension row */
        j++;
      }
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on A (VecScatterFFTWToPetsc_FFTW) */
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc_FFTW"
/* Implementation of VecScatterFFTWToPetsc for MATFFTW: scatters the FFTW-distributed
   output vector x back into the naturally ordered PETSc vector y, dropping the
   padding FFTW inserts for real transforms. */
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim=fft->dim;
  PetscInt       low;
  PetscMPIInt    rank,size;
  ptrdiff_t      local_n0,local_0_start;
  ptrdiff_t      local_n1,local_1_start;
  VecScatter     vecscat;
  IS             list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt       i,j,k,partial_dim;
  PetscInt       *indx1,*indx2,tempindx,tempindx1;
  PetscInt       NM;
  ptrdiff_t      temp;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);

  if (size==1) {
    /* sequential case: FFTW layout equals PETSc layout; identity scatter */
    ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
    ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
    ierr =
ISDestroy(&list1);CHKERRQ(ierr); 956 957 } else { 958 switch (ndim) { 959 case 1: 960 #if defined(PETSC_USE_COMPLEX) 961 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 962 963 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 964 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 965 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 966 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 969 ierr = ISDestroy(&list1);CHKERRQ(ierr); 970 ierr = ISDestroy(&list2);CHKERRQ(ierr); 971 #else 972 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 973 #endif 974 break; 975 case 2: 976 #if defined(PETSC_USE_COMPLEX) 977 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 978 979 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 980 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 981 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 982 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 984 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 985 ierr = ISDestroy(&list1);CHKERRQ(ierr); 986 ierr = ISDestroy(&list2);CHKERRQ(ierr); 987 #else 988 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 989 990 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 991 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 992 993 if (dim[1]%2==0) NM = dim[1]+2; 994 else NM = dim[1]+1; 995 996 for (i=0; i<local_n0; i++) { 997 for (j=0; j<dim[1]; j++) { 998 tempindx = i*dim[1] + j; 999 tempindx1 = i*NM + j; 1000 1001 
indx1[tempindx]=local_0_start*dim[1]+tempindx; 1002 indx2[tempindx]=low+tempindx1; 1003 } 1004 } 1005 1006 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1007 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1008 1009 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1010 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1013 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1014 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1015 ierr = PetscFree(indx1);CHKERRQ(ierr); 1016 ierr = PetscFree(indx2);CHKERRQ(ierr); 1017 #endif 1018 break; 1019 case 3: 1020 #if defined(PETSC_USE_COMPLEX) 1021 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1022 1023 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1024 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1025 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1026 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1030 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1031 #else 1032 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1033 1034 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1035 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1036 1037 if (dim[2]%2==0) NM = dim[2]+2; 1038 else NM = dim[2]+1; 1039 1040 for (i=0; i<local_n0; i++) { 1041 for (j=0; j<dim[1]; j++) { 1042 for (k=0; k<dim[2]; k++) { 1043 tempindx = i*dim[1]*dim[2] + j*dim[2] + 
k; 1044 tempindx1 = i*dim[1]*NM + j*NM + k; 1045 1046 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1047 indx2[tempindx]=low+tempindx1; 1048 } 1049 } 1050 } 1051 1052 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1053 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1054 1055 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1056 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061 ierr = PetscFree(indx1);CHKERRQ(ierr); 1062 ierr = PetscFree(indx2);CHKERRQ(ierr); 1063 #endif 1064 break; 1065 default: 1066 #if defined(PETSC_USE_COMPLEX) 1067 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1068 1069 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1070 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1071 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1072 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077 #else 1078 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1079 1080 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1081 1082 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1083 1084 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1085 1086 partial_dim = fftw->partial_dim; 1087 1088 ierr = 
PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1089 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1090 1091 if (dim[ndim-1]%2==0) NM = 2; 1092 else NM = 1; 1093 1094 j = low; 1095 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1096 indx1[i] = local_0_start*partial_dim + i; 1097 indx2[i] = j; 1098 if (k%dim[ndim-1]==0) j+=NM; 1099 j++; 1100 } 1101 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1102 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1103 1104 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1105 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1107 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1108 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1109 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1110 ierr = PetscFree(indx1);CHKERRQ(ierr); 1111 ierr = PetscFree(indx2);CHKERRQ(ierr); 1112 #endif 1113 break; 1114 } 1115 } 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatCreate_FFTW" 1121 /* 1122 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1123 1124 Options Database Keys: 1125 + -mat_fftw_plannerflags - set FFTW planner flags 1126 1127 Level: intermediate 1128 1129 */ 1130 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1131 { 1132 PetscErrorCode ierr; 1133 MPI_Comm comm; 1134 Mat_FFT *fft=(Mat_FFT*)A->data; 1135 Mat_FFTW *fftw; 1136 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1137 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1138 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1139 PetscBool flg; 1140 PetscInt p_flag,partial_dim=1,ctr; 1141 PetscMPIInt size,rank; 1142 ptrdiff_t *pdim; 1143 ptrdiff_t 
local_n1,local_1_start; 1144 #if !defined(PETSC_USE_COMPLEX) 1145 ptrdiff_t temp; 1146 PetscInt N1,tot_dim; 1147 #else 1148 PetscInt n1; 1149 #endif 1150 1151 PetscFunctionBegin; 1152 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1153 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1154 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1155 1156 fftw_mpi_init(); 1157 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1158 pdim[0] = dim[0]; 1159 #if !defined(PETSC_USE_COMPLEX) 1160 tot_dim = 2*dim[0]; 1161 #endif 1162 for (ctr=1; ctr<ndim; ctr++) { 1163 partial_dim *= dim[ctr]; 1164 pdim[ctr] = dim[ctr]; 1165 #if !defined(PETSC_USE_COMPLEX) 1166 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1167 else tot_dim *= dim[ctr]; 1168 #endif 1169 } 1170 1171 if (size == 1) { 1172 #if defined(PETSC_USE_COMPLEX) 1173 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1174 n = N; 1175 #else 1176 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1177 n = tot_dim; 1178 #endif 1179 1180 } else { 1181 ptrdiff_t local_n0,local_0_start; 1182 switch (ndim) { 1183 case 1: 1184 #if !defined(PETSC_USE_COMPLEX) 1185 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1186 #else 1187 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1188 1189 n = (PetscInt)local_n0; 1190 n1 = (PetscInt)local_n1; 1191 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1192 #endif 1193 break; 1194 case 2: 1195 #if defined(PETSC_USE_COMPLEX) 1196 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1197 /* 1198 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1199 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1200 */ 1201 n = (PetscInt)local_n0*dim[1]; 1202 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1203 #else 1204 
fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1205 1206 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1207 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1208 #endif 1209 break; 1210 case 3: 1211 #if defined(PETSC_USE_COMPLEX) 1212 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1213 1214 n = (PetscInt)local_n0*dim[1]*dim[2]; 1215 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1216 #else 1217 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1218 1219 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1220 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1221 #endif 1222 break; 1223 default: 1224 #if defined(PETSC_USE_COMPLEX) 1225 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1226 1227 n = (PetscInt)local_n0*partial_dim; 1228 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1229 #else 1230 temp = pdim[ndim-1]; 1231 1232 pdim[ndim-1] = temp/2 + 1; 1233 1234 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1235 1236 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1237 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1238 1239 pdim[ndim-1] = temp; 1240 1241 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1242 #endif 1243 break; 1244 } 1245 } 1246 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1247 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1248 fft->data = (void*)fftw; 1249 1250 fft->n = n; 1251 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1252 fftw->partial_dim = partial_dim; 1253 1254 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1255 if (size == 1) { 1256 #if defined(PETSC_USE_64BIT_INDICES) 1257 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1258 #else 1259 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * 
ndim); 1260 #endif 1261 } 1262 1263 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1264 1265 fftw->p_forward = 0; 1266 fftw->p_backward = 0; 1267 fftw->p_flag = FFTW_ESTIMATE; 1268 1269 if (size == 1) { 1270 A->ops->mult = MatMult_SeqFFTW; 1271 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1272 } else { 1273 A->ops->mult = MatMult_MPIFFTW; 1274 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1275 } 1276 fft->matdestroy = MatDestroy_FFTW; 1277 A->assembled = PETSC_TRUE; 1278 A->preallocated = PETSC_TRUE; 1279 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1283 1284 /* get runtime options */ 1285 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1286 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1287 if (flg) { 1288 fftw->p_flag = iplans[p_flag]; 1289 } 1290 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 1295 1296 1297