1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatMult_SeqFFTW" 43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44 { 45 PetscErrorCode ierr; 46 Mat_FFT *fft = (Mat_FFT*)A->data; 47 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48 PetscScalar *x_array,*y_array; 49 #if defined(PETSC_USE_COMPLEX) 50 #if defined(PETSC_USE_64BIT_INDICES) 51 fftw_iodim64 *iodims; 52 #else 53 fftw_iodim *iodims; 54 #endif 55 PetscInt i; 56 #endif 57 PetscInt ndim = fft->ndim,*dim = fft->dim; 58 59 PetscFunctionBegin; 60 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 61 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62 if (!fftw->p_forward) { /* create a plan, then excute it */ 63 switch (ndim) { 64 case 1: 65 #if 
defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 2: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 3: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 default: 86 #if defined(PETSC_USE_COMPLEX) 87 iodims = fftw->iodims; 88 #if defined(PETSC_USE_64BIT_INDICES) 89 if (ndim) { 90 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 91 iodims[ndim-1].is = iodims[ndim-1].os = 1; 92 for (i=ndim-2; i>=0; --i) { 93 iodims[i].n = (ptrdiff_t)dim[i]; 94 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 95 } 96 } 97 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98 #else 99 if (ndim) { 100 iodims[ndim-1].n = (int)dim[ndim-1]; 101 iodims[ndim-1].is = iodims[ndim-1].os = 1; 102 for (i=ndim-2; i>=0; --i) { 103 iodims[i].n = (int)dim[i]; 104 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105 } 106 } 107 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108 #endif 109 110 #else 111 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 112 #endif 113 break; 114 } 
115 fftw->finarray = x_array; 116 fftw->foutarray = y_array; 117 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119 fftw_execute(fftw->p_forward); 120 } else { /* use existing plan */ 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 123 } else { 124 fftw_execute(fftw->p_forward); 125 } 126 } 127 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 128 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /* MatMultTranspose_SeqFFTW performs serial backward DFT 133 Input parameter: 134 A - the matrix 135 x - the vector on which BDFT will be performed 136 137 Output parameter: 138 y - vector that stores result of BDFT 139 */ 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 143 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 144 { 145 PetscErrorCode ierr; 146 Mat_FFT *fft = (Mat_FFT*)A->data; 147 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 148 PetscScalar *x_array,*y_array; 149 PetscInt ndim=fft->ndim,*dim=fft->dim; 150 #if defined(PETSC_USE_COMPLEX) 151 #if defined(PETSC_USE_64BIT_INDICES) 152 fftw_iodim64 *iodims=fftw->iodims; 153 #else 154 fftw_iodim *iodims=fftw->iodims; 155 #endif 156 #endif 157 158 PetscFunctionBegin; 159 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 160 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 161 if (!fftw->p_backward) { /* create a plan, then excute it */ 162 switch (ndim) { 163 case 1: 164 #if defined(PETSC_USE_COMPLEX) 165 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 166 #else 167 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 168 #endif 169 break; 170 case 2: 171 #if 
defined(PETSC_USE_COMPLEX) 172 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 173 #else 174 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 175 #endif 176 break; 177 case 3: 178 #if defined(PETSC_USE_COMPLEX) 179 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 180 #else 181 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 182 #endif 183 break; 184 default: 185 #if defined(PETSC_USE_COMPLEX) 186 #if defined(PETSC_USE_64BIT_INDICES) 187 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 188 #else 189 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190 #endif 191 #else 192 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 193 #endif 194 break; 195 } 196 fftw->binarray = x_array; 197 fftw->boutarray = y_array; 198 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 199 } else { /* use existing plan */ 200 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 /* MatMult_MPIFFTW performs forward DFT in parallel 212 Input parameter: 213 A - the matrix 214 x - the vector on which FDFT will be performed 215 216 Output parameter: 217 y - vector that stores result of FDFT 218 */ 219 #undef __FUNCT__ 
220 #define __FUNCT__ "MatMult_MPIFFTW" 221 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 222 { 223 PetscErrorCode ierr; 224 Mat_FFT *fft = (Mat_FFT*)A->data; 225 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 226 PetscScalar *x_array,*y_array; 227 PetscInt ndim=fft->ndim,*dim=fft->dim; 228 MPI_Comm comm; 229 230 PetscFunctionBegin; 231 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 232 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 233 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 234 if (!fftw->p_forward) { /* create a plan, then excute it */ 235 switch (ndim) { 236 case 1: 237 #if defined(PETSC_USE_COMPLEX) 238 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 239 #else 240 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 241 #endif 242 break; 243 case 2: 244 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 245 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 246 #else 247 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 248 #endif 249 break; 250 case 3: 251 #if defined(PETSC_USE_COMPLEX) 252 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 253 #else 254 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 255 #endif 256 break; 257 default: 258 #if defined(PETSC_USE_COMPLEX) 259 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 260 #else 261 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 262 
#endif 263 break; 264 } 265 fftw->finarray = x_array; 266 fftw->foutarray = y_array; 267 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 268 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 269 fftw_execute(fftw->p_forward); 270 } else { /* use existing plan */ 271 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 272 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 273 } else { 274 fftw_execute(fftw->p_forward); 275 } 276 } 277 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 278 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 283 Input parameter: 284 A - the matrix 285 x - the vector on which BDFT will be performed 286 287 Output parameter: 288 y - vector that stores result of BDFT 289 */ 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 292 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 293 { 294 PetscErrorCode ierr; 295 Mat_FFT *fft = (Mat_FFT*)A->data; 296 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 297 PetscScalar *x_array,*y_array; 298 PetscInt ndim=fft->ndim,*dim=fft->dim; 299 MPI_Comm comm; 300 301 PetscFunctionBegin; 302 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 303 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 304 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 305 if (!fftw->p_backward) { /* create a plan, then excute it */ 306 switch (ndim) { 307 case 1: 308 #if defined(PETSC_USE_COMPLEX) 309 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 310 #else 311 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 312 #endif 313 break; 314 case 2: 315 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real 
transforms call fftw_mpi_plan_dft_c2r */ 316 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 317 #else 318 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 319 #endif 320 break; 321 case 3: 322 #if defined(PETSC_USE_COMPLEX) 323 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 324 #else 325 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 326 #endif 327 break; 328 default: 329 #if defined(PETSC_USE_COMPLEX) 330 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 331 #else 332 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 333 #endif 334 break; 335 } 336 fftw->binarray = x_array; 337 fftw->boutarray = y_array; 338 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 339 } else { /* use existing plan */ 340 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 341 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 342 } else { 343 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344 } 345 } 346 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 347 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatDestroy_FFTW" 353 PetscErrorCode MatDestroy_FFTW(Mat A) 354 { 355 Mat_FFT *fft = (Mat_FFT*)A->data; 356 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 357 PetscErrorCode ierr; 358 359 PetscFunctionBegin; 360 fftw_destroy_plan(fftw->p_forward); 361 fftw_destroy_plan(fftw->p_backward); 362 if (fftw->iodims) { 363 free(fftw->iodims); 364 
} 365 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 366 ierr = PetscFree(fft->data);CHKERRQ(ierr); 367 fftw_mpi_cleanup(); 368 PetscFunctionReturn(0); 369 } 370 371 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 372 #undef __FUNCT__ 373 #define __FUNCT__ "VecDestroy_MPIFFTW" 374 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 375 { 376 PetscErrorCode ierr; 377 PetscScalar *array; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 381 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 382 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 383 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatCreateVecsFFTW" 389 /*@ 390 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 391 parallel layout determined by FFTW 392 393 Collective on Mat 394 395 Input Parameter: 396 . A - the matrix 397 398 Output Parameter: 399 + x - (optional) input vector of forward FFTW 400 - y - (optional) output vector of forward FFTW 401 - z - (optional) output vector of backward FFTW 402 403 Level: advanced 404 405 Note: The parallel layout of output of forward FFTW is always same as the input 406 of the backward FFTW. But parallel layout of the input vector of forward 407 FFTW might not be same as the output of backward FFTW. 408 Also note that we need to provide enough space while doing parallel real transform. 409 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 410 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 411 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 412 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 413 zeros if it is odd only one zero is needed. 414 Lastly one needs some scratch space at the end of data set in each process. 
alloc_local 415 figures out how much space is needed, i.e. it figures out the data+scratch space for 416 each processor and returns that. 417 418 .seealso: MatCreateFFTW() 419 @*/ 420 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 421 { 422 PetscErrorCode ierr; 423 424 PetscFunctionBegin; 425 ierr = PetscTryMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatCreateVecsFFTW_FFTW" 431 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 432 { 433 PetscErrorCode ierr; 434 PetscMPIInt size,rank; 435 MPI_Comm comm; 436 Mat_FFT *fft = (Mat_FFT*)A->data; 437 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 438 PetscInt N = fft->N; 439 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 440 441 PetscFunctionBegin; 442 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 443 PetscValidType(A,1); 444 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 445 446 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 447 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 448 if (size == 1) { /* sequential case */ 449 #if defined(PETSC_USE_COMPLEX) 450 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 451 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 452 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 453 #else 454 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 455 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 456 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 457 #endif 458 } else { /* parallel cases */ 459 ptrdiff_t alloc_local,local_n0,local_0_start; 460 ptrdiff_t local_n1; 461 fftw_complex *data_fout; 462 ptrdiff_t local_1_start; 463 #if defined(PETSC_USE_COMPLEX) 464 fftw_complex *data_fin,*data_bout; 465 #else 466 double *data_finr,*data_boutr; 467 PetscInt n1,N1; 468 ptrdiff_t temp; 469 #endif 470 471 switch 
(ndim) { 472 case 1: 473 #if !defined(PETSC_USE_COMPLEX) 474 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 475 #else 476 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 477 if (fin) { 478 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 480 481 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 482 } 483 if (fout) { 484 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 485 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 486 487 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 488 } 489 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 490 if (bout) { 491 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 493 494 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 495 } 496 break; 497 #endif 498 case 2: 499 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 500 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 501 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 502 if (fin) { 503 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 504 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 505 506 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 if (bout) { 509 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 510 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 511 512 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 
515 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 517 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 #else 521 /* Get local size */ 522 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 523 if (fin) { 524 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 526 527 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (fout) { 530 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 532 533 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (bout) { 536 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 537 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 538 539 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 540 } 541 #endif 542 break; 543 case 3: 544 #if !defined(PETSC_USE_COMPLEX) 545 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 546 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 547 if (fin) { 548 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 549 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 550 551 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (bout) { 554 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 555 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 556 557 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 560 if (fout) { 561 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = 
VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 563 564 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 565 } 566 #else 567 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 568 if (fin) { 569 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 571 572 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 573 } 574 if (fout) { 575 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 576 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 577 578 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 579 } 580 if (bout) { 581 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 582 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 583 584 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 585 } 586 #endif 587 break; 588 default: 589 #if !defined(PETSC_USE_COMPLEX) 590 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 591 592 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 593 594 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 595 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 596 597 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 598 599 if (fin) { 600 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 601 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 602 603 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 604 } 605 if (bout) { 606 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 607 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 608 609 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 610 } 611 612 if (fout) { 613 
data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 614 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 615 616 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 617 } 618 #else 619 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 620 if (fin) { 621 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 622 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 623 624 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 625 } 626 if (fout) { 627 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 628 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 629 630 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 if (bout) { 633 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 635 636 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 637 } 638 #endif 639 break; 640 } 641 } 642 PetscFunctionReturn(0); 643 } 644 645 #undef __FUNCT__ 646 #define __FUNCT__ "VecScatterPetscToFFTW" 647 /*@ 648 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 649 650 Collective on Mat 651 652 Input Parameters: 653 + A - FFTW matrix 654 - x - the PETSc vector 655 656 Output Parameters: 657 . y - the FFTW vector 658 659 Options Database Keys: 660 . -mat_fftw_plannerflags - set FFTW planner flags 661 662 Level: intermediate 663 664 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 665 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 666 zeros. This routine does that job by scattering operation. 
667 668 .seealso: VecScatterFFTWToPetsc() 669 @*/ 670 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 671 { 672 PetscErrorCode ierr; 673 674 PetscFunctionBegin; 675 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 681 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 682 { 683 PetscErrorCode ierr; 684 MPI_Comm comm; 685 Mat_FFT *fft = (Mat_FFT*)A->data; 686 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 687 PetscInt N =fft->N; 688 PetscInt ndim =fft->ndim,*dim=fft->dim; 689 PetscInt low; 690 PetscMPIInt rank,size; 691 PetscInt vsize,vsize1; 692 ptrdiff_t local_n0,local_0_start; 693 ptrdiff_t local_n1,local_1_start; 694 VecScatter vecscat; 695 IS list1,list2; 696 #if !defined(PETSC_USE_COMPLEX) 697 PetscInt i,j,k,partial_dim; 698 PetscInt *indx1, *indx2, tempindx, tempindx1; 699 PetscInt NM; 700 ptrdiff_t temp; 701 #endif 702 703 PetscFunctionBegin; 704 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 705 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 706 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 707 ierr = VecGetOwnershipRange(y,&low,NULL); 708 709 if (size==1) { 710 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 711 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 712 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 713 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 714 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 715 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 717 ierr = ISDestroy(&list1);CHKERRQ(ierr); 718 } else { 719 switch (ndim) { 720 case 1: 721 #if defined(PETSC_USE_COMPLEX) 722 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 723 724 ierr = 
ISCreateStride(comm,local_n0,local_0_start,1,&list1); 725 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 726 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 727 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 728 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 729 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 730 ierr = ISDestroy(&list1);CHKERRQ(ierr); 731 ierr = ISDestroy(&list2);CHKERRQ(ierr); 732 #else 733 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 734 #endif 735 break; 736 case 2: 737 #if defined(PETSC_USE_COMPLEX) 738 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 739 740 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 741 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 742 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 743 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 745 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 746 ierr = ISDestroy(&list1);CHKERRQ(ierr); 747 ierr = ISDestroy(&list2);CHKERRQ(ierr); 748 #else 749 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 750 751 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 752 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 753 754 if (dim[1]%2==0) { 755 NM = dim[1]+2; 756 } else { 757 NM = dim[1]+1; 758 } 759 for (i=0; i<local_n0; i++) { 760 for (j=0; j<dim[1]; j++) { 761 tempindx = i*dim[1] + j; 762 tempindx1 = i*NM + j; 763 764 indx1[tempindx]=local_0_start*dim[1]+tempindx; 765 indx2[tempindx]=low+tempindx1; 766 } 767 } 768 769 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 770 ierr = 
ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* 3-D complex: FFTW distributes whole x-slabs, so the scatter is a plain
         stride-1 copy from the natural ordering into this rank's FFTW range */
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
      /* NOTE(review): everything below the SETERRQ is unreachable dead code kept for
         reference; it builds index sets that skip the r2c padding of the last dimension */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);

      /* NM = padded length of the last dimension in the FFTW r2c layout */
      if (dim[2]%2==0) NM = dim[2]+2;
      else NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k;   /* natural (unpadded) index */
            tempindx1 = i*dim[1]*NM + j*NM + k;           /* padded FFTW index */

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3, complex: same slab distribution as 2-D/3-D, flattened over the
         product of the non-leading dimensions (fftw->partial_dim) */
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      /* NOTE(review): unreachable dead code after SETERRQ, kept for reference */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);

      /* NM = number of padding entries appended to each row of the last dimension */
      if (dim[ndim-1]%2==0) NM = 2;
      else NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM;   /* skip the r2c padding at each row end */
        j++;
      }
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/*@
   VecScatterFFTWToPetsc -
Converts FFTW output to the PETSc vector. 890 891 Collective on Mat 892 893 Input Parameters: 894 + A - FFTW matrix 895 - x - FFTW vector 896 897 Output Parameters: 898 . y - PETSc vector 899 900 Level: intermediate 901 902 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 903 VecScatterFFTWToPetsc removes those extra zeros. 904 905 .seealso: VecScatterPetscToFFTW() 906 @*/ 907 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 908 { 909 PetscErrorCode ierr; 910 911 PetscFunctionBegin; 912 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 918 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 919 { 920 PetscErrorCode ierr; 921 MPI_Comm comm; 922 Mat_FFT *fft = (Mat_FFT*)A->data; 923 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 924 PetscInt N = fft->N; 925 PetscInt ndim = fft->ndim,*dim=fft->dim; 926 PetscInt low; 927 PetscMPIInt rank,size; 928 ptrdiff_t local_n0,local_0_start; 929 ptrdiff_t local_n1,local_1_start; 930 VecScatter vecscat; 931 IS list1,list2; 932 #if !defined(PETSC_USE_COMPLEX) 933 PetscInt i,j,k,partial_dim; 934 PetscInt *indx1, *indx2, tempindx, tempindx1; 935 PetscInt NM; 936 ptrdiff_t temp; 937 #endif 938 939 PetscFunctionBegin; 940 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 941 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 942 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 943 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 944 945 if (size==1) { 946 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 947 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 948 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 949 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 950 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 951 
ierr = ISDestroy(&list1);CHKERRQ(ierr); 952 953 } else { 954 switch (ndim) { 955 case 1: 956 #if defined(PETSC_USE_COMPLEX) 957 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 958 959 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 960 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 961 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 962 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 965 ierr = ISDestroy(&list1);CHKERRQ(ierr); 966 ierr = ISDestroy(&list2);CHKERRQ(ierr); 967 #else 968 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 969 #endif 970 break; 971 case 2: 972 #if defined(PETSC_USE_COMPLEX) 973 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 974 975 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 976 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 977 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 978 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 979 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 981 ierr = ISDestroy(&list1);CHKERRQ(ierr); 982 ierr = ISDestroy(&list2);CHKERRQ(ierr); 983 #else 984 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 985 986 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 987 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 988 989 if (dim[1]%2==0) NM = dim[1]+2; 990 else NM = dim[1]+1; 991 992 for (i=0; i<local_n0; i++) { 993 for (j=0; j<dim[1]; j++) { 994 tempindx = i*dim[1] + j; 995 tempindx1 = i*NM + j; 996 997 
indx1[tempindx]=local_0_start*dim[1]+tempindx; 998 indx2[tempindx]=low+tempindx1; 999 } 1000 } 1001 1002 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1003 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1004 1005 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1006 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1009 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1010 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1011 ierr = PetscFree(indx1);CHKERRQ(ierr); 1012 ierr = PetscFree(indx2);CHKERRQ(ierr); 1013 #endif 1014 break; 1015 case 3: 1016 #if defined(PETSC_USE_COMPLEX) 1017 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1018 1019 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1020 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1021 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1022 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1024 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1025 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1026 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1027 #else 1028 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1029 1030 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1031 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1032 1033 if (dim[2]%2==0) NM = dim[2]+2; 1034 else NM = dim[2]+1; 1035 1036 for (i=0; i<local_n0; i++) { 1037 for (j=0; j<dim[1]; j++) { 1038 for (k=0; k<dim[2]; k++) { 1039 tempindx = 
i*dim[1]*dim[2] + j*dim[2] + k; 1040 tempindx1 = i*dim[1]*NM + j*NM + k; 1041 1042 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1043 indx2[tempindx]=low+tempindx1; 1044 } 1045 } 1046 } 1047 1048 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1049 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1050 1051 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057 ierr = PetscFree(indx1);CHKERRQ(ierr); 1058 ierr = PetscFree(indx2);CHKERRQ(ierr); 1059 #endif 1060 break; 1061 default: 1062 #if defined(PETSC_USE_COMPLEX) 1063 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1064 1065 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1066 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1067 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1068 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1071 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1072 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1073 #else 1074 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1075 1076 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1077 1078 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1079 1080 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1081 1082 partial_dim = fftw->partial_dim; 1083 1084 ierr = 
PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1085 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1086 1087 if (dim[ndim-1]%2==0) NM = 2; 1088 else NM = 1; 1089 1090 j = low; 1091 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1092 indx1[i] = local_0_start*partial_dim + i; 1093 indx2[i] = j; 1094 if (k%dim[ndim-1]==0) j+=NM; 1095 j++; 1096 } 1097 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1098 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1099 1100 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1101 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1102 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1103 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1104 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1105 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1106 ierr = PetscFree(indx1);CHKERRQ(ierr); 1107 ierr = PetscFree(indx2);CHKERRQ(ierr); 1108 #endif 1109 break; 1110 } 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatCreate_FFTW" 1117 /* 1118 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1119 1120 Options Database Keys: 1121 + -mat_fftw_plannerflags - set FFTW planner flags 1122 1123 Level: intermediate 1124 1125 */ 1126 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1127 { 1128 PetscErrorCode ierr; 1129 MPI_Comm comm; 1130 Mat_FFT *fft=(Mat_FFT*)A->data; 1131 Mat_FFTW *fftw; 1132 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1133 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1134 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1135 PetscBool flg; 1136 PetscInt p_flag,partial_dim=1,ctr; 1137 PetscMPIInt size,rank; 1138 
ptrdiff_t *pdim; 1139 ptrdiff_t local_n1,local_1_start; 1140 #if !defined(PETSC_USE_COMPLEX) 1141 ptrdiff_t temp; 1142 PetscInt N1,tot_dim; 1143 #else 1144 PetscInt n1; 1145 #endif 1146 1147 PetscFunctionBegin; 1148 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1149 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1150 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1151 1152 fftw_mpi_init(); 1153 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1154 pdim[0] = dim[0]; 1155 #if !defined(PETSC_USE_COMPLEX) 1156 tot_dim = 2*dim[0]; 1157 #endif 1158 for (ctr=1; ctr<ndim; ctr++) { 1159 partial_dim *= dim[ctr]; 1160 pdim[ctr] = dim[ctr]; 1161 #if !defined(PETSC_USE_COMPLEX) 1162 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1163 else tot_dim *= dim[ctr]; 1164 #endif 1165 } 1166 1167 if (size == 1) { 1168 #if defined(PETSC_USE_COMPLEX) 1169 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1170 n = N; 1171 #else 1172 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1173 n = tot_dim; 1174 #endif 1175 1176 } else { 1177 ptrdiff_t local_n0,local_0_start; 1178 switch (ndim) { 1179 case 1: 1180 #if !defined(PETSC_USE_COMPLEX) 1181 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1182 #else 1183 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1184 1185 n = (PetscInt)local_n0; 1186 n1 = (PetscInt)local_n1; 1187 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1188 #endif 1189 break; 1190 case 2: 1191 #if defined(PETSC_USE_COMPLEX) 1192 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1193 /* 1194 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1195 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1196 */ 1197 n = (PetscInt)local_n0*dim[1]; 1198 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1199 #else 1200 
fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1201 1202 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1203 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1204 #endif 1205 break; 1206 case 3: 1207 #if defined(PETSC_USE_COMPLEX) 1208 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1209 1210 n = (PetscInt)local_n0*dim[1]*dim[2]; 1211 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1212 #else 1213 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1214 1215 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1216 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1217 #endif 1218 break; 1219 default: 1220 #if defined(PETSC_USE_COMPLEX) 1221 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1222 1223 n = (PetscInt)local_n0*partial_dim; 1224 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1225 #else 1226 temp = pdim[ndim-1]; 1227 1228 pdim[ndim-1] = temp/2 + 1; 1229 1230 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1231 1232 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1233 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1234 1235 pdim[ndim-1] = temp; 1236 1237 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1238 #endif 1239 break; 1240 } 1241 } 1242 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1243 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1244 fft->data = (void*)fftw; 1245 1246 fft->n = n; 1247 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1248 fftw->partial_dim = partial_dim; 1249 1250 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1251 if (size == 1) { 1252 #if defined(PETSC_USE_64BIT_INDICES) 1253 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1254 #else 1255 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * 
ndim); 1256 #endif 1257 } 1258 1259 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1260 1261 fftw->p_forward = 0; 1262 fftw->p_backward = 0; 1263 fftw->p_flag = FFTW_ESTIMATE; 1264 1265 if (size == 1) { 1266 A->ops->mult = MatMult_SeqFFTW; 1267 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1268 } else { 1269 A->ops->mult = MatMult_MPIFFTW; 1270 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1271 } 1272 fft->matdestroy = MatDestroy_FFTW; 1273 A->assembled = PETSC_TRUE; 1274 A->preallocated = PETSC_TRUE; 1275 1276 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1279 1280 /* get runtime options */ 1281 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1282 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1283 if (flg) { 1284 fftw->p_flag = iplans[p_flag]; 1285 } 1286 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1287 PetscFunctionReturn(0); 1288 } 1289 1290 1291 1292 1293