1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 #undef __FUNCT__ 42 #define __FUNCT__ "MatMult_SeqFFTW" 43 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 44 { 45 PetscErrorCode ierr; 46 Mat_FFT *fft = (Mat_FFT*)A->data; 47 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 48 PetscScalar *x_array,*y_array; 49 #if defined(PETSC_USE_64BIT_INDICES) 50 fftw_iodim64 *iodims; 51 #else 52 fftw_iodim *iodims; 53 #endif 54 PetscInt ndim = fft->ndim,*dim = fft->dim,i; 55 56 PetscFunctionBegin; 57 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 58 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 59 if (!fftw->p_forward) { /* create a plan, then excute it */ 60 switch (ndim) { 61 case 1: 62 #if defined(PETSC_USE_COMPLEX) 63 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 64 #else 65 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 66 #endif 67 break; 68 case 2: 69 #if defined(PETSC_USE_COMPLEX) 70 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 71 #else 72 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 73 #endif 74 break; 75 case 3: 76 #if defined(PETSC_USE_COMPLEX) 77 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 78 #else 79 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 80 #endif 81 break; 82 default: 83 #if defined(PETSC_USE_COMPLEX) 84 iodims = fftw->iodims; 85 #if defined(PETSC_USE_64BIT_INDICES) 86 if (ndim) { 87 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 88 iodims[ndim-1].is = iodims[ndim-1].os = 1; 89 for (i=ndim-2; i>=0; --i) { 90 iodims[i].n = (ptrdiff_t)dim[i]; 91 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 92 } 93 } 94 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 95 #else 96 if (ndim) { 97 iodims[ndim-1].n = (int)dim[ndim-1]; 98 iodims[ndim-1].is = iodims[ndim-1].os = 1; 99 for (i=ndim-2; i>=0; --i) { 100 iodims[i].n = (int)dim[i]; 101 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 102 } 103 } 104 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 105 #endif 106 107 #else 108 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 109 #endif 110 break; 111 } 112 fftw->finarray = x_array; 113 fftw->foutarray = y_array; 114 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 115 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 116 fftw_execute(fftw->p_forward); 117 } else { /* use existing plan */ 118 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 119 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 120 } else { 121 fftw_execute(fftw->p_forward); 122 } 123 } 124 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 125 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 126 PetscFunctionReturn(0); 127 } 128 129 /* MatMultTranspose_SeqFFTW performs serial backward DFT 130 Input parameter: 131 A - the matrix 132 x - the vector on which BDFT will be performed 133 134 Output parameter: 135 y - vector that stores result of BDFT 136 */ 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 140 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 141 { 142 PetscErrorCode ierr; 143 Mat_FFT *fft = (Mat_FFT*)A->data; 144 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 145 PetscScalar *x_array,*y_array; 146 PetscInt ndim=fft->ndim,*dim=fft->dim; 147 #if defined(PETSC_USE_64BIT_INDICES) 148 fftw_iodim64 *iodims=fftw->iodims; 149 #else 150 fftw_iodim *iodims=fftw->iodims; 151 #endif 152 153 PetscFunctionBegin; 154 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 155 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 156 if (!fftw->p_backward) { /* create a plan, then excute it */ 157 switch (ndim) { 158 case 1: 159 #if defined(PETSC_USE_COMPLEX) 160 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 161 #else 162 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 163 #endif 164 break; 165 case 2: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 168 #else 169 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 170 #endif 171 break; 172 case 3: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 175 #else 176 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 177 #endif 178 break; 179 default: 180 #if defined(PETSC_USE_COMPLEX) 181 #if defined(PETSC_USE_64BIT_INDICES) 182 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183 #else 184 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 185 #endif 186 #else 187 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 188 #endif 189 break; 190 } 191 fftw->binarray = x_array; 192 fftw->boutarray = y_array; 193 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 194 } else { /* use existing plan */ 195 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 196 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 197 } else { 198 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 199 } 200 } 201 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 202 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 /* MatMult_MPIFFTW performs forward DFT in parallel 207 Input parameter: 208 A - the matrix 209 x - the vector on which FDFT will be performed 210 211 Output parameter: 212 y - vector that stores result of FDFT 213 */ 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMult_MPIFFTW" 216 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 217 { 218 PetscErrorCode ierr; 219 Mat_FFT *fft = (Mat_FFT*)A->data; 220 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 221 PetscScalar *x_array,*y_array; 222 PetscInt ndim=fft->ndim,*dim=fft->dim; 223 MPI_Comm comm; 224 225 PetscFunctionBegin; 226 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 227 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 228 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 229 if (!fftw->p_forward) { /* create a plan, then excute it */ 230 switch (ndim) { 231 case 1: 232 #if defined(PETSC_USE_COMPLEX) 233 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 234 #else 235 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 236 #endif 237 break; 238 case 2: 239 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 240 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 241 #else 242 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 243 #endif 244 break; 245 case 3: 246 #if defined(PETSC_USE_COMPLEX) 247 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 248 #else 249 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 250 #endif 251 break; 252 default: 253 #if defined(PETSC_USE_COMPLEX) 254 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 255 #else 256 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 257 #endif 258 break; 259 } 260 fftw->finarray = x_array; 261 fftw->foutarray = y_array; 262 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 263 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 264 fftw_execute(fftw->p_forward); 265 } else { /* use existing plan */ 266 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 267 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 268 } else { 269 fftw_execute(fftw->p_forward); 270 } 271 } 272 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 273 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 278 Input parameter: 279 A - the matrix 280 x - the vector on which BDFT will be performed 281 282 Output parameter: 283 y - vector that stores result of BDFT 284 */ 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 287 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 288 { 289 PetscErrorCode ierr; 290 Mat_FFT *fft = (Mat_FFT*)A->data; 291 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 292 PetscScalar *x_array,*y_array; 293 PetscInt ndim=fft->ndim,*dim=fft->dim; 294 MPI_Comm comm; 295 296 PetscFunctionBegin; 297 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 298 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 299 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 300 if (!fftw->p_backward) { /* create a plan, then excute it */ 301 switch (ndim) { 302 case 1: 303 #if defined(PETSC_USE_COMPLEX) 304 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 305 #else 306 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 307 #endif 308 break; 309 case 2: 310 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 311 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 312 #else 313 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 314 #endif 315 break; 316 case 3: 317 #if defined(PETSC_USE_COMPLEX) 318 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 319 #else 320 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 321 #endif 322 break; 323 default: 324 #if defined(PETSC_USE_COMPLEX) 325 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 326 #else 327 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 328 #endif 329 break; 330 } 331 fftw->binarray = x_array; 332 fftw->boutarray = y_array; 333 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 334 } else { /* use existing plan */ 335 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 336 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 337 } else { 338 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 339 } 340 } 341 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 342 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatDestroy_FFTW" 348 PetscErrorCode MatDestroy_FFTW(Mat A) 349 { 350 Mat_FFT *fft = (Mat_FFT*)A->data; 351 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 fftw_destroy_plan(fftw->p_forward); 356 fftw_destroy_plan(fftw->p_backward); 357 if (fftw->iodims) { 358 free(fftw->iodims); 359 } 360 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 361 ierr = PetscFree(fft->data);CHKERRQ(ierr); 362 fftw_mpi_cleanup(); 363 PetscFunctionReturn(0); 364 } 365 366 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 367 #undef __FUNCT__ 368 #define __FUNCT__ "VecDestroy_MPIFFTW" 369 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 370 { 371 PetscErrorCode ierr; 372 PetscScalar *array; 373 374 PetscFunctionBegin; 375 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 376 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 377 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 378 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 379 PetscFunctionReturn(0); 380 } 381 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatGetVecsFFTW" 384 /*@ 385 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 386 parallel layout determined by FFTW 387 388 Collective on Mat 389 390 Input Parameter: 391 . A - the matrix 392 393 Output Parameter: 394 + x - (optional) input vector of forward FFTW 395 - y - (optional) output vector of forward FFTW 396 - z - (optional) output vector of backward FFTW 397 398 Level: advanced 399 400 Note: The parallel layout of output of forward FFTW is always same as the input 401 of the backward FFTW. But parallel layout of the input vector of forward 402 FFTW might not be same as the output of backward FFTW. 403 Also note that we need to provide enough space while doing parallel real transform. 404 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 405 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 406 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 407 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 408 zeros if it is odd only one zero is needed. 409 Lastly one needs some scratch space at the end of data set in each process. alloc_local 410 figures out how much space is needed, i.e. it figures out the data+scratch space for 411 each processor and returns that. 412 413 .seealso: MatCreateFFTW() 414 @*/ 415 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 416 { 417 PetscErrorCode ierr; 418 419 PetscFunctionBegin; 420 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 426 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 427 { 428 PetscErrorCode ierr; 429 PetscMPIInt size,rank; 430 MPI_Comm comm; 431 Mat_FFT *fft = (Mat_FFT*)A->data; 432 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 433 PetscInt N = fft->N; 434 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 435 436 PetscFunctionBegin; 437 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 438 PetscValidType(A,1); 439 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 440 441 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 442 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 443 if (size == 1) { /* sequential case */ 444 #if defined(PETSC_USE_COMPLEX) 445 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 446 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 447 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 448 #else 449 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 450 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 451 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 452 #endif 453 } else { /* parallel cases */ 454 ptrdiff_t alloc_local,local_n0,local_0_start; 455 ptrdiff_t local_n1; 456 fftw_complex *data_fout; 457 ptrdiff_t local_1_start; 458 #if defined(PETSC_USE_COMPLEX) 459 fftw_complex *data_fin,*data_bout; 460 #else 461 double *data_finr,*data_boutr; 462 PetscInt n1,N1; 463 ptrdiff_t temp; 464 #endif 465 466 switch (ndim) { 467 case 1: 468 #if !defined(PETSC_USE_COMPLEX) 469 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 470 #else 471 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 472 if (fin) { 473 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 474 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 475 476 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 477 } 478 if (fout) { 479 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 481 482 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 483 } 484 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 485 if (bout) { 486 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 487 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 488 489 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 490 } 491 break; 492 #endif 493 case 2: 494 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 495 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 496 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 497 if (fin) { 498 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 499 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 500 501 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 502 } 503 if (bout) { 504 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 505 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 506 507 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 508 } 509 if (fout) { 510 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 512 513 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 #else 516 /* Get local size */ 517 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 518 if (fin) { 519 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 521 522 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 523 } 524 if (fout) { 525 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 527 528 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 529 } 530 if (bout) { 531 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 532 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 533 534 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 535 } 536 #endif 537 break; 538 case 3: 539 #if !defined(PETSC_USE_COMPLEX) 540 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 541 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 542 if (fin) { 543 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 544 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 545 546 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 547 } 548 if (bout) { 549 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 550 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 551 552 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 553 } 554 555 if (fout) { 556 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 558 559 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 560 } 561 #else 562 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 563 if (fin) { 564 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 566 567 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 568 } 569 if (fout) { 570 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 571 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 572 573 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 if (bout) { 576 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 578 579 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 #endif 582 break; 583 default: 584 #if !defined(PETSC_USE_COMPLEX) 585 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 586 587 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 588 589 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 590 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 591 592 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 593 594 if (fin) { 595 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 596 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 597 598 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 599 } 600 if (bout) { 601 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 602 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 603 604 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 605 } 606 607 if (fout) { 608 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 609 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 610 611 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 612 } 613 #else 614 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 615 if (fin) { 616 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 617 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 618 619 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 620 } 621 if (fout) { 622 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 623 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 624 625 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 626 } 627 if (bout) { 628 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 629 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 630 631 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 632 } 633 #endif 634 break; 635 } 636 } 637 PetscFunctionReturn(0); 638 } 639 640 #undef __FUNCT__ 641 #define __FUNCT__ "VecScatterPetscToFFTW" 642 /*@ 643 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 644 645 Collective on Mat 646 647 Input Parameters: 648 + A - FFTW matrix 649 - x - the PETSc vector 650 651 Output Parameters: 652 . y - the FFTW vector 653 654 Options Database Keys: 655 . -mat_fftw_plannerflags - set FFTW planner flags 656 657 Level: intermediate 658 659 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 660 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 661 zeros. This routine does that job by scattering operation. 662 663 .seealso: VecScatterFFTWToPetsc() 664 @*/ 665 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 666 { 667 PetscErrorCode ierr; 668 669 PetscFunctionBegin; 670 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 676 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 677 { 678 PetscErrorCode ierr; 679 MPI_Comm comm; 680 Mat_FFT *fft = (Mat_FFT*)A->data; 681 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 682 PetscInt N =fft->N; 683 PetscInt ndim =fft->ndim,*dim=fft->dim; 684 PetscInt low; 685 PetscMPIInt rank,size; 686 PetscInt vsize,vsize1; 687 ptrdiff_t local_n0,local_0_start; 688 ptrdiff_t local_n1,local_1_start; 689 VecScatter vecscat; 690 IS list1,list2; 691 #if !defined(PETSC_USE_COMPLEX) 692 PetscInt i,j,k,partial_dim; 693 PetscInt *indx1, *indx2, tempindx, tempindx1; 694 PetscInt N1, n1,NM; 695 ptrdiff_t temp; 696 #endif 697 698 PetscFunctionBegin; 699 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 700 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 701 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 702 ierr = VecGetOwnershipRange(y,&low,NULL); 703 704 if (size==1) { 705 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 706 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 707 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 708 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 709 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 710 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 711 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 712 ierr = ISDestroy(&list1);CHKERRQ(ierr); 713 } else { 714 switch (ndim) { 715 case 1: 716 #if defined(PETSC_USE_COMPLEX) 717 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 718 719 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 720 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 721 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 722 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 723 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 724 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 725 ierr = ISDestroy(&list1);CHKERRQ(ierr); 726 ierr = ISDestroy(&list2);CHKERRQ(ierr); 727 #else 728 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 729 #endif 730 break; 731 case 2: 732 #if defined(PETSC_USE_COMPLEX) 733 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 734 735 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 736 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 737 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 738 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 739 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 740 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 741 ierr = ISDestroy(&list1);CHKERRQ(ierr); 742 ierr = ISDestroy(&list2);CHKERRQ(ierr); 743 #else 744 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 745 746 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 747 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 748 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 749 750 if (dim[1]%2==0) { 751 NM = dim[1]+2; 752 } else { 753 NM = dim[1]+1; 754 } 755 for (i=0; i<local_n0; i++) { 756 for (j=0; j<dim[1]; j++) { 757 tempindx = i*dim[1] + j; 758 tempindx1 = i*NM + j; 759 760 indx1[tempindx]=local_0_start*dim[1]+tempindx; 761 indx2[tempindx]=low+tempindx1; 762 } 763 } 764 765 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 766 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 767 768 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 769 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 772 ierr = ISDestroy(&list1);CHKERRQ(ierr); 773 ierr = ISDestroy(&list2);CHKERRQ(ierr); 774 ierr = PetscFree(indx1);CHKERRQ(ierr); 775 ierr = PetscFree(indx2);CHKERRQ(ierr); 776 #endif 777 break; 778 779 case 3: 780 #if defined(PETSC_USE_COMPLEX) 781 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 782 783 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 784 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 785 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 786 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 789 ierr = ISDestroy(&list1);CHKERRQ(ierr); 790 ierr = ISDestroy(&list2);CHKERRQ(ierr); 791 #else 792 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 793 794 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 795 796 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 797 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 798 799 if (dim[2]%2==0) NM = dim[2]+2; 800 else NM = dim[2]+1; 801 802 for (i=0; i<local_n0; i++) { 803 for (j=0; j<dim[1]; j++) { 804 for (k=0; k<dim[2]; k++) { 805 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 806 tempindx1 = i*dim[1]*NM + j*NM + k; 807 808 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 809 indx2[tempindx]=low+tempindx1; 810 } 811 } 812 } 813 814 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 815 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 816 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 817 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 818 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 819 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 820 ierr = ISDestroy(&list1);CHKERRQ(ierr); 821 ierr = ISDestroy(&list2);CHKERRQ(ierr); 822 ierr = PetscFree(indx1);CHKERRQ(ierr); 823 ierr = PetscFree(indx2);CHKERRQ(ierr); 824 #endif 825 break; 826 827 default: 828 #if defined(PETSC_USE_COMPLEX) 829 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 830 831 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 832 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 833 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 834 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 835 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 836 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 837 ierr = ISDestroy(&list1);CHKERRQ(ierr); 838 ierr = ISDestroy(&list2);CHKERRQ(ierr); 839 #else 840 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 841 842 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 843 844 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 845 846 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 847 848 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 849 850 partial_dim = fftw->partial_dim; 851 852 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 853 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 854 855 if (dim[ndim-1]%2==0) NM = 2; 856 else NM = 1; 857 858 j = low; 859 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 860 indx1[i] = local_0_start*partial_dim + i; 861 indx2[i] = j; 862 if (k%dim[ndim-1]==0) j+=NM; 863 j++; 864 } 865 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 866 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 867 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 868 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 871 ierr = ISDestroy(&list1);CHKERRQ(ierr); 872 ierr = ISDestroy(&list2);CHKERRQ(ierr); 873 ierr = PetscFree(indx1);CHKERRQ(ierr); 874 ierr = PetscFree(indx2);CHKERRQ(ierr); 875 #endif 876 break; 877 } 878 } 879 PetscFunctionReturn(0); 880 } 881 882 #undef __FUNCT__ 883 #define __FUNCT__ "VecScatterFFTWToPetsc" 884 /*@ 885 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 886 887 Collective on Mat 888 889 Input Parameters: 890 + A - FFTW matrix 891 - x - FFTW vector 892 893 Output Parameters: 894 . y - PETSc vector 895 896 Level: intermediate 897 898 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 899 VecScatterFFTWToPetsc removes those extra zeros. 900 901 .seealso: VecScatterPetscToFFTW() 902 @*/ 903 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 904 { 905 PetscErrorCode ierr; 906 907 PetscFunctionBegin; 908 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 914 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 915 { 916 PetscErrorCode ierr; 917 MPI_Comm comm; 918 Mat_FFT *fft = (Mat_FFT*)A->data; 919 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 920 PetscInt N = fft->N; 921 PetscInt ndim = fft->ndim,*dim=fft->dim; 922 PetscInt low; 923 PetscMPIInt rank,size; 924 ptrdiff_t local_n0,local_0_start; 925 ptrdiff_t local_n1,local_1_start; 926 VecScatter vecscat; 927 IS list1,list2; 928 #if !defined(PETSC_USE_COMPLEX) 929 PetscInt i,j,k,partial_dim; 930 PetscInt *indx1, *indx2, tempindx, tempindx1; 931 PetscInt N1, n1,NM; 932 ptrdiff_t temp; 933 #endif 934 935 PetscFunctionBegin; 936 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 937 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 938 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 939 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 940 941 if (size==1) { 942 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 943 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 944 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 945 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 947 ierr = ISDestroy(&list1);CHKERRQ(ierr); 948 949 } else { 950 switch (ndim) { 951 case 1: 952 #if defined(PETSC_USE_COMPLEX) 953 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 954 955 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 956 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 957 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 958 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 961 ierr = ISDestroy(&list1);CHKERRQ(ierr); 962 ierr = ISDestroy(&list2);CHKERRQ(ierr); 963 #else 964 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 965 #endif 966 break; 967 case 2: 968 #if defined(PETSC_USE_COMPLEX) 969 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 970 971 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 972 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 973 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 974 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 977 ierr = ISDestroy(&list1);CHKERRQ(ierr); 978 ierr = ISDestroy(&list2);CHKERRQ(ierr); 979 #else 980 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 981 982 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 983 984 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 985 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 986 987 if (dim[1]%2==0) NM = dim[1]+2; 988 else NM = dim[1]+1; 989 990 for (i=0; i<local_n0; i++) { 991 for (j=0; j<dim[1]; j++) { 992 tempindx = i*dim[1] + j; 993 tempindx1 = i*NM + j; 994 995 indx1[tempindx]=local_0_start*dim[1]+tempindx; 996 indx2[tempindx]=low+tempindx1; 997 } 998 } 999 1000 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1001 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1002 1003 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1004 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1007 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1008 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1009 ierr = PetscFree(indx1);CHKERRQ(ierr); 1010 ierr = PetscFree(indx2);CHKERRQ(ierr); 1011 #endif 1012 break; 1013 case 3: 1014 #if defined(PETSC_USE_COMPLEX) 1015 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1016 1017 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1018 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1019 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1020 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1023 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1024 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1025 #else 1026 1027 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1028 1029 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1030 1031 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1032 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1033 1034 if (dim[2]%2==0) NM = dim[2]+2; 1035 else NM = dim[2]+1; 1036 1037 for (i=0; i<local_n0; i++) { 1038 for (j=0; j<dim[1]; j++) { 1039 for (k=0; k<dim[2]; k++) { 1040 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1041 tempindx1 = i*dim[1]*NM + j*NM + k; 1042 1043 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1044 indx2[tempindx]=low+tempindx1; 1045 } 1046 } 1047 } 1048 1049 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1050 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1051 1052 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1053 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058 ierr = PetscFree(indx1);CHKERRQ(ierr); 1059 ierr = PetscFree(indx2);CHKERRQ(ierr); 1060 #endif 1061 break; 1062 default: 1063 #if defined(PETSC_USE_COMPLEX) 1064 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1065 1066 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1067 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1068 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1069 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1071 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1072 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1073 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1074 #else 1075 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1076 1077 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1078 1079 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1080 1081 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1082 1083 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1084 1085 partial_dim = fftw->partial_dim; 1086 1087 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1088 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1089 1090 if (dim[ndim-1]%2==0) NM = 2; 1091 else NM = 1; 1092 1093 j = low; 1094 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1095 indx1[i] = local_0_start*partial_dim + i; 1096 indx2[i] = j; 1097 if (k%dim[ndim-1]==0) j+=NM; 1098 j++; 1099 } 1100 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1101 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1102 1103 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1104 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1105 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1107 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1108 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1109 ierr = PetscFree(indx1);CHKERRQ(ierr); 1110 ierr = PetscFree(indx2);CHKERRQ(ierr); 1111 #endif 1112 break; 1113 } 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatCreate_FFTW" 1120 /* 1121 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1122 1123 Options Database Keys: 1124 + -mat_fftw_plannerflags - set FFTW planner flags 1125 1126 Level: intermediate 1127 1128 */ 1129 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1130 { 1131 PetscErrorCode ierr; 1132 MPI_Comm comm; 1133 Mat_FFT *fft=(Mat_FFT*)A->data; 1134 Mat_FFTW *fftw; 1135 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1136 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1137 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1138 PetscBool flg; 1139 PetscInt p_flag,partial_dim=1,ctr; 1140 PetscMPIInt size,rank; 1141 ptrdiff_t *pdim; 1142 ptrdiff_t local_n1,local_1_start; 1143 #if !defined(PETSC_USE_COMPLEX) 1144 ptrdiff_t temp; 1145 PetscInt N1,tot_dim; 1146 #else 1147 PetscInt n1; 1148 #endif 1149 1150 PetscFunctionBegin; 1151 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1152 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1153 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1154 1155 fftw_mpi_init(); 1156 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1157 pdim[0] = dim[0]; 1158 #if !defined(PETSC_USE_COMPLEX) 1159 tot_dim = 2*dim[0]; 1160 #endif 1161 for (ctr=1; ctr<ndim; ctr++) { 1162 partial_dim *= dim[ctr]; 1163 pdim[ctr] = dim[ctr]; 1164 #if !defined(PETSC_USE_COMPLEX) 1165 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1166 else tot_dim *= dim[ctr]; 1167 #endif 1168 } 1169 1170 if (size == 1) { 1171 #if defined(PETSC_USE_COMPLEX) 1172 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1173 n = N; 1174 #else 1175 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1176 n = tot_dim; 1177 #endif 1178 1179 } else { 1180 ptrdiff_t local_n0,local_0_start; 1181 switch (ndim) { 1182 case 1: 1183 #if !defined(PETSC_USE_COMPLEX) 1184 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1185 #else 1186 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1187 1188 n = (PetscInt)local_n0; 1189 n1 = (PetscInt)local_n1; 1190 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1191 #endif 1192 break; 1193 case 2: 1194 #if defined(PETSC_USE_COMPLEX) 1195 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1196 /* 1197 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1198 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1199 */ 1200 n = (PetscInt)local_n0*dim[1]; 1201 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1202 #else 1203 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1204 1205 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1206 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1207 #endif 1208 break; 1209 case 3: 1210 #if defined(PETSC_USE_COMPLEX) 1211 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1212 1213 n = (PetscInt)local_n0*dim[1]*dim[2]; 1214 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1215 #else 1216 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1217 1218 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1219 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1220 #endif 1221 break; 1222 default: 1223 #if defined(PETSC_USE_COMPLEX) 1224 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1225 1226 n = (PetscInt)local_n0*partial_dim; 1227 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1228 #else 1229 temp = pdim[ndim-1]; 1230 1231 pdim[ndim-1] = temp/2 + 1; 1232 1233 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1234 1235 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1236 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1237 1238 pdim[ndim-1] = temp; 1239 1240 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1241 #endif 1242 break; 1243 } 1244 } 1245 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1246 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1247 fft->data = (void*)fftw; 1248 1249 fft->n = n; 1250 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1251 fftw->partial_dim = partial_dim; 1252 1253 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1254 if (size == 1) { 1255 #if defined(PETSC_USE_64BIT_INDICES) 1256 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1257 #else 1258 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1259 #endif 1260 } 1261 1262 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1263 1264 fftw->p_forward = 0; 1265 fftw->p_backward = 0; 1266 fftw->p_flag = FFTW_ESTIMATE; 1267 1268 if (size == 1) { 1269 A->ops->mult = MatMult_SeqFFTW; 1270 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1271 } else { 1272 A->ops->mult = MatMult_MPIFFTW; 1273 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1274 } 1275 fft->matdestroy = MatDestroy_FFTW; 1276 A->assembled = PETSC_TRUE; 1277 A->preallocated = PETSC_TRUE; 1278 1279 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1282 1283 /* get runtime options */ 1284 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1285 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1286 if (flg) { 1287 fftw->p_flag = iplans[p_flag]; 1288 } 1289 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 1294 1295 1296