/*
    Provides an interface to the FFTW package.
    Testing examples can be found in ~src/mat/tests
*/

#include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/
EXTERN_C_BEGIN
#if !PetscDefined(HAVE_MPIUNI)
#include <fftw3-mpi.h>
#else
#include <fftw3.h>
#endif
EXTERN_C_END

typedef struct {
  ptrdiff_t    ndim_fftw,*dim_fftw; /* dimensions in FFTW's own (ptrdiff_t) representation */
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64 *iodims;             /* guru-interface dimension descriptors for ndim > 3 */
#else
  fftw_iodim   *iodims;
#endif
  PetscInt     partial_dim;         /* product of all dimensions except the first */
  fftw_plan    p_forward,p_backward;
  unsigned     p_flag;              /* planner flags: FFTW_ESTIMATE, FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */
  PetscScalar  *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays because an fftw plan should be
                                                             executed for the arrays with which the plan was created */
} Mat_FFTW;

extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatDestroy_FFTW(Mat);
extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*);
#if !PetscDefined(HAVE_MPIUNI)
extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec);
extern PetscErrorCode VecDestroy_MPIFFTW(Vec);
#endif

/*
   MatMult_SeqFFTW - performs the serial forward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims;
#else
  fftw_iodim        *iodims;
#endif
  PetscInt          i;
#endif
  PetscInt          ndim = fft->ndim,*dim = fft->dim;

  PetscFunctionBegin;
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan lazily on first use, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    default: /* ndim > 3: use the FFTW guru interface for complex, the generic r2c planner for real */
#if defined(PETSC_USE_COMPLEX)
      iodims = fftw->iodims;
#if defined(PETSC_USE_64BIT_INDICES)
      if (ndim) {
        /* row-major layout: strides are built from the last (fastest-varying) dimension backwards */
        iodims[ndim-1].n  = (ptrdiff_t)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (ptrdiff_t)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#else
      if (ndim) {
        iodims[ndim-1].n  = (int)dim[ndim-1];
        iodims[ndim-1].is = iodims[ndim-1].os = 1;
        for (i=ndim-2; i>=0; --i) {
          iodims[i].n  = (int)dim[i];
          iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n;
        }
      }
      fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag);
#endif

#else
      fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was created for; fftw_execute() is only valid on those */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  CHKERRQ(VecRestoreArray(y,&y_array));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
/*
   MatMultTranspose_SeqFFTW - performs the serial backward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the backward DFT

   Note: FFTW backward transforms are unnormalized; the caller is responsible
   for any 1/N scaling.
*/
PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
  fftw_iodim64      *iodims = fftw->iodims; /* filled by MatMult_SeqFFTW's forward planning */
#else
  fftw_iodim        *iodims = fftw->iodims;
#endif
#endif

  PetscFunctionBegin;
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan lazily on first use, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    default: /* ndim > 3 */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_64BIT_INDICES)
      /* NOTE(review): assumes fftw->iodims was already populated by a forward MatMult — confirm callers always do forward first */
      fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag);
#endif
#else
      fftw->p_backward = fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag);
#endif
      break;
    }
    /* remember which arrays the plan was created for; fftw_execute() is only valid on those */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
#if defined(PETSC_USE_COMPLEX)
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
#else
      fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array);
#endif
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  CHKERRQ(VecRestoreArray(y,&y_array));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
#if !PetscDefined(HAVE_MPIUNI)
/*
   MatMult_MPIFFTW - performs the forward DFT in parallel

   Input Parameters:
     A - the matrix
     x - the vector on which the forward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the forward DFT
*/
PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  if (!fftw->p_forward) { /* create a plan lazily on first use, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */
      fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      /* NOTE(review): the r2c branches hard-code FFTW_ESTIMATE instead of fftw->p_flag,
         presumably to avoid the planner clobbering the real data — confirm before unifying */
      fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default: /* ndim > 3: use the generic planners with FFTW's ptrdiff_t dimensions */
#if defined(PETSC_USE_COMPLEX)
      fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag);
#else
      fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the plan was created for; fftw_execute() is only valid on those */
    fftw->finarray  = (PetscScalar *) x_array;
    fftw->foutarray = y_array;
    /* Warning: if (fftw->p_flag != FFTW_ESTIMATE) the data in the in/out arrays is overwritten!
       planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */
    fftw_execute(fftw->p_forward);
  } else { /* use existing plan */
    if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_forward);
    }
  }
  CHKERRQ(VecRestoreArray(y,&y_array));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
/*
   MatMultTranspose_MPIFFTW - performs the parallel backward DFT

   Input Parameters:
     A - the matrix
     x - the vector on which the backward DFT will be performed

   Output Parameter:
     y - vector that stores the result of the backward DFT
*/
PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y)
{
  Mat_FFT           *fft  = (Mat_FFT*)A->data;
  Mat_FFTW          *fftw = (Mat_FFTW*)fft->data;
  const PetscScalar *x_array;
  PetscScalar       *y_array;
  PetscInt          ndim = fft->ndim,*dim = fft->dim;
  MPI_Comm          comm;

  PetscFunctionBegin;
  CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm));
  CHKERRQ(VecGetArrayRead(x,&x_array));
  CHKERRQ(VecGetArray(y,&y_array));
  if (!fftw->p_backward) { /* create a plan lazily on first use, then execute it */
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */
      fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      /* NOTE(review): the c2r branches hard-code FFTW_ESTIMATE instead of fftw->p_flag — see matching note in MatMult_MPIFFTW */
      fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    default: /* ndim > 3 */
#if defined(PETSC_USE_COMPLEX)
      fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag);
#else
      fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE);
#endif
      break;
    }
    /* remember which arrays the plan was created for; fftw_execute() is only valid on those */
    fftw->binarray  = (PetscScalar *) x_array;
    fftw->boutarray = y_array;
    fftw_execute(fftw->p_backward);
  } else { /* use existing plan */
    if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */
      fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array);
    } else {
      fftw_execute(fftw->p_backward);
    }
  }
  CHKERRQ(VecRestoreArray(y,&y_array));
  CHKERRQ(VecRestoreArrayRead(x,&x_array));
  PetscFunctionReturn(0);
}
#endif
fftw_destroy_plan(fftw->p_backward); 370 if (fftw->iodims) { 371 free(fftw->iodims); 372 } 373 CHKERRQ(PetscFree(fftw->dim_fftw)); 374 CHKERRQ(PetscFree(fft->data)); 375 #if !PetscDefined(HAVE_MPIUNI) 376 fftw_mpi_cleanup(); 377 #endif 378 PetscFunctionReturn(0); 379 } 380 381 #if !PetscDefined(HAVE_MPIUNI) 382 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 383 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 384 { 385 PetscScalar *array; 386 387 PetscFunctionBegin; 388 CHKERRQ(VecGetArray(v,&array)); 389 fftw_free((fftw_complex*)array); 390 CHKERRQ(VecRestoreArray(v,&array)); 391 CHKERRQ(VecDestroy_MPI(v)); 392 PetscFunctionReturn(0); 393 } 394 #endif 395 396 #if !PetscDefined(HAVE_MPIUNI) 397 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 398 { 399 Mat A; 400 401 PetscFunctionBegin; 402 CHKERRQ(PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A)); 403 CHKERRQ(MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL)); 404 PetscFunctionReturn(0); 405 } 406 407 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 408 { 409 Mat A; 410 411 PetscFunctionBegin; 412 CHKERRQ(PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A)); 413 CHKERRQ(MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL)); 414 PetscFunctionReturn(0); 415 } 416 417 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 418 { 419 Mat A; 420 421 PetscFunctionBegin; 422 CHKERRQ(PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A)); 423 CHKERRQ(MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new)); 424 PetscFunctionReturn(0); 425 } 426 #endif 427 428 /*@ 429 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 430 parallel layout determined by FFTW 431 432 Collective on Mat 433 434 Input Parameter: 435 . A - the matrix 436 437 Output Parameters: 438 + x - (optional) input vector of forward FFTW 439 . 
y - (optional) output vector of forward FFTW 440 - z - (optional) output vector of backward FFTW 441 442 Level: advanced 443 444 Note: The parallel layout of output of forward FFTW is always same as the input 445 of the backward FFTW. But parallel layout of the input vector of forward 446 FFTW might not be same as the output of backward FFTW. 447 Also note that we need to provide enough space while doing parallel real transform. 448 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 449 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 450 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 451 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 452 zeros if it is odd only one zero is needed. 453 Lastly one needs some scratch space at the end of data set in each process. alloc_local 454 figures out how much space is needed, i.e. it figures out the data+scratch space for 455 each processor and returns that. 
456 457 .seealso: MatCreateFFT() 458 @*/ 459 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 460 { 461 PetscFunctionBegin; 462 CHKERRQ(PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z))); 463 PetscFunctionReturn(0); 464 } 465 466 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 467 { 468 PetscMPIInt size,rank; 469 MPI_Comm comm; 470 Mat_FFT *fft = (Mat_FFT*)A->data; 471 472 PetscFunctionBegin; 473 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 474 PetscValidType(A,1); 475 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 476 477 CHKERRMPI(MPI_Comm_size(comm, &size)); 478 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 479 if (size == 1) { /* sequential case */ 480 #if defined(PETSC_USE_COMPLEX) 481 if (fin) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->N,fin)); 482 if (fout) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->N,fout)); 483 if (bout) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->N,bout)); 484 #else 485 if (fin) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->n,fin)); 486 if (fout) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->n,fout)); 487 if (bout) CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,fft->n,bout)); 488 #endif 489 #if !PetscDefined(HAVE_MPIUNI) 490 } else { /* parallel cases */ 491 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 492 PetscInt ndim = fft->ndim,*dim = fft->dim; 493 ptrdiff_t alloc_local,local_n0,local_0_start; 494 ptrdiff_t local_n1; 495 fftw_complex *data_fout; 496 ptrdiff_t local_1_start; 497 #if defined(PETSC_USE_COMPLEX) 498 fftw_complex *data_fin,*data_bout; 499 #else 500 double *data_finr,*data_boutr; 501 PetscInt n1,N1; 502 ptrdiff_t temp; 503 #endif 504 505 switch (ndim) { 506 case 1: 507 #if !defined(PETSC_USE_COMPLEX) 508 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 509 #else 510 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 511 if (fin) { 512 data_fin = 
(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 513 CHKERRQ(VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin)); 514 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 515 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 516 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 517 } 518 if (fout) { 519 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520 CHKERRQ(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout)); 521 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 522 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 523 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 526 if (bout) { 527 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 528 CHKERRQ(VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout)); 529 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 530 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 531 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 532 } 533 break; 534 #endif 535 case 2: 536 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 537 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 538 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 539 if (fin) { 540 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 541 CHKERRQ(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 542 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 543 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 544 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 if (fout) { 547 data_fout = 
(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 548 CHKERRQ(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout)); 549 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 550 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (bout) { 554 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 555 CHKERRQ(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 556 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 557 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 558 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 #else 561 /* Get local size */ 562 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 563 if (fin) { 564 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 565 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 566 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 567 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 568 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 if (fout) { 571 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 572 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 573 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 574 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 575 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 576 } 577 if (bout) { 578 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 579 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 580 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 581 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 582 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 583 } 584 #endif 585 break; 586 case 3: 587 #if 
!defined(PETSC_USE_COMPLEX) 588 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 589 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 590 if (fin) { 591 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 592 CHKERRQ(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin)); 593 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 594 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 595 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 596 } 597 if (fout) { 598 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 599 CHKERRQ(VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout)); 600 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 601 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 602 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 603 } 604 if (bout) { 605 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 606 CHKERRQ(VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout)); 607 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 608 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 609 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 610 } 611 #else 612 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 613 if (fin) { 614 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 615 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 616 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 617 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 618 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 619 } 620 if (fout) { 621 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 622 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 623 
CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 624 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 625 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 626 } 627 if (bout) { 628 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 629 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 630 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 631 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 632 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 #endif 635 break; 636 default: 637 #if !defined(PETSC_USE_COMPLEX) 638 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 639 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 640 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 641 N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 642 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 643 644 if (fin) { 645 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 646 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin)); 647 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 648 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 649 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 650 } 651 if (fout) { 652 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 653 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout)); 654 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 655 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 656 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 657 } 658 if (bout) { 659 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 660 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout)); 661 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 662 (*bout)->ops->duplicate = 
VecDuplicate_FFTW_bout; 663 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 664 } 665 #else 666 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 667 if (fin) { 668 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 669 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin)); 670 CHKERRQ(PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A)); 671 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 672 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 673 } 674 if (fout) { 675 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 676 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout)); 677 CHKERRQ(PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A)); 678 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 679 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 680 } 681 if (bout) { 682 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 683 CHKERRQ(VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout)); 684 CHKERRQ(PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A)); 685 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 686 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 687 } 688 #endif 689 break; 690 } 691 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 692 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 693 memory leaks. We void these pointers here to avoid acident uses. 694 */ 695 if (fin) (*fin)->ops->replacearray = NULL; 696 if (fout) (*fout)->ops->replacearray = NULL; 697 if (bout) (*bout)->ops->replacearray = NULL; 698 #endif 699 } 700 PetscFunctionReturn(0); 701 } 702 703 /*@ 704 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 
705 706 Collective on Mat 707 708 Input Parameters: 709 + A - FFTW matrix 710 - x - the PETSc vector 711 712 Output Parameters: 713 . y - the FFTW vector 714 715 Options Database Keys: 716 . -mat_fftw_plannerflags - set FFTW planner flags 717 718 Level: intermediate 719 720 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 721 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 722 zeros. This routine does that job by scattering operation. 723 724 .seealso: VecScatterFFTWToPetsc() 725 @*/ 726 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 727 { 728 PetscFunctionBegin; 729 CHKERRQ(PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y))); 730 PetscFunctionReturn(0); 731 } 732 733 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 734 { 735 MPI_Comm comm; 736 Mat_FFT *fft = (Mat_FFT*)A->data; 737 PetscInt low; 738 PetscMPIInt rank,size; 739 PetscInt vsize,vsize1; 740 VecScatter vecscat; 741 IS list1; 742 743 PetscFunctionBegin; 744 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 745 CHKERRMPI(MPI_Comm_size(comm, &size)); 746 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 747 CHKERRQ(VecGetOwnershipRange(y,&low,NULL)); 748 749 if (size==1) { 750 CHKERRQ(VecGetSize(x,&vsize)); 751 CHKERRQ(VecGetSize(y,&vsize1)); 752 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1)); 753 CHKERRQ(VecScatterCreate(x,list1,y,list1,&vecscat)); 754 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 755 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 756 CHKERRQ(VecScatterDestroy(&vecscat)); 757 CHKERRQ(ISDestroy(&list1)); 758 #if !PetscDefined(HAVE_MPIUNI) 759 } else { 760 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 761 PetscInt ndim = fft->ndim,*dim = fft->dim; 762 ptrdiff_t local_n0,local_0_start; 763 ptrdiff_t local_n1,local_1_start; 764 IS list2; 765 #if !defined(PETSC_USE_COMPLEX) 
766 PetscInt i,j,k,partial_dim; 767 PetscInt *indx1, *indx2, tempindx, tempindx1; 768 PetscInt NM; 769 ptrdiff_t temp; 770 #endif 771 772 switch (ndim) { 773 case 1: 774 #if defined(PETSC_USE_COMPLEX) 775 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 776 777 CHKERRQ(ISCreateStride(comm,local_n0,local_0_start,1,&list1)); 778 CHKERRQ(ISCreateStride(comm,local_n0,low,1,&list2)); 779 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 780 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 781 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 782 CHKERRQ(VecScatterDestroy(&vecscat)); 783 CHKERRQ(ISDestroy(&list1)); 784 CHKERRQ(ISDestroy(&list2)); 785 #else 786 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 787 #endif 788 break; 789 case 2: 790 #if defined(PETSC_USE_COMPLEX) 791 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 792 793 CHKERRQ(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1)); 794 CHKERRQ(ISCreateStride(comm,local_n0*dim[1],low,1,&list2)); 795 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 796 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 797 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 798 CHKERRQ(VecScatterDestroy(&vecscat)); 799 CHKERRQ(ISDestroy(&list1)); 800 CHKERRQ(ISDestroy(&list2)); 801 #else 802 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 803 804 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1)); 805 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2)); 806 807 if (dim[1]%2==0) { 808 NM = dim[1]+2; 809 } else { 810 NM = dim[1]+1; 811 } 812 for (i=0; i<local_n0; i++) { 813 for (j=0; j<dim[1]; j++) { 814 tempindx = i*dim[1] + j; 815 tempindx1 = i*NM + j; 816 817 indx1[tempindx]=local_0_start*dim[1]+tempindx; 818 indx2[tempindx]=low+tempindx1; 819 } 
820 } 821 822 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1)); 823 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2)); 824 825 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 826 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 827 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 828 CHKERRQ(VecScatterDestroy(&vecscat)); 829 CHKERRQ(ISDestroy(&list1)); 830 CHKERRQ(ISDestroy(&list2)); 831 CHKERRQ(PetscFree(indx1)); 832 CHKERRQ(PetscFree(indx2)); 833 #endif 834 break; 835 836 case 3: 837 #if defined(PETSC_USE_COMPLEX) 838 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 839 840 CHKERRQ(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1)); 841 CHKERRQ(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2)); 842 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 843 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 844 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 845 CHKERRQ(VecScatterDestroy(&vecscat)); 846 CHKERRQ(ISDestroy(&list1)); 847 CHKERRQ(ISDestroy(&list2)); 848 #else 849 /* buggy, needs to be fixed. 
See src/mat/tests/ex158.c */ 850 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 851 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 852 853 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1)); 854 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2)); 855 856 if (dim[2]%2==0) NM = dim[2]+2; 857 else NM = dim[2]+1; 858 859 for (i=0; i<local_n0; i++) { 860 for (j=0; j<dim[1]; j++) { 861 for (k=0; k<dim[2]; k++) { 862 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 863 tempindx1 = i*dim[1]*NM + j*NM + k; 864 865 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 866 indx2[tempindx]=low+tempindx1; 867 } 868 } 869 } 870 871 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1)); 872 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2)); 873 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 874 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 875 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 876 CHKERRQ(VecScatterDestroy(&vecscat)); 877 CHKERRQ(ISDestroy(&list1)); 878 CHKERRQ(ISDestroy(&list2)); 879 CHKERRQ(PetscFree(indx1)); 880 CHKERRQ(PetscFree(indx2)); 881 #endif 882 break; 883 884 default: 885 #if defined(PETSC_USE_COMPLEX) 886 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 887 888 CHKERRQ(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1)); 889 CHKERRQ(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2)); 890 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 891 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 892 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 893 CHKERRQ(VecScatterDestroy(&vecscat)); 894 CHKERRQ(ISDestroy(&list1)); 895 CHKERRQ(ISDestroy(&list2)); 896 #else 897 /* buggy, needs to be fixed. 
See src/mat/tests/ex158.c */ 898 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 899 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 900 901 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 902 903 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 904 905 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 906 907 partial_dim = fftw->partial_dim; 908 909 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1)); 910 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2)); 911 912 if (dim[ndim-1]%2==0) NM = 2; 913 else NM = 1; 914 915 j = low; 916 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 917 indx1[i] = local_0_start*partial_dim + i; 918 indx2[i] = j; 919 if (k%dim[ndim-1]==0) j+=NM; 920 j++; 921 } 922 CHKERRQ(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1)); 923 CHKERRQ(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2)); 924 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 925 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 926 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 927 CHKERRQ(VecScatterDestroy(&vecscat)); 928 CHKERRQ(ISDestroy(&list1)); 929 CHKERRQ(ISDestroy(&list2)); 930 CHKERRQ(PetscFree(indx1)); 931 CHKERRQ(PetscFree(indx2)); 932 #endif 933 break; 934 } 935 #endif 936 } 937 PetscFunctionReturn(0); 938 } 939 940 /*@ 941 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 942 943 Collective on Mat 944 945 Input Parameters: 946 + A - FFTW matrix 947 - x - FFTW vector 948 949 Output Parameters: 950 . y - PETSc vector 951 952 Level: intermediate 953 954 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 955 VecScatterFFTWToPetsc removes those extra zeros. 
956 957 .seealso: VecScatterPetscToFFTW() 958 @*/ 959 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 960 { 961 PetscFunctionBegin; 962 CHKERRQ(PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y))); 963 PetscFunctionReturn(0); 964 } 965 966 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 967 { 968 MPI_Comm comm; 969 Mat_FFT *fft = (Mat_FFT*)A->data; 970 PetscInt low; 971 PetscMPIInt rank,size; 972 VecScatter vecscat; 973 IS list1; 974 975 PetscFunctionBegin; 976 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 977 CHKERRMPI(MPI_Comm_size(comm, &size)); 978 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 979 CHKERRQ(VecGetOwnershipRange(x,&low,NULL)); 980 981 if (size==1) { 982 CHKERRQ(ISCreateStride(comm,fft->N,0,1,&list1)); 983 CHKERRQ(VecScatterCreate(x,list1,y,list1,&vecscat)); 984 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 985 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 986 CHKERRQ(VecScatterDestroy(&vecscat)); 987 CHKERRQ(ISDestroy(&list1)); 988 989 #if !PetscDefined(HAVE_MPIUNI) 990 } else { 991 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 992 PetscInt ndim = fft->ndim,*dim = fft->dim; 993 ptrdiff_t local_n0,local_0_start; 994 ptrdiff_t local_n1,local_1_start; 995 IS list2; 996 #if !defined(PETSC_USE_COMPLEX) 997 PetscInt i,j,k,partial_dim; 998 PetscInt *indx1, *indx2, tempindx, tempindx1; 999 PetscInt NM; 1000 ptrdiff_t temp; 1001 #endif 1002 switch (ndim) { 1003 case 1: 1004 #if defined(PETSC_USE_COMPLEX) 1005 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1006 1007 CHKERRQ(ISCreateStride(comm,local_n1,local_1_start,1,&list1)); 1008 CHKERRQ(ISCreateStride(comm,local_n1,low,1,&list2)); 1009 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 1010 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1011 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1012 
CHKERRQ(VecScatterDestroy(&vecscat)); 1013 CHKERRQ(ISDestroy(&list1)); 1014 CHKERRQ(ISDestroy(&list2)); 1015 #else 1016 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1017 #endif 1018 break; 1019 case 2: 1020 #if defined(PETSC_USE_COMPLEX) 1021 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1022 1023 CHKERRQ(ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1)); 1024 CHKERRQ(ISCreateStride(comm,local_n0*dim[1],low,1,&list2)); 1025 CHKERRQ(VecScatterCreate(x,list2,y,list1,&vecscat)); 1026 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1027 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1028 CHKERRQ(VecScatterDestroy(&vecscat)); 1029 CHKERRQ(ISDestroy(&list1)); 1030 CHKERRQ(ISDestroy(&list2)); 1031 #else 1032 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1033 1034 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1)); 1035 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2)); 1036 1037 if (dim[1]%2==0) NM = dim[1]+2; 1038 else NM = dim[1]+1; 1039 1040 for (i=0; i<local_n0; i++) { 1041 for (j=0; j<dim[1]; j++) { 1042 tempindx = i*dim[1] + j; 1043 tempindx1 = i*NM + j; 1044 1045 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1046 indx2[tempindx]=low+tempindx1; 1047 } 1048 } 1049 1050 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1)); 1051 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2)); 1052 1053 CHKERRQ(VecScatterCreate(x,list2,y,list1,&vecscat)); 1054 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1055 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1056 CHKERRQ(VecScatterDestroy(&vecscat)); 1057 CHKERRQ(ISDestroy(&list1)); 1058 CHKERRQ(ISDestroy(&list2)); 1059 CHKERRQ(PetscFree(indx1)); 1060 CHKERRQ(PetscFree(indx2)); 1061 #endif 1062 break; 1063 case 3: 1064 #if defined(PETSC_USE_COMPLEX) 1065 
fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1066 1067 CHKERRQ(ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1)); 1068 CHKERRQ(ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2)); 1069 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 1070 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1071 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1072 CHKERRQ(VecScatterDestroy(&vecscat)); 1073 CHKERRQ(ISDestroy(&list1)); 1074 CHKERRQ(ISDestroy(&list2)); 1075 #else 1076 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1077 1078 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1)); 1079 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2)); 1080 1081 if (dim[2]%2==0) NM = dim[2]+2; 1082 else NM = dim[2]+1; 1083 1084 for (i=0; i<local_n0; i++) { 1085 for (j=0; j<dim[1]; j++) { 1086 for (k=0; k<dim[2]; k++) { 1087 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1088 tempindx1 = i*dim[1]*NM + j*NM + k; 1089 1090 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1091 indx2[tempindx]=low+tempindx1; 1092 } 1093 } 1094 } 1095 1096 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1)); 1097 CHKERRQ(ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2)); 1098 1099 CHKERRQ(VecScatterCreate(x,list2,y,list1,&vecscat)); 1100 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1101 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1102 CHKERRQ(VecScatterDestroy(&vecscat)); 1103 CHKERRQ(ISDestroy(&list1)); 1104 CHKERRQ(ISDestroy(&list2)); 1105 CHKERRQ(PetscFree(indx1)); 1106 CHKERRQ(PetscFree(indx2)); 1107 #endif 1108 break; 1109 default: 1110 #if defined(PETSC_USE_COMPLEX) 1111 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1112 1113 
CHKERRQ(ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1)); 1114 CHKERRQ(ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2)); 1115 CHKERRQ(VecScatterCreate(x,list1,y,list2,&vecscat)); 1116 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1117 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1118 CHKERRQ(VecScatterDestroy(&vecscat)); 1119 CHKERRQ(ISDestroy(&list1)); 1120 CHKERRQ(ISDestroy(&list2)); 1121 #else 1122 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1123 1124 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1125 1126 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1127 1128 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1129 1130 partial_dim = fftw->partial_dim; 1131 1132 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1)); 1133 CHKERRQ(PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2)); 1134 1135 if (dim[ndim-1]%2==0) NM = 2; 1136 else NM = 1; 1137 1138 j = low; 1139 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1140 indx1[i] = local_0_start*partial_dim + i; 1141 indx2[i] = j; 1142 if (k%dim[ndim-1]==0) j+=NM; 1143 j++; 1144 } 1145 CHKERRQ(ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1)); 1146 CHKERRQ(ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2)); 1147 1148 CHKERRQ(VecScatterCreate(x,list2,y,list1,&vecscat)); 1149 CHKERRQ(VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1150 CHKERRQ(VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD)); 1151 CHKERRQ(VecScatterDestroy(&vecscat)); 1152 CHKERRQ(ISDestroy(&list1)); 1153 CHKERRQ(ISDestroy(&list2)); 1154 CHKERRQ(PetscFree(indx1)); 1155 CHKERRQ(PetscFree(indx2)); 1156 #endif 1157 break; 1158 } 1159 #endif 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 /* 1165 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 
1166 1167 Options Database Keys: 1168 + -mat_fftw_plannerflags - set FFTW planner flags 1169 1170 Level: intermediate 1171 1172 */ 1173 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1174 { 1175 MPI_Comm comm; 1176 Mat_FFT *fft = (Mat_FFT*)A->data; 1177 Mat_FFTW *fftw; 1178 PetscInt ndim = fft->ndim,*dim = fft->dim; 1179 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1180 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1181 PetscBool flg; 1182 PetscInt p_flag,partial_dim=1,ctr; 1183 PetscMPIInt size,rank; 1184 ptrdiff_t *pdim; 1185 PetscErrorCode ierr; 1186 #if !defined(PETSC_USE_COMPLEX) 1187 PetscInt tot_dim; 1188 #endif 1189 1190 PetscFunctionBegin; 1191 CHKERRQ(PetscObjectGetComm((PetscObject)A,&comm)); 1192 CHKERRMPI(MPI_Comm_size(comm, &size)); 1193 CHKERRMPI(MPI_Comm_rank(comm, &rank)); 1194 1195 #if !PetscDefined(HAVE_MPIUNI) 1196 fftw_mpi_init(); 1197 #endif 1198 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1199 pdim[0] = dim[0]; 1200 #if !defined(PETSC_USE_COMPLEX) 1201 tot_dim = 2*dim[0]; 1202 #endif 1203 for (ctr=1; ctr<ndim; ctr++) { 1204 partial_dim *= dim[ctr]; 1205 pdim[ctr] = dim[ctr]; 1206 #if !defined(PETSC_USE_COMPLEX) 1207 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1208 else tot_dim *= dim[ctr]; 1209 #endif 1210 } 1211 1212 if (size == 1) { 1213 #if defined(PETSC_USE_COMPLEX) 1214 CHKERRQ(MatSetSizes(A,fft->N,fft->N,fft->N,fft->N)); 1215 fft->n = fft->N; 1216 #else 1217 CHKERRQ(MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim)); 1218 fft->n = tot_dim; 1219 #endif 1220 #if !PetscDefined(HAVE_MPIUNI) 1221 } else { 1222 ptrdiff_t local_n0,local_0_start,local_n1,local_1_start; 1223 #if !defined(PETSC_USE_COMPLEX) 1224 ptrdiff_t temp; 1225 PetscInt N1; 1226 #endif 1227 1228 switch (ndim) { 1229 case 1: 1230 #if !defined(PETSC_USE_COMPLEX) 1231 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1232 #else 1233 
fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1234 fft->n = (PetscInt)local_n0; 1235 CHKERRQ(MatSetSizes(A,local_n1,fft->n,fft->N,fft->N)); 1236 #endif 1237 break; 1238 case 2: 1239 #if defined(PETSC_USE_COMPLEX) 1240 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1241 fft->n = (PetscInt)local_n0*dim[1]; 1242 CHKERRQ(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 1243 #else 1244 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1245 1246 fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1247 CHKERRQ(MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1))); 1248 #endif 1249 break; 1250 case 3: 1251 #if defined(PETSC_USE_COMPLEX) 1252 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1253 1254 fft->n = (PetscInt)local_n0*dim[1]*dim[2]; 1255 CHKERRQ(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 1256 #else 1257 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1258 1259 fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1260 CHKERRQ(MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1))); 1261 #endif 1262 break; 1263 default: 1264 #if defined(PETSC_USE_COMPLEX) 1265 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1266 1267 fft->n = (PetscInt)local_n0*partial_dim; 1268 CHKERRQ(MatSetSizes(A,fft->n,fft->n,fft->N,fft->N)); 1269 #else 1270 temp = pdim[ndim-1]; 1271 1272 pdim[ndim-1] = temp/2 + 1; 1273 1274 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1275 1276 fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1277 N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1278 1279 pdim[ndim-1] = temp; 1280 1281 CHKERRQ(MatSetSizes(A,fft->n,fft->n,N1,N1)); 1282 #endif 1283 break; 1284 } 1285 #endif 1286 } 1287 free(pdim); 
1288 CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATFFTW)); 1289 CHKERRQ(PetscNewLog(A,&fftw)); 1290 fft->data = (void*)fftw; 1291 1292 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1293 fftw->partial_dim = partial_dim; 1294 1295 CHKERRQ(PetscMalloc1(ndim, &(fftw->dim_fftw))); 1296 if (size == 1) { 1297 #if defined(PETSC_USE_64BIT_INDICES) 1298 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1299 #else 1300 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1301 #endif 1302 } 1303 1304 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1305 1306 fftw->p_forward = NULL; 1307 fftw->p_backward = NULL; 1308 fftw->p_flag = FFTW_ESTIMATE; 1309 1310 if (size == 1) { 1311 A->ops->mult = MatMult_SeqFFTW; 1312 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1313 #if !PetscDefined(HAVE_MPIUNI) 1314 } else { 1315 A->ops->mult = MatMult_MPIFFTW; 1316 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1317 #endif 1318 } 1319 fft->matdestroy = MatDestroy_FFTW; 1320 A->assembled = PETSC_TRUE; 1321 A->preallocated = PETSC_TRUE; 1322 1323 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW)); 1324 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW)); 1325 CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW)); 1326 1327 /* get runtime options */ 1328 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1329 CHKERRQ(PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg)); 1330 if (flg) fftw->p_flag = iplans[p_flag]; 1331 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } 1334