1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #if !PetscDefined(HAVE_MPIUNI) 10 #include <fftw3-mpi.h> 11 #else 12 #include <fftw3.h> 13 #endif 14 EXTERN_C_END 15 16 typedef struct { 17 ptrdiff_t ndim_fftw,*dim_fftw; 18 #if defined(PETSC_USE_64BIT_INDICES) 19 fftw_iodim64 *iodims; 20 #else 21 fftw_iodim *iodims; 22 #endif 23 PetscInt partial_dim; 24 fftw_plan p_forward,p_backward; 25 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 26 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 27 executed for the arrays with which the plan was created */ 28 } Mat_FFTW; 29 30 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 31 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 32 extern PetscErrorCode MatDestroy_FFTW(Mat); 33 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 34 #if !PetscDefined(HAVE_MPIUNI) 35 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 36 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 37 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 38 #endif 39 40 /* 41 MatMult_SeqFFTW performs forward DFT 42 Input parameter: 43 A - the matrix 44 x - the vector on which FDFT will be performed 45 46 Output parameter: 47 y - vector that stores result of FDFT 48 */ 49 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 50 { 51 PetscErrorCode ierr; 52 Mat_FFT *fft = (Mat_FFT*)A->data; 53 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 54 const PetscScalar *x_array; 55 PetscScalar *y_array; 56 #if defined(PETSC_USE_COMPLEX) 57 #if defined(PETSC_USE_64BIT_INDICES) 58 fftw_iodim64 *iodims; 59 #else 60 fftw_iodim *iodims; 61 #endif 62 PetscInt i; 63 #endif 64 PetscInt ndim = fft->ndim,*dim = fft->dim; 65 66 PetscFunctionBegin; 67 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 68 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 69 if (!fftw->p_forward) { /* create a plan, then excute it */ 70 switch (ndim) { 71 case 1: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 2: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 case 3: 86 #if defined(PETSC_USE_COMPLEX) 87 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 88 #else 89 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 90 #endif 91 break; 92 default: 93 #if defined(PETSC_USE_COMPLEX) 94 iodims = fftw->iodims; 95 #if defined(PETSC_USE_64BIT_INDICES) 96 if (ndim) { 97 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 98 iodims[ndim-1].is = iodims[ndim-1].os = 1; 99 for (i=ndim-2; i>=0; --i) { 100 iodims[i].n = (ptrdiff_t)dim[i]; 101 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 102 } 103 } 104 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 105 #else 106 if (ndim) { 107 iodims[ndim-1].n = (int)dim[ndim-1]; 108 iodims[ndim-1].is = iodims[ndim-1].os = 1; 109 for (i=ndim-2; i>=0; --i) { 110 iodims[i].n = (int)dim[i]; 111 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 112 } 113 } 114 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 115 #endif 116 117 #else 118 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 119 #endif 120 break; 121 } 122 fftw->finarray = (PetscScalar *) x_array; 123 fftw->foutarray = y_array; 124 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 125 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 126 fftw_execute(fftw->p_forward); 127 } else { /* use existing plan */ 128 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 129 #if defined(PETSC_USE_COMPLEX) 130 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 131 #else 132 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 133 #endif 134 } else { 135 fftw_execute(fftw->p_forward); 136 } 137 } 138 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 139 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* MatMultTranspose_SeqFFTW performs serial backward DFT 144 Input parameter: 145 A - the matrix 146 x - the vector on which BDFT will be performed 147 148 Output parameter: 149 y - vector that stores result of BDFT 150 */ 151 152 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 153 { 154 PetscErrorCode ierr; 155 Mat_FFT *fft = (Mat_FFT*)A->data; 156 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 157 const PetscScalar *x_array; 158 PetscScalar *y_array; 159 PetscInt ndim = fft->ndim,*dim = fft->dim; 160 #if defined(PETSC_USE_COMPLEX) 161 #if defined(PETSC_USE_64BIT_INDICES) 162 fftw_iodim64 *iodims = fftw->iodims; 163 #else 164 fftw_iodim *iodims = fftw->iodims; 165 #endif 166 #endif 167 168 PetscFunctionBegin; 169 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 170 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 171 if (!fftw->p_backward) { /* create a plan, then excute it */ 172 switch (ndim) { 173 case 1: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 176 #else 177 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 178 #endif 179 break; 180 case 2: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183 #else 184 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 185 #endif 186 break; 187 case 3: 188 #if defined(PETSC_USE_COMPLEX) 189 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190 #else 191 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 192 #endif 193 break; 194 default: 195 #if defined(PETSC_USE_COMPLEX) 196 #if defined(PETSC_USE_64BIT_INDICES) 197 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 198 #else 199 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 200 #endif 201 #else 202 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 203 #endif 204 break; 205 } 206 fftw->binarray = (PetscScalar *) x_array; 207 fftw->boutarray = y_array; 208 fftw_execute(fftw->p_backward); 209 } else { /* use existing plan */ 210 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 211 #if defined(PETSC_USE_COMPLEX) 212 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 213 #else 214 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 215 #endif 216 } else { 217 fftw_execute(fftw->p_backward); 218 } 219 } 220 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 221 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #if !PetscDefined(HAVE_MPIUNI) 226 /* MatMult_MPIFFTW performs forward DFT in parallel 227 Input parameter: 228 A - the matrix 229 x - the vector on which FDFT will be performed 230 231 Output parameter: 232 y - vector that stores result of FDFT 233 */ 234 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 235 { 236 PetscErrorCode ierr; 237 Mat_FFT *fft = (Mat_FFT*)A->data; 238 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 239 const PetscScalar *x_array; 240 PetscScalar *y_array; 241 PetscInt ndim = fft->ndim,*dim = fft->dim; 242 MPI_Comm comm; 243 244 PetscFunctionBegin; 245 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 246 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 247 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 248 if (!fftw->p_forward) { /* create a plan, then excute it */ 249 switch (ndim) { 250 case 1: 251 #if defined(PETSC_USE_COMPLEX) 252 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 253 #else 254 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 255 #endif 256 break; 257 case 2: 258 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 259 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 260 #else 261 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 262 #endif 263 break; 264 case 3: 265 #if defined(PETSC_USE_COMPLEX) 266 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 267 #else 268 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 269 #endif 270 break; 271 default: 272 #if defined(PETSC_USE_COMPLEX) 273 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 274 #else 275 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 276 #endif 277 break; 278 } 279 fftw->finarray = (PetscScalar *) x_array; 280 fftw->foutarray = y_array; 281 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 282 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 283 fftw_execute(fftw->p_forward); 284 } else { /* use existing plan */ 285 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 286 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 287 } else { 288 fftw_execute(fftw->p_forward); 289 } 290 } 291 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 292 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 /* 297 MatMultTranspose_MPIFFTW performs parallel backward DFT 298 Input parameter: 299 A - the matrix 300 x - the vector on which BDFT will be performed 301 302 Output parameter: 303 y - vector that stores result of BDFT 304 */ 305 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 306 { 307 PetscErrorCode ierr; 308 Mat_FFT *fft = (Mat_FFT*)A->data; 309 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 310 const PetscScalar *x_array; 311 PetscScalar *y_array; 312 PetscInt ndim = fft->ndim,*dim = fft->dim; 313 MPI_Comm comm; 314 315 PetscFunctionBegin; 316 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 317 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 318 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 319 if (!fftw->p_backward) { /* create a plan, then excute it */ 320 switch (ndim) { 321 case 1: 322 #if defined(PETSC_USE_COMPLEX) 323 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 324 #else 325 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 326 #endif 327 break; 328 case 2: 329 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 330 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 331 #else 332 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 333 #endif 334 break; 335 case 3: 336 #if defined(PETSC_USE_COMPLEX) 337 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 338 #else 339 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 340 #endif 341 break; 342 default: 343 #if defined(PETSC_USE_COMPLEX) 344 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 345 #else 346 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 347 #endif 348 break; 349 } 350 fftw->binarray = (PetscScalar *) x_array; 351 fftw->boutarray = y_array; 352 fftw_execute(fftw->p_backward); 353 } else { /* use existing plan */ 354 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 355 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 356 } else { 357 fftw_execute(fftw->p_backward); 358 } 359 } 360 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 361 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 #endif 365 366 PetscErrorCode MatDestroy_FFTW(Mat A) 367 { 368 Mat_FFT *fft = (Mat_FFT*)A->data; 369 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 370 PetscErrorCode ierr; 371 372 PetscFunctionBegin; 373 fftw_destroy_plan(fftw->p_forward); 374 fftw_destroy_plan(fftw->p_backward); 375 if (fftw->iodims) { 376 free(fftw->iodims); 377 } 378 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 379 ierr = PetscFree(fft->data);CHKERRQ(ierr); 380 #if !PetscDefined(HAVE_MPIUNI) 381 fftw_mpi_cleanup(); 382 #endif 383 PetscFunctionReturn(0); 384 } 385 386 #if !PetscDefined(HAVE_MPIUNI) 387 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 388 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 389 { 390 PetscErrorCode ierr; 391 PetscScalar *array; 392 393 PetscFunctionBegin; 394 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 395 fftw_free((fftw_complex*)array); 396 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 397 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 398 PetscFunctionReturn(0); 399 } 400 #endif 401 402 #if !PetscDefined(HAVE_MPIUNI) 403 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 404 { 405 PetscErrorCode ierr; 406 Mat A; 407 408 PetscFunctionBegin; 409 ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 410 ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 411 PetscFunctionReturn(0); 412 } 413 414 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 415 { 416 PetscErrorCode ierr; 417 Mat A; 418 419 PetscFunctionBegin; 420 ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 421 ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 422 PetscFunctionReturn(0); 423 } 424 425 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 426 { 427 PetscErrorCode ierr; 428 Mat A; 429 430 PetscFunctionBegin; 431 ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 432 ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 #endif 436 437 /*@ 438 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 439 parallel layout determined by FFTW 440 441 Collective on Mat 442 443 Input Parameter: 444 . A - the matrix 445 446 Output Parameters: 447 + x - (optional) input vector of forward FFTW 448 . y - (optional) output vector of forward FFTW 449 - z - (optional) output vector of backward FFTW 450 451 Level: advanced 452 453 Note: The parallel layout of output of forward FFTW is always same as the input 454 of the backward FFTW. But parallel layout of the input vector of forward 455 FFTW might not be same as the output of backward FFTW. 456 Also note that we need to provide enough space while doing parallel real transform. 457 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 458 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 459 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 460 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 461 zeros if it is odd only one zero is needed. 462 Lastly one needs some scratch space at the end of data set in each process. alloc_local 463 figures out how much space is needed, i.e. it figures out the data+scratch space for 464 each processor and returns that. 465 466 .seealso: MatCreateFFT() 467 @*/ 468 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 469 { 470 PetscErrorCode ierr; 471 472 PetscFunctionBegin; 473 ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 474 PetscFunctionReturn(0); 475 } 476 477 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 478 { 479 PetscErrorCode ierr; 480 PetscMPIInt size,rank; 481 MPI_Comm comm; 482 Mat_FFT *fft = (Mat_FFT*)A->data; 483 484 PetscFunctionBegin; 485 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 486 PetscValidType(A,1); 487 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 488 489 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 490 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 491 if (size == 1) { /* sequential case */ 492 #if defined(PETSC_USE_COMPLEX) 493 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fin);CHKERRQ(ierr);} 494 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,fout);CHKERRQ(ierr);} 495 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->N,bout);CHKERRQ(ierr);} 496 #else 497 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fin);CHKERRQ(ierr);} 498 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,fout);CHKERRQ(ierr);} 499 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,fft->n,bout);CHKERRQ(ierr);} 500 #endif 501 #if !PetscDefined(HAVE_MPIUNI) 502 } else { /* parallel cases */ 503 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 504 PetscInt ndim = fft->ndim,*dim = fft->dim; 505 ptrdiff_t alloc_local,local_n0,local_0_start; 506 ptrdiff_t local_n1; 507 fftw_complex *data_fout; 508 ptrdiff_t local_1_start; 509 #if defined(PETSC_USE_COMPLEX) 510 fftw_complex *data_fin,*data_bout; 511 #else 512 double *data_finr,*data_boutr; 513 PetscInt n1,N1; 514 ptrdiff_t temp; 515 #endif 516 517 switch (ndim) { 518 case 1: 519 #if !defined(PETSC_USE_COMPLEX) 520 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 521 #else 522 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 523 if (fin) { 524 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525 ierr = VecCreateMPIWithArray(comm,1,local_n0,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 526 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 527 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 528 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 529 } 530 if (fout) { 531 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 532 ierr = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 533 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 534 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 535 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 536 } 537 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 538 if (bout) { 539 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 540 ierr = VecCreateMPIWithArray(comm,1,local_n1,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 541 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 542 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 543 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 break; 546 #endif 547 case 2: 548 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 549 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 550 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 551 if (fin) { 552 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 553 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 554 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 555 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 556 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (fout) { 559 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 561 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 562 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 563 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (bout) { 566 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 567 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 568 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 569 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 570 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 571 } 572 #else 573 /* Get local size */ 574 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 575 if (fin) { 576 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 578 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 579 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 580 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 if (fout) { 583 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 584 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 586 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 587 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 588 } 589 if (bout) { 590 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 591 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 592 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 593 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 594 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 595 } 596 #endif 597 break; 598 case 3: 599 #if !defined(PETSC_USE_COMPLEX) 600 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 601 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 602 if (fin) { 603 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 604 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 605 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 606 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 607 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 if (fout) { 610 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 611 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 612 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 613 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 614 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 615 } 616 if (bout) { 617 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 618 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 619 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 620 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 621 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 622 } 623 #else 624 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 625 if (fin) { 626 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 627 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 628 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 629 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 630 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 if (fout) { 633 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 634 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 635 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 636 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 637 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 638 } 639 if (bout) { 640 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 641 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 642 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 643 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 644 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 645 } 646 #endif 647 break; 648 default: 649 #if !defined(PETSC_USE_COMPLEX) 650 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 651 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 652 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 653 N1 = 2*fft->N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 654 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 655 656 if (fin) { 657 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 658 ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 659 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 660 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 661 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 662 } 663 if (fout) { 664 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 665 ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 666 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 667 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 668 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 669 } 670 if (bout) { 671 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 672 ierr = VecCreateMPIWithArray(comm,1,fft->n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 673 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 674 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 675 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 676 } 677 #else 678 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 679 if (fin) { 680 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 681 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 682 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 683 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 684 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 685 } 686 if (fout) { 687 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 688 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 689 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 690 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 691 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 692 } 693 if (bout) { 694 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 695 ierr = VecCreateMPIWithArray(comm,1,fft->n,fft->N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 696 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 697 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 698 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 699 } 700 #endif 701 break; 702 } 703 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 704 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 705 memory leaks. We void these pointers here to avoid acident uses. 706 */ 707 if (fin) (*fin)->ops->replacearray = NULL; 708 if (fout) (*fout)->ops->replacearray = NULL; 709 if (bout) (*bout)->ops->replacearray = NULL; 710 #endif 711 } 712 PetscFunctionReturn(0); 713 } 714 715 /*@ 716 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 717 718 Collective on Mat 719 720 Input Parameters: 721 + A - FFTW matrix 722 - x - the PETSc vector 723 724 Output Parameters: 725 . y - the FFTW vector 726 727 Options Database Keys: 728 . -mat_fftw_plannerflags - set FFTW planner flags 729 730 Level: intermediate 731 732 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 733 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 734 zeros. This routine does that job by scattering operation. 735 736 .seealso: VecScatterFFTWToPetsc() 737 @*/ 738 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 739 { 740 PetscErrorCode ierr; 741 742 PetscFunctionBegin; 743 ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 748 { 749 PetscErrorCode ierr; 750 MPI_Comm comm; 751 Mat_FFT *fft = (Mat_FFT*)A->data; 752 PetscInt low; 753 PetscMPIInt rank,size; 754 PetscInt vsize,vsize1; 755 VecScatter vecscat; 756 IS list1; 757 758 PetscFunctionBegin; 759 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 760 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 761 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 762 ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr); 763 764 if (size==1) { 765 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 766 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 767 ierr = ISCreateStride(PETSC_COMM_SELF,fft->N,0,1,&list1);CHKERRQ(ierr); 768 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 769 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 771 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 772 ierr = ISDestroy(&list1);CHKERRQ(ierr); 773 #if !PetscDefined(HAVE_MPIUNI) 774 } else { 775 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 776 PetscInt ndim = fft->ndim,*dim = fft->dim; 777 ptrdiff_t local_n0,local_0_start; 778 ptrdiff_t local_n1,local_1_start; 779 IS list2; 780 #if !defined(PETSC_USE_COMPLEX) 781 PetscInt i,j,k,partial_dim; 782 PetscInt *indx1, *indx2, tempindx, tempindx1; 783 PetscInt NM; 784 ptrdiff_t temp; 785 #endif 786 787 switch (ndim) { 788 case 1: 789 #if defined(PETSC_USE_COMPLEX) 790 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 791 792 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);CHKERRQ(ierr); 793 ierr = ISCreateStride(comm,local_n0,low,1,&list2);CHKERRQ(ierr); 794 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 795 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 796 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 798 ierr = ISDestroy(&list1);CHKERRQ(ierr); 799 ierr = ISDestroy(&list2);CHKERRQ(ierr); 800 #else 801 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 802 #endif 803 break; 804 case 2: 805 #if defined(PETSC_USE_COMPLEX) 806 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 807 808 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr); 809 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr); 810 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 811 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 812 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 813 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 814 ierr = ISDestroy(&list1);CHKERRQ(ierr); 815 ierr = ISDestroy(&list2);CHKERRQ(ierr); 816 #else 817 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 818 819 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 820 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 821 822 if (dim[1]%2==0) { 823 NM = dim[1]+2; 824 } else { 825 NM = dim[1]+1; 826 } 827 for (i=0; i<local_n0; i++) { 828 for (j=0; j<dim[1]; j++) { 829 tempindx = i*dim[1] + j; 830 tempindx1 = i*NM + j; 831 832 indx1[tempindx]=local_0_start*dim[1]+tempindx; 833 indx2[tempindx]=low+tempindx1; 834 } 835 } 836 837 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 838 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 839 840 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 841 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 844 ierr = ISDestroy(&list1);CHKERRQ(ierr); 845 ierr = ISDestroy(&list2);CHKERRQ(ierr); 846 ierr = PetscFree(indx1);CHKERRQ(ierr); 847 ierr = PetscFree(indx2);CHKERRQ(ierr); 848 #endif 849 break; 850 851 case 3: 852 #if defined(PETSC_USE_COMPLEX) 853 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 854 855 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr); 856 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr); 857 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 858 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 860 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 861 ierr = ISDestroy(&list1);CHKERRQ(ierr); 862 ierr = ISDestroy(&list2);CHKERRQ(ierr); 863 #else 864 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 865 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 866 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 867 868 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 869 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 870 871 if (dim[2]%2==0) NM = dim[2]+2; 872 else NM = dim[2]+1; 873 874 for (i=0; i<local_n0; i++) { 875 for (j=0; j<dim[1]; j++) { 876 for (k=0; k<dim[2]; k++) { 877 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 878 tempindx1 = i*dim[1]*NM + j*NM + k; 879 880 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 881 indx2[tempindx]=low+tempindx1; 882 } 883 } 884 } 885 886 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 887 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 888 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 889 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 891 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 892 ierr = ISDestroy(&list1);CHKERRQ(ierr); 893 ierr = ISDestroy(&list2);CHKERRQ(ierr); 894 ierr = PetscFree(indx1);CHKERRQ(ierr); 895 ierr = PetscFree(indx2);CHKERRQ(ierr); 896 #endif 897 break; 898 899 default: 900 #if defined(PETSC_USE_COMPLEX) 901 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 902 903 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr); 904 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr); 905 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 906 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 907 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 908 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 909 ierr = ISDestroy(&list1);CHKERRQ(ierr); 910 ierr = ISDestroy(&list2);CHKERRQ(ierr); 911 #else 912 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 913 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 914 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 915 916 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 917 918 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 919 920 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 921 922 partial_dim = fftw->partial_dim; 923 924 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 925 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 926 927 if (dim[ndim-1]%2==0) NM = 2; 928 else NM = 1; 929 930 j = low; 931 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 932 indx1[i] = local_0_start*partial_dim + i; 933 indx2[i] = j; 934 if (k%dim[ndim-1]==0) j+=NM; 935 j++; 936 } 937 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 938 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 939 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 940 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 943 ierr = ISDestroy(&list1);CHKERRQ(ierr); 944 ierr = ISDestroy(&list2);CHKERRQ(ierr); 945 ierr = PetscFree(indx1);CHKERRQ(ierr); 946 ierr = PetscFree(indx2);CHKERRQ(ierr); 947 #endif 948 break; 949 } 950 #endif 951 } 952 PetscFunctionReturn(0); 953 } 954 955 /*@ 956 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 957 958 Collective on Mat 959 960 Input Parameters: 961 + A - FFTW matrix 962 - x - FFTW vector 963 964 Output Parameters: 965 . y - PETSc vector 966 967 Level: intermediate 968 969 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 970 VecScatterFFTWToPetsc removes those extra zeros. 971 972 .seealso: VecScatterPetscToFFTW() 973 @*/ 974 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 975 { 976 PetscErrorCode ierr; 977 978 PetscFunctionBegin; 979 ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 983 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 984 { 985 PetscErrorCode ierr; 986 MPI_Comm comm; 987 Mat_FFT *fft = (Mat_FFT*)A->data; 988 PetscInt low; 989 PetscMPIInt rank,size; 990 VecScatter vecscat; 991 IS list1; 992 993 PetscFunctionBegin; 994 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 995 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 996 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 997 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 998 999 if (size==1) { 1000 ierr = ISCreateStride(comm,fft->N,0,1,&list1);CHKERRQ(ierr); 1001 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1002 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1005 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1006 1007 #if !PetscDefined(HAVE_MPIUNI) 1008 } else { 1009 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1010 PetscInt ndim = fft->ndim,*dim = fft->dim; 1011 ptrdiff_t local_n0,local_0_start; 1012 ptrdiff_t local_n1,local_1_start; 1013 IS list2; 1014 #if !defined(PETSC_USE_COMPLEX) 1015 PetscInt i,j,k,partial_dim; 1016 PetscInt *indx1, *indx2, tempindx, tempindx1; 1017 PetscInt NM; 1018 ptrdiff_t temp; 1019 #endif 1020 switch (ndim) { 1021 case 1: 1022 #if defined(PETSC_USE_COMPLEX) 1023 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1024 1025 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);CHKERRQ(ierr); 1026 ierr = ISCreateStride(comm,local_n1,low,1,&list2);CHKERRQ(ierr); 1027 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1031 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1032 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1033 #else 1034 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1035 #endif 1036 break; 1037 case 2: 1038 #if defined(PETSC_USE_COMPLEX) 1039 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1040 1041 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr); 1042 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr); 1043 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1044 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1047 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1048 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1049 #else 1050 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1051 1052 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1053 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1054 1055 if (dim[1]%2==0) NM = dim[1]+2; 1056 else NM = dim[1]+1; 1057 1058 for (i=0; i<local_n0; i++) { 1059 for (j=0; j<dim[1]; j++) { 1060 tempindx = i*dim[1] + j; 1061 tempindx1 = i*NM + j; 1062 1063 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1064 indx2[tempindx]=low+tempindx1; 1065 } 1066 } 1067 1068 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1069 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1070 1071 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1072 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077 ierr = PetscFree(indx1);CHKERRQ(ierr); 1078 ierr = PetscFree(indx2);CHKERRQ(ierr); 1079 #endif 1080 break; 1081 case 3: 1082 #if defined(PETSC_USE_COMPLEX) 1083 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1084 1085 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr); 1086 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr); 1087 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1088 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1091 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1092 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1093 #else 1094 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1095 1096 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1097 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1098 1099 if (dim[2]%2==0) NM = dim[2]+2; 1100 else NM = dim[2]+1; 1101 1102 for (i=0; i<local_n0; i++) { 1103 for (j=0; j<dim[1]; j++) { 1104 for (k=0; k<dim[2]; k++) { 1105 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1106 tempindx1 = i*dim[1]*NM + j*NM + k; 1107 1108 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1109 indx2[tempindx]=low+tempindx1; 1110 } 1111 } 1112 } 1113 1114 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1115 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1116 1117 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1118 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1121 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1122 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1123 ierr = PetscFree(indx1);CHKERRQ(ierr); 1124 ierr = PetscFree(indx2);CHKERRQ(ierr); 1125 #endif 1126 break; 1127 default: 1128 #if defined(PETSC_USE_COMPLEX) 1129 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1130 1131 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr); 1132 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr); 1133 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1134 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1136 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1137 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1138 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1139 #else 1140 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1141 1142 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1143 1144 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1145 1146 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1147 1148 partial_dim = fftw->partial_dim; 1149 1150 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1151 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1152 1153 if (dim[ndim-1]%2==0) NM = 2; 1154 else NM = 1; 1155 1156 j = low; 1157 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1158 indx1[i] = local_0_start*partial_dim + i; 1159 indx2[i] = j; 1160 if (k%dim[ndim-1]==0) j+=NM; 1161 j++; 1162 } 1163 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1164 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1165 1166 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1167 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1168 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1169 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1170 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1171 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1172 ierr = PetscFree(indx1);CHKERRQ(ierr); 1173 ierr = PetscFree(indx2);CHKERRQ(ierr); 1174 #endif 1175 break; 1176 } 1177 #endif 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 /* 1183 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1184 1185 Options Database Keys: 1186 + -mat_fftw_plannerflags - set FFTW planner flags 1187 1188 Level: intermediate 1189 1190 */ 1191 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1192 { 1193 PetscErrorCode ierr; 1194 MPI_Comm comm; 1195 Mat_FFT *fft = (Mat_FFT*)A->data; 1196 Mat_FFTW *fftw; 1197 PetscInt ndim = fft->ndim,*dim = fft->dim; 1198 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1199 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1200 PetscBool flg; 1201 PetscInt p_flag,partial_dim=1,ctr; 1202 PetscMPIInt size,rank; 1203 ptrdiff_t *pdim; 1204 #if !defined(PETSC_USE_COMPLEX) 1205 PetscInt tot_dim; 1206 #endif 1207 1208 PetscFunctionBegin; 1209 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1210 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 1211 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 1212 1213 #if !PetscDefined(HAVE_MPIUNI) 1214 fftw_mpi_init(); 1215 #endif 1216 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1217 pdim[0] = dim[0]; 1218 #if !defined(PETSC_USE_COMPLEX) 1219 tot_dim = 2*dim[0]; 1220 #endif 1221 for (ctr=1; ctr<ndim; ctr++) { 1222 partial_dim *= dim[ctr]; 1223 pdim[ctr] = dim[ctr]; 1224 #if !defined(PETSC_USE_COMPLEX) 1225 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1226 else tot_dim *= dim[ctr]; 1227 #endif 1228 } 1229 1230 if (size == 1) { 1231 #if defined(PETSC_USE_COMPLEX) 1232 ierr = MatSetSizes(A,fft->N,fft->N,fft->N,fft->N);CHKERRQ(ierr); 1233 fft->n = fft->N; 1234 #else 1235 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1236 fft->n = tot_dim; 1237 #endif 1238 #if !PetscDefined(HAVE_MPIUNI) 1239 } else { 1240 ptrdiff_t local_n0,local_0_start,local_n1,local_1_start; 1241 #if !defined(PETSC_USE_COMPLEX) 1242 ptrdiff_t temp; 1243 PetscInt N1; 1244 #endif 1245 1246 switch (ndim) { 1247 case 1: 1248 #if !defined(PETSC_USE_COMPLEX) 1249 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1250 #else 1251 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1252 fft->n = (PetscInt)local_n0; 1253 ierr = MatSetSizes(A,local_n1,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1254 #endif 1255 break; 1256 case 2: 1257 #if defined(PETSC_USE_COMPLEX) 1258 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1259 fft->n = (PetscInt)local_n0*dim[1]; 1260 ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1261 #else 1262 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1263 1264 fft->n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1265 ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));CHKERRQ(ierr); 1266 #endif 1267 break; 1268 case 3: 1269 #if defined(PETSC_USE_COMPLEX) 1270 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1271 1272 fft->n = (PetscInt)local_n0*dim[1]*dim[2]; 1273 ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1274 #else 1275 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1276 1277 fft->n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1278 ierr = MatSetSizes(A,fft->n,fft->n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));CHKERRQ(ierr); 1279 #endif 1280 break; 1281 default: 1282 #if defined(PETSC_USE_COMPLEX) 1283 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1284 1285 fft->n = (PetscInt)local_n0*partial_dim; 1286 ierr = MatSetSizes(A,fft->n,fft->n,fft->N,fft->N);CHKERRQ(ierr); 1287 #else 1288 temp = pdim[ndim-1]; 1289 1290 pdim[ndim-1] = temp/2 + 1; 1291 1292 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1293 1294 fft->n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1295 N1 = 2*fft->N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1296 1297 pdim[ndim-1] = temp; 1298 1299 ierr = MatSetSizes(A,fft->n,fft->n,N1,N1);CHKERRQ(ierr); 1300 #endif 1301 break; 1302 } 1303 #endif 1304 } 1305 free(pdim); 1306 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1307 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1308 fft->data = (void*)fftw; 1309 1310 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1311 fftw->partial_dim = partial_dim; 1312 1313 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1314 if (size == 1) { 1315 #if defined(PETSC_USE_64BIT_INDICES) 1316 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1317 #else 1318 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1319 #endif 1320 } 1321 1322 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1323 1324 fftw->p_forward = NULL; 1325 fftw->p_backward = NULL; 1326 fftw->p_flag = FFTW_ESTIMATE; 1327 1328 if (size == 1) { 1329 A->ops->mult = MatMult_SeqFFTW; 1330 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1331 #if !PetscDefined(HAVE_MPIUNI) 1332 } else { 1333 A->ops->mult = MatMult_MPIFFTW; 1334 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1335 #endif 1336 } 1337 fft->matdestroy = MatDestroy_FFTW; 1338 A->assembled = PETSC_TRUE; 1339 A->preallocated = PETSC_TRUE; 1340 1341 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1342 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1343 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1344 1345 /* get runtime options */ 1346 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1347 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1348 if (flg) { 1349 fftw->p_flag = iplans[p_flag]; 1350 } 1351 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } 1354 1355