1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 33 /* MatMult_SeqFFTW performs forward DFT in parallel 34 Input parameter: 35 A - the matrix 36 x - the vector on which FDFT will be performed 37 38 Output parameter: 39 y - vector that stores result of FDFT 40 */ 41 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 42 { 43 PetscErrorCode ierr; 44 Mat_FFT *fft = (Mat_FFT*)A->data; 45 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 46 const PetscScalar *x_array; 47 PetscScalar *y_array; 48 #if defined(PETSC_USE_COMPLEX) 49 #if defined(PETSC_USE_64BIT_INDICES) 50 fftw_iodim64 *iodims; 51 #else 52 fftw_iodim *iodims; 53 #endif 54 PetscInt i; 55 #endif 56 PetscInt ndim = fft->ndim,*dim = fft->dim; 57 58 PetscFunctionBegin; 59 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 60 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 61 if (!fftw->p_forward) { /* create a plan, then excute it */ 62 switch (ndim) { 63 case 1: 64 #if defined(PETSC_USE_COMPLEX) 65 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 66 #else 67 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 68 #endif 69 break; 70 case 2: 71 #if defined(PETSC_USE_COMPLEX) 72 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 73 #else 74 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 75 #endif 76 break; 77 case 3: 78 #if defined(PETSC_USE_COMPLEX) 79 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 80 #else 81 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 82 #endif 83 break; 84 default: 85 #if defined(PETSC_USE_COMPLEX) 86 iodims = fftw->iodims; 87 #if defined(PETSC_USE_64BIT_INDICES) 88 if (ndim) { 89 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 90 iodims[ndim-1].is = iodims[ndim-1].os = 1; 91 for (i=ndim-2; i>=0; --i) { 92 iodims[i].n = (ptrdiff_t)dim[i]; 93 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 94 } 95 } 96 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 97 #else 98 if (ndim) { 99 iodims[ndim-1].n = (int)dim[ndim-1]; 100 iodims[ndim-1].is = iodims[ndim-1].os = 1; 101 for (i=ndim-2; i>=0; --i) { 102 iodims[i].n = (int)dim[i]; 103 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 104 } 105 } 106 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 107 #endif 108 109 #else 110 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 111 #endif 112 break; 113 } 114 fftw->finarray = (PetscScalar *) x_array; 115 fftw->foutarray = y_array; 116 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 117 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 118 fftw_execute(fftw->p_forward); 119 } else { /* use existing plan */ 120 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 121 #if defined(PETSC_USE_COMPLEX) 122 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 123 #else 124 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 125 #endif 126 } else { 127 fftw_execute(fftw->p_forward); 128 } 129 } 130 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 131 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 /* MatMultTranspose_SeqFFTW performs serial backward DFT 136 Input parameter: 137 A - the matrix 138 x - the vector on which BDFT will be performed 139 140 Output parameter: 141 y - vector that stores result of BDFT 142 */ 143 144 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 145 { 146 PetscErrorCode ierr; 147 Mat_FFT *fft = (Mat_FFT*)A->data; 148 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 149 const PetscScalar *x_array; 150 PetscScalar *y_array; 151 PetscInt ndim=fft->ndim,*dim=fft->dim; 152 #if defined(PETSC_USE_COMPLEX) 153 #if defined(PETSC_USE_64BIT_INDICES) 154 fftw_iodim64 *iodims=fftw->iodims; 155 #else 156 fftw_iodim *iodims=fftw->iodims; 157 #endif 158 #endif 159 160 PetscFunctionBegin; 161 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_backward) { /* create a plan, then excute it */ 164 switch (ndim) { 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 168 #else 169 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 175 #else 176 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 182 #else 183 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 #if defined(PETSC_USE_64BIT_INDICES) 189 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 190 #else 191 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 192 #endif 193 #else 194 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 195 #endif 196 break; 197 } 198 fftw->binarray = (PetscScalar *) x_array; 199 fftw->boutarray = y_array; 200 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 201 } else { /* use existing plan */ 202 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 203 #if defined(PETSC_USE_COMPLEX) 204 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 205 #else 206 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 207 #endif 208 } else { 209 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 210 } 211 } 212 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 213 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 214 PetscFunctionReturn(0); 215 } 216 217 /* MatMult_MPIFFTW performs forward DFT in parallel 218 Input parameter: 219 A - the matrix 220 x - the vector on which FDFT will be performed 221 222 Output parameter: 223 y - vector that stores result of FDFT 224 */ 225 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 226 { 227 PetscErrorCode ierr; 228 Mat_FFT *fft = (Mat_FFT*)A->data; 229 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 230 const PetscScalar *x_array; 231 PetscScalar *y_array; 232 PetscInt ndim=fft->ndim,*dim=fft->dim; 233 MPI_Comm comm; 234 235 PetscFunctionBegin; 236 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 237 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 238 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 239 if (!fftw->p_forward) { /* create a plan, then excute it */ 240 switch (ndim) { 241 case 1: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 244 #else 245 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 246 #endif 247 break; 248 case 2: 249 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 250 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 251 #else 252 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 case 3: 256 #if defined(PETSC_USE_COMPLEX) 257 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 258 #else 259 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 260 #endif 261 break; 262 default: 263 #if defined(PETSC_USE_COMPLEX) 264 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 265 #else 266 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 267 #endif 268 break; 269 } 270 fftw->finarray = (PetscScalar *) x_array; 271 fftw->foutarray = y_array; 272 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 273 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 274 fftw_execute(fftw->p_forward); 275 } else { /* use existing plan */ 276 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 277 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 278 } else { 279 fftw_execute(fftw->p_forward); 280 } 281 } 282 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 283 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 288 Input parameter: 289 A - the matrix 290 x - the vector on which BDFT will be performed 291 292 Output parameter: 293 y - vector that stores result of BDFT 294 */ 295 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 296 { 297 PetscErrorCode ierr; 298 Mat_FFT *fft = (Mat_FFT*)A->data; 299 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 300 const PetscScalar *x_array; 301 PetscScalar *y_array; 302 PetscInt ndim=fft->ndim,*dim=fft->dim; 303 MPI_Comm comm; 304 305 PetscFunctionBegin; 306 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 307 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 308 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 309 if (!fftw->p_backward) { /* create a plan, then excute it */ 310 switch (ndim) { 311 case 1: 312 #if defined(PETSC_USE_COMPLEX) 313 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 314 #else 315 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 316 #endif 317 break; 318 case 2: 319 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 320 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 321 #else 322 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 323 #endif 324 break; 325 case 3: 326 #if defined(PETSC_USE_COMPLEX) 327 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 328 #else 329 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 330 #endif 331 break; 332 default: 333 #if defined(PETSC_USE_COMPLEX) 334 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 335 #else 336 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 337 #endif 338 break; 339 } 340 fftw->binarray = (PetscScalar *) x_array; 341 fftw->boutarray = y_array; 342 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 343 } else { /* use existing plan */ 344 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 345 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 346 } else { 347 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 348 } 349 } 350 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 351 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 PetscErrorCode MatDestroy_FFTW(Mat A) 356 { 357 Mat_FFT *fft = (Mat_FFT*)A->data; 358 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 359 PetscErrorCode ierr; 360 361 PetscFunctionBegin; 362 fftw_destroy_plan(fftw->p_forward); 363 fftw_destroy_plan(fftw->p_backward); 364 if (fftw->iodims) { 365 free(fftw->iodims); 366 } 367 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 368 ierr = PetscFree(fft->data);CHKERRQ(ierr); 369 fftw_mpi_cleanup(); 370 PetscFunctionReturn(0); 371 } 372 373 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 374 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 375 { 376 PetscErrorCode ierr; 377 PetscScalar *array; 378 379 PetscFunctionBegin; 380 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 381 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 382 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 383 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 384 PetscFunctionReturn(0); 385 } 386 387 /*@ 388 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 389 parallel layout determined by FFTW 390 391 Collective on Mat 392 393 Input Parameter: 394 . A - the matrix 395 396 Output Parameter: 397 + x - (optional) input vector of forward FFTW 398 - y - (optional) output vector of forward FFTW 399 - z - (optional) output vector of backward FFTW 400 401 Level: advanced 402 403 Note: The parallel layout of output of forward FFTW is always same as the input 404 of the backward FFTW. But parallel layout of the input vector of forward 405 FFTW might not be same as the output of backward FFTW. 406 Also note that we need to provide enough space while doing parallel real transform. 407 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 408 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 409 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 410 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 411 zeros if it is odd only one zero is needed. 412 Lastly one needs some scratch space at the end of data set in each process. alloc_local 413 figures out how much space is needed, i.e. it figures out the data+scratch space for 414 each processor and returns that. 415 416 .seealso: MatCreateFFTW() 417 @*/ 418 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 419 { 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 428 { 429 PetscErrorCode ierr; 430 PetscMPIInt size,rank; 431 MPI_Comm comm; 432 Mat_FFT *fft = (Mat_FFT*)A->data; 433 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 434 PetscInt N = fft->N; 435 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 436 437 PetscFunctionBegin; 438 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 439 PetscValidType(A,1); 440 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 441 442 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 443 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 444 if (size == 1) { /* sequential case */ 445 #if defined(PETSC_USE_COMPLEX) 446 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 447 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 448 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 449 #else 450 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 451 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 452 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 453 #endif 454 } else { /* parallel cases */ 455 ptrdiff_t alloc_local,local_n0,local_0_start; 456 ptrdiff_t local_n1; 457 fftw_complex *data_fout; 458 ptrdiff_t local_1_start; 459 #if defined(PETSC_USE_COMPLEX) 460 fftw_complex *data_fin,*data_bout; 461 #else 462 double *data_finr,*data_boutr; 463 PetscInt n1,N1; 464 ptrdiff_t temp; 465 #endif 466 467 switch (ndim) { 468 case 1: 469 #if !defined(PETSC_USE_COMPLEX) 470 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 471 #else 472 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 473 if (fin) { 474 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 475 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 476 477 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 478 } 479 if (fout) { 480 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 481 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 482 483 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 484 } 485 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 486 if (bout) { 487 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 488 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 489 490 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 break; 493 #endif 494 case 2: 495 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 496 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 497 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 498 if (fin) { 499 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 500 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 501 502 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 503 } 504 if (bout) { 505 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 506 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 507 508 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 509 } 510 if (fout) { 511 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 513 514 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 515 } 516 #else 517 /* Get local size */ 518 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 519 if (fin) { 520 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 522 523 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 if (fout) { 526 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 527 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 528 529 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 if (bout) { 532 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 533 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 534 535 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 536 } 537 #endif 538 break; 539 case 3: 540 #if !defined(PETSC_USE_COMPLEX) 541 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 542 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 543 if (fin) { 544 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 545 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 546 547 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 548 } 549 if (bout) { 550 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 551 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 552 553 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 554 } 555 556 if (fout) { 557 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 558 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 559 560 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 561 } 562 #else 563 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 564 if (fin) { 565 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 566 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 567 568 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 if (fout) { 571 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 572 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 573 574 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 575 } 576 if (bout) { 577 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 578 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 579 580 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 #endif 583 break; 584 default: 585 #if !defined(PETSC_USE_COMPLEX) 586 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 587 588 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 589 590 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 591 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 592 593 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 594 595 if (fin) { 596 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 597 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 598 599 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 600 } 601 if (bout) { 602 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 603 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 604 605 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 606 } 607 608 if (fout) { 609 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 610 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 611 612 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 613 } 614 #else 615 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 616 if (fin) { 617 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 618 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 619 620 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 621 } 622 if (fout) { 623 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 624 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 625 626 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 627 } 628 if (bout) { 629 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 630 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 631 632 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 #endif 635 break; 636 } 637 } 638 PetscFunctionReturn(0); 639 } 640 641 /*@ 642 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 643 644 Collective on Mat 645 646 Input Parameters: 647 + A - FFTW matrix 648 - x - the PETSc vector 649 650 Output Parameters: 651 . y - the FFTW vector 652 653 Options Database Keys: 654 . -mat_fftw_plannerflags - set FFTW planner flags 655 656 Level: intermediate 657 658 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 659 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 660 zeros. This routine does that job by scattering operation. 661 662 .seealso: VecScatterFFTWToPetsc() 663 @*/ 664 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 665 { 666 PetscErrorCode ierr; 667 668 PetscFunctionBegin; 669 ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 674 { 675 PetscErrorCode ierr; 676 MPI_Comm comm; 677 Mat_FFT *fft = (Mat_FFT*)A->data; 678 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 679 PetscInt N =fft->N; 680 PetscInt ndim =fft->ndim,*dim=fft->dim; 681 PetscInt low; 682 PetscMPIInt rank,size; 683 PetscInt vsize,vsize1; 684 ptrdiff_t local_n0,local_0_start; 685 ptrdiff_t local_n1,local_1_start; 686 VecScatter vecscat; 687 IS list1,list2; 688 #if !defined(PETSC_USE_COMPLEX) 689 PetscInt i,j,k,partial_dim; 690 PetscInt *indx1, *indx2, tempindx, tempindx1; 691 PetscInt NM; 692 ptrdiff_t temp; 693 #endif 694 695 PetscFunctionBegin; 696 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 697 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 698 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 699 ierr = VecGetOwnershipRange(y,&low,NULL); 700 701 if (size==1) { 702 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 703 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 704 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 705 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 706 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 707 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 708 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 709 ierr = ISDestroy(&list1);CHKERRQ(ierr); 710 } else { 711 switch (ndim) { 712 case 1: 713 #if defined(PETSC_USE_COMPLEX) 714 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 715 716 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 717 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 718 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 719 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 720 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 721 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 722 ierr = ISDestroy(&list1);CHKERRQ(ierr); 723 ierr = ISDestroy(&list2);CHKERRQ(ierr); 724 #else 725 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 726 #endif 727 break; 728 case 2: 729 #if defined(PETSC_USE_COMPLEX) 730 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 731 732 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 733 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 734 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 735 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 736 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 737 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 738 ierr = ISDestroy(&list1);CHKERRQ(ierr); 739 ierr = ISDestroy(&list2);CHKERRQ(ierr); 740 #else 741 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 742 743 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 744 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 745 746 if (dim[1]%2==0) { 747 NM = dim[1]+2; 748 } else { 749 NM = dim[1]+1; 750 } 751 for (i=0; i<local_n0; i++) { 752 for (j=0; j<dim[1]; j++) { 753 tempindx = i*dim[1] + j; 754 tempindx1 = i*NM + j; 755 756 indx1[tempindx]=local_0_start*dim[1]+tempindx; 757 indx2[tempindx]=low+tempindx1; 758 } 759 } 760 761 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 762 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 763 764 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 765 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 766 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 768 ierr = ISDestroy(&list1);CHKERRQ(ierr); 769 ierr = ISDestroy(&list2);CHKERRQ(ierr); 770 ierr = PetscFree(indx1);CHKERRQ(ierr); 771 ierr = PetscFree(indx2);CHKERRQ(ierr); 772 #endif 773 break; 774 775 case 3: 776 #if defined(PETSC_USE_COMPLEX) 777 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 778 779 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 780 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 781 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 782 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 783 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 784 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 785 ierr = ISDestroy(&list1);CHKERRQ(ierr); 786 ierr = ISDestroy(&list2);CHKERRQ(ierr); 787 #else 788 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 789 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 790 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 791 792 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 793 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 794 795 if (dim[2]%2==0) NM = dim[2]+2; 796 else NM = dim[2]+1; 797 798 for (i=0; i<local_n0; i++) { 799 for (j=0; j<dim[1]; j++) { 800 for (k=0; k<dim[2]; k++) { 801 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 802 tempindx1 = i*dim[1]*NM + j*NM + k; 803 804 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 805 indx2[tempindx]=low+tempindx1; 806 } 807 } 808 } 809 810 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 811 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 812 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 813 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 814 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 815 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 816 ierr = ISDestroy(&list1);CHKERRQ(ierr); 817 ierr = ISDestroy(&list2);CHKERRQ(ierr); 818 ierr = PetscFree(indx1);CHKERRQ(ierr); 819 ierr = PetscFree(indx2);CHKERRQ(ierr); 820 #endif 821 break; 822 823 default: 824 #if defined(PETSC_USE_COMPLEX) 825 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 826 827 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 828 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 829 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 830 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 831 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 832 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 833 ierr = ISDestroy(&list1);CHKERRQ(ierr); 834 ierr = ISDestroy(&list2);CHKERRQ(ierr); 835 #else 836 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 837 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 838 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 839 840 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 841 842 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 843 844 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 845 846 partial_dim = fftw->partial_dim; 847 848 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 849 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 850 851 if (dim[ndim-1]%2==0) NM = 2; 852 else NM = 1; 853 854 j = low; 855 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 856 indx1[i] = local_0_start*partial_dim + i; 857 indx2[i] = j; 858 if (k%dim[ndim-1]==0) j+=NM; 859 j++; 860 } 861 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 862 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 863 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 864 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 866 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 867 ierr = ISDestroy(&list1);CHKERRQ(ierr); 868 ierr = ISDestroy(&list2);CHKERRQ(ierr); 869 ierr = PetscFree(indx1);CHKERRQ(ierr); 870 ierr = PetscFree(indx2);CHKERRQ(ierr); 871 #endif 872 break; 873 } 874 } 875 PetscFunctionReturn(0); 876 } 877 878 /*@ 879 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 880 881 Collective on Mat 882 883 Input Parameters: 884 + A - FFTW matrix 885 - x - FFTW vector 886 887 Output Parameters: 888 . y - PETSc vector 889 890 Level: intermediate 891 892 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 893 VecScatterFFTWToPetsc removes those extra zeros. 894 895 .seealso: VecScatterPetscToFFTW() 896 @*/ 897 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 898 { 899 PetscErrorCode ierr; 900 901 PetscFunctionBegin; 902 ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 907 { 908 PetscErrorCode ierr; 909 MPI_Comm comm; 910 Mat_FFT *fft = (Mat_FFT*)A->data; 911 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 912 PetscInt N = fft->N; 913 PetscInt ndim = fft->ndim,*dim=fft->dim; 914 PetscInt low; 915 PetscMPIInt rank,size; 916 ptrdiff_t local_n0,local_0_start; 917 ptrdiff_t local_n1,local_1_start; 918 VecScatter vecscat; 919 IS list1,list2; 920 #if !defined(PETSC_USE_COMPLEX) 921 PetscInt i,j,k,partial_dim; 922 PetscInt *indx1, *indx2, tempindx, tempindx1; 923 PetscInt NM; 924 ptrdiff_t temp; 925 #endif 926 927 PetscFunctionBegin; 928 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 929 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 930 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 931 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 932 933 if (size==1) { 934 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 935 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 936 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 938 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 939 ierr = ISDestroy(&list1);CHKERRQ(ierr); 940 941 } else { 942 switch (ndim) { 943 case 1: 944 #if defined(PETSC_USE_COMPLEX) 945 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 946 947 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 948 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 949 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 950 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 952 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 953 ierr = ISDestroy(&list1);CHKERRQ(ierr); 954 ierr = ISDestroy(&list2);CHKERRQ(ierr); 955 #else 956 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 957 #endif 958 break; 959 case 2: 960 #if defined(PETSC_USE_COMPLEX) 961 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 962 963 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 964 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 965 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 966 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 968 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 969 ierr = ISDestroy(&list1);CHKERRQ(ierr); 970 ierr = ISDestroy(&list2);CHKERRQ(ierr); 971 #else 972 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 973 974 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 975 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 976 977 if (dim[1]%2==0) NM = dim[1]+2; 978 else NM = dim[1]+1; 979 980 for (i=0; i<local_n0; i++) { 981 for (j=0; j<dim[1]; j++) { 982 tempindx = i*dim[1] + j; 983 tempindx1 = i*NM + j; 984 985 indx1[tempindx]=local_0_start*dim[1]+tempindx; 986 indx2[tempindx]=low+tempindx1; 987 } 988 } 989 990 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 991 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 992 993 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 994 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 995 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 997 ierr = ISDestroy(&list1);CHKERRQ(ierr); 998 ierr = ISDestroy(&list2);CHKERRQ(ierr); 999 ierr = PetscFree(indx1);CHKERRQ(ierr); 1000 ierr = PetscFree(indx2);CHKERRQ(ierr); 1001 #endif 1002 break; 1003 case 3: 1004 #if defined(PETSC_USE_COMPLEX) 1005 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1006 1007 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1008 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1009 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1010 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1013 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1014 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1015 #else 1016 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1017 1018 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1020 1021 if (dim[2]%2==0) NM = dim[2]+2; 1022 else NM = dim[2]+1; 1023 1024 for (i=0; i<local_n0; i++) { 1025 for (j=0; j<dim[1]; j++) { 1026 for (k=0; k<dim[2]; k++) { 1027 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1028 tempindx1 = i*dim[1]*NM + j*NM + k; 1029 1030 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1031 indx2[tempindx]=low+tempindx1; 1032 } 1033 } 1034 } 1035 1036 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1037 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1038 1039 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1040 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1043 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1044 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1045 ierr = PetscFree(indx1);CHKERRQ(ierr); 1046 ierr = PetscFree(indx2);CHKERRQ(ierr); 1047 #endif 1048 break; 1049 default: 1050 #if defined(PETSC_USE_COMPLEX) 1051 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1052 1053 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1054 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1055 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1056 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061 #else 1062 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1063 1064 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1065 1066 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1067 1068 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1069 1070 partial_dim = fftw->partial_dim; 1071 1072 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1073 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1074 1075 if (dim[ndim-1]%2==0) NM = 2; 1076 else NM = 1; 1077 1078 j = low; 1079 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1080 indx1[i] = local_0_start*partial_dim + i; 1081 indx2[i] = j; 1082 if (k%dim[ndim-1]==0) j+=NM; 1083 j++; 1084 } 1085 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1086 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1087 1088 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1089 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1091 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1092 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1093 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1094 ierr = PetscFree(indx1);CHKERRQ(ierr); 1095 ierr = PetscFree(indx2);CHKERRQ(ierr); 1096 #endif 1097 break; 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 /* 1104 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1105 1106 Options Database Keys: 1107 + -mat_fftw_plannerflags - set FFTW planner flags 1108 1109 Level: intermediate 1110 1111 */ 1112 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1113 { 1114 PetscErrorCode ierr; 1115 MPI_Comm comm; 1116 Mat_FFT *fft=(Mat_FFT*)A->data; 1117 Mat_FFTW *fftw; 1118 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1119 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1120 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1121 PetscBool flg; 1122 PetscInt p_flag,partial_dim=1,ctr; 1123 PetscMPIInt size,rank; 1124 ptrdiff_t *pdim; 1125 ptrdiff_t local_n1,local_1_start; 1126 #if !defined(PETSC_USE_COMPLEX) 1127 ptrdiff_t temp; 1128 PetscInt N1,tot_dim; 1129 #else 1130 PetscInt n1; 1131 #endif 1132 1133 PetscFunctionBegin; 1134 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1135 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1136 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1137 1138 fftw_mpi_init(); 1139 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1140 pdim[0] = dim[0]; 1141 #if !defined(PETSC_USE_COMPLEX) 1142 tot_dim = 2*dim[0]; 1143 #endif 1144 for (ctr=1; ctr<ndim; ctr++) { 1145 partial_dim *= dim[ctr]; 1146 pdim[ctr] = dim[ctr]; 1147 #if !defined(PETSC_USE_COMPLEX) 1148 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1149 else tot_dim *= dim[ctr]; 1150 #endif 1151 } 1152 1153 if (size == 1) { 1154 #if defined(PETSC_USE_COMPLEX) 1155 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1156 n = N; 1157 #else 1158 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1159 n = tot_dim; 1160 #endif 1161 1162 } else { 1163 ptrdiff_t local_n0,local_0_start; 1164 switch (ndim) { 1165 case 1: 1166 #if !defined(PETSC_USE_COMPLEX) 1167 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1168 #else 1169 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1170 1171 n = (PetscInt)local_n0; 1172 n1 = (PetscInt)local_n1; 1173 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1174 #endif 1175 break; 1176 case 2: 1177 #if defined(PETSC_USE_COMPLEX) 1178 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1179 /* 1180 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1181 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1182 */ 1183 n = (PetscInt)local_n0*dim[1]; 1184 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1185 #else 1186 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1187 1188 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1189 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1190 #endif 1191 break; 1192 case 3: 1193 #if defined(PETSC_USE_COMPLEX) 1194 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1195 1196 n = (PetscInt)local_n0*dim[1]*dim[2]; 1197 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1198 #else 1199 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1200 1201 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1202 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1203 #endif 1204 break; 1205 default: 1206 #if defined(PETSC_USE_COMPLEX) 1207 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1208 1209 n = (PetscInt)local_n0*partial_dim; 1210 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1211 #else 1212 temp = pdim[ndim-1]; 1213 1214 pdim[ndim-1] = temp/2 + 1; 1215 1216 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1217 1218 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1219 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1220 1221 pdim[ndim-1] = temp; 1222 1223 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1224 #endif 1225 break; 1226 } 1227 } 1228 free(pdim); 1229 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1230 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1231 fft->data = (void*)fftw; 1232 1233 fft->n = n; 1234 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1235 fftw->partial_dim = partial_dim; 1236 1237 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1238 if (size == 1) { 1239 #if defined(PETSC_USE_64BIT_INDICES) 1240 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1241 #else 1242 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1243 #endif 1244 } 1245 1246 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1247 1248 fftw->p_forward = 0; 1249 fftw->p_backward = 0; 1250 fftw->p_flag = FFTW_ESTIMATE; 1251 1252 if (size == 1) { 1253 A->ops->mult = MatMult_SeqFFTW; 1254 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1255 } else { 1256 A->ops->mult = MatMult_MPIFFTW; 1257 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1258 } 1259 fft->matdestroy = MatDestroy_FFTW; 1260 A->assembled = PETSC_TRUE; 1261 A->preallocated = PETSC_TRUE; 1262 1263 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1264 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1265 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1266 1267 /* get runtime options */ 1268 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1269 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1270 if (flg) { 1271 fftw->p_flag = iplans[p_flag]; 1272 } 1273 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 1278 1279 1280