1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 33 34 /* MatMult_SeqFFTW performs forward DFT in parallel 35 Input parameter: 36 A - the matrix 37 x - the vector on which FDFT will be performed 38 39 Output parameter: 40 y - vector that stores result of FDFT 41 */ 42 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 43 { 44 PetscErrorCode ierr; 45 Mat_FFT *fft = (Mat_FFT*)A->data; 46 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 47 const PetscScalar *x_array; 48 PetscScalar *y_array; 49 #if defined(PETSC_USE_COMPLEX) 50 #if defined(PETSC_USE_64BIT_INDICES) 51 fftw_iodim64 *iodims; 52 #else 53 fftw_iodim *iodims; 54 #endif 55 PetscInt i; 56 #endif 57 PetscInt ndim = fft->ndim,*dim = fft->dim; 58 59 PetscFunctionBegin; 60 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 61 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62 if (!fftw->p_forward) { /* create a plan, then excute it */ 63 switch (ndim) { 64 case 1: 65 #if defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 2: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 3: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 default: 86 #if defined(PETSC_USE_COMPLEX) 87 iodims = fftw->iodims; 88 #if defined(PETSC_USE_64BIT_INDICES) 89 if (ndim) { 90 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 91 iodims[ndim-1].is = iodims[ndim-1].os = 1; 92 for (i=ndim-2; i>=0; --i) { 93 iodims[i].n = (ptrdiff_t)dim[i]; 94 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 95 } 96 } 97 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98 #else 99 if (ndim) { 100 iodims[ndim-1].n = (int)dim[ndim-1]; 101 iodims[ndim-1].is = iodims[ndim-1].os = 1; 102 for (i=ndim-2; i>=0; --i) { 103 iodims[i].n = (int)dim[i]; 104 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105 } 106 } 107 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108 #endif 109 110 #else 111 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 112 #endif 113 break; 114 } 115 fftw->finarray = (PetscScalar *) x_array; 116 fftw->foutarray = y_array; 117 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119 fftw_execute(fftw->p_forward); 120 } else { /* use existing plan */ 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 #if defined(PETSC_USE_COMPLEX) 123 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 124 #else 125 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 126 #endif 127 } else { 128 fftw_execute(fftw->p_forward); 129 } 130 } 131 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 132 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* MatMultTranspose_SeqFFTW performs serial backward DFT 137 Input parameter: 138 A - the matrix 139 x - the vector on which BDFT will be performed 140 141 Output parameter: 142 y - vector that stores result of BDFT 143 */ 144 145 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 146 { 147 PetscErrorCode ierr; 148 Mat_FFT *fft = (Mat_FFT*)A->data; 149 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 150 const PetscScalar *x_array; 151 PetscScalar *y_array; 152 PetscInt ndim=fft->ndim,*dim=fft->dim; 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_64BIT_INDICES) 155 fftw_iodim64 *iodims=fftw->iodims; 156 #else 157 fftw_iodim *iodims=fftw->iodims; 158 #endif 159 #endif 160 161 PetscFunctionBegin; 162 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 163 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164 if (!fftw->p_backward) { /* create a plan, then excute it */ 165 switch (ndim) { 166 case 1: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 169 #else 170 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 171 #endif 172 break; 173 case 2: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 176 #else 177 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 178 #endif 179 break; 180 case 3: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183 #else 184 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 185 #endif 186 break; 187 default: 188 #if defined(PETSC_USE_COMPLEX) 189 #if defined(PETSC_USE_64BIT_INDICES) 190 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 191 #else 192 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 193 #endif 194 #else 195 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 196 #endif 197 break; 198 } 199 fftw->binarray = (PetscScalar *) x_array; 200 fftw->boutarray = y_array; 201 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 202 } else { /* use existing plan */ 203 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 204 #if defined(PETSC_USE_COMPLEX) 205 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 206 #else 207 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 208 #endif 209 } else { 210 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 211 } 212 } 213 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 /* MatMult_MPIFFTW performs forward DFT in parallel 219 Input parameter: 220 A - the matrix 221 x - the vector on which FDFT will be performed 222 223 Output parameter: 224 y - vector that stores result of FDFT 225 */ 226 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 227 { 228 PetscErrorCode ierr; 229 Mat_FFT *fft = (Mat_FFT*)A->data; 230 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 231 const PetscScalar *x_array; 232 PetscScalar *y_array; 233 PetscInt ndim=fft->ndim,*dim=fft->dim; 234 MPI_Comm comm; 235 236 PetscFunctionBegin; 237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 239 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 240 if (!fftw->p_forward) { /* create a plan, then excute it */ 241 switch (ndim) { 242 case 1: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 245 #else 246 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 247 #endif 248 break; 249 case 2: 250 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 251 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 252 #else 253 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 254 #endif 255 break; 256 case 3: 257 #if defined(PETSC_USE_COMPLEX) 258 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 259 #else 260 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 261 #endif 262 break; 263 default: 264 #if defined(PETSC_USE_COMPLEX) 265 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 266 #else 267 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 268 #endif 269 break; 270 } 271 fftw->finarray = (PetscScalar *) x_array; 272 fftw->foutarray = y_array; 273 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 274 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 275 fftw_execute(fftw->p_forward); 276 } else { /* use existing plan */ 277 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 278 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 279 } else { 280 fftw_execute(fftw->p_forward); 281 } 282 } 283 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 284 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 289 Input parameter: 290 A - the matrix 291 x - the vector on which BDFT will be performed 292 293 Output parameter: 294 y - vector that stores result of BDFT 295 */ 296 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 297 { 298 PetscErrorCode ierr; 299 Mat_FFT *fft = (Mat_FFT*)A->data; 300 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 301 const PetscScalar *x_array; 302 PetscScalar *y_array; 303 PetscInt ndim=fft->ndim,*dim=fft->dim; 304 MPI_Comm comm; 305 306 PetscFunctionBegin; 307 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 308 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 309 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 310 if (!fftw->p_backward) { /* create a plan, then excute it */ 311 switch (ndim) { 312 case 1: 313 #if defined(PETSC_USE_COMPLEX) 314 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 315 #else 316 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 317 #endif 318 break; 319 case 2: 320 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 321 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 322 #else 323 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 324 #endif 325 break; 326 case 3: 327 #if defined(PETSC_USE_COMPLEX) 328 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 329 #else 330 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 331 #endif 332 break; 333 default: 334 #if defined(PETSC_USE_COMPLEX) 335 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 336 #else 337 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 338 #endif 339 break; 340 } 341 fftw->binarray = (PetscScalar *) x_array; 342 fftw->boutarray = y_array; 343 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344 } else { /* use existing plan */ 345 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 346 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 347 } else { 348 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 349 } 350 } 351 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 352 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 PetscErrorCode MatDestroy_FFTW(Mat A) 357 { 358 Mat_FFT *fft = (Mat_FFT*)A->data; 359 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360 PetscErrorCode ierr; 361 362 PetscFunctionBegin; 363 fftw_destroy_plan(fftw->p_forward); 364 fftw_destroy_plan(fftw->p_backward); 365 if (fftw->iodims) { 366 free(fftw->iodims); 367 } 368 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 369 ierr = PetscFree(fft->data);CHKERRQ(ierr); 370 fftw_mpi_cleanup(); 371 PetscFunctionReturn(0); 372 } 373 374 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 375 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 376 { 377 PetscErrorCode ierr; 378 PetscScalar *array; 379 380 PetscFunctionBegin; 381 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 382 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 383 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 384 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 389 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 390 { 391 PetscErrorCode ierr; 392 Mat A; 393 394 PetscFunctionBegin; 395 ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 396 ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 401 { 402 PetscErrorCode ierr; 403 Mat A; 404 405 PetscFunctionBegin; 406 ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 407 ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 412 { 413 PetscErrorCode ierr; 414 Mat A; 415 416 PetscFunctionBegin; 417 ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 418 ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 423 /*@ 424 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 425 parallel layout determined by FFTW 426 427 Collective on Mat 428 429 Input Parameter: 430 . A - the matrix 431 432 Output Parameter: 433 + x - (optional) input vector of forward FFTW 434 - y - (optional) output vector of forward FFTW 435 - z - (optional) output vector of backward FFTW 436 437 Level: advanced 438 439 Note: The parallel layout of output of forward FFTW is always same as the input 440 of the backward FFTW. But parallel layout of the input vector of forward 441 FFTW might not be same as the output of backward FFTW. 442 Also note that we need to provide enough space while doing parallel real transform. 443 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 444 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 445 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 446 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 447 zeros if it is odd only one zero is needed. 448 Lastly one needs some scratch space at the end of data set in each process. alloc_local 449 figures out how much space is needed, i.e. it figures out the data+scratch space for 450 each processor and returns that. 451 452 .seealso: MatCreateFFTW() 453 @*/ 454 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 455 { 456 PetscErrorCode ierr; 457 458 PetscFunctionBegin; 459 ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 464 { 465 PetscErrorCode ierr; 466 PetscMPIInt size,rank; 467 MPI_Comm comm; 468 Mat_FFT *fft = (Mat_FFT*)A->data; 469 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 470 PetscInt N = fft->N; 471 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 472 473 PetscFunctionBegin; 474 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 475 PetscValidType(A,1); 476 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 477 478 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 479 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 480 if (size == 1) { /* sequential case */ 481 #if defined(PETSC_USE_COMPLEX) 482 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 483 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 484 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 485 #else 486 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 487 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 488 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 489 #endif 490 } else { /* parallel cases */ 491 ptrdiff_t alloc_local,local_n0,local_0_start; 492 ptrdiff_t local_n1; 493 fftw_complex *data_fout; 494 ptrdiff_t local_1_start; 495 #if defined(PETSC_USE_COMPLEX) 496 fftw_complex *data_fin,*data_bout; 497 #else 498 double *data_finr,*data_boutr; 499 PetscInt n1,N1; 500 ptrdiff_t temp; 501 #endif 502 503 switch (ndim) { 504 case 1: 505 #if !defined(PETSC_USE_COMPLEX) 506 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 507 #else 508 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 509 if (fin) { 510 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 512 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 513 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 514 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 515 } 516 if (fout) { 517 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 518 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 519 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 520 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 521 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 522 } 523 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 524 if (bout) { 525 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 527 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 528 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 529 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 break; 532 #endif 533 case 2: 534 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 535 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 536 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 537 if (fin) { 538 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 539 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 540 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 541 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 542 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 543 } 544 if (fout) { 545 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 546 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 547 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 548 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 549 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 550 } 551 if (bout) { 552 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 553 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 554 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 555 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 556 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 #else 559 /* Get local size */ 560 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 561 if (fin) { 562 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 563 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 564 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 565 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 566 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 567 } 568 if (fout) { 569 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 571 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 572 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 573 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 if (bout) { 576 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 578 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 579 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 580 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 #endif 583 break; 584 case 3: 585 #if !defined(PETSC_USE_COMPLEX) 586 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 587 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 588 if (fin) { 589 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 590 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 591 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 592 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 593 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 if (fout) { 596 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 597 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 598 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 599 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 600 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 601 } 602 if (bout) { 603 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 604 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 605 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 606 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 607 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 #else 610 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 611 if (fin) { 612 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 613 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 614 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 615 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 616 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 617 } 618 if (fout) { 619 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 620 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 621 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 622 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 623 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 624 } 625 if (bout) { 626 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 627 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 628 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 629 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 630 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 #endif 633 break; 634 default: 635 #if !defined(PETSC_USE_COMPLEX) 636 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 637 638 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 639 640 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 641 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 642 643 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 644 645 if (fin) { 646 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 647 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 648 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 649 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 650 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 651 } 652 if (fout) { 653 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 654 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 655 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 656 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 657 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 658 } 659 if (bout) { 660 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 661 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 662 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 663 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 664 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 665 } 666 #else 667 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 668 if (fin) { 669 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 670 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 671 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 672 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 673 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 674 } 675 if (fout) { 676 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 677 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 678 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 679 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 680 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 681 } 682 if (bout) { 683 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 684 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 685 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 686 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 687 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 688 } 689 #endif 690 break; 691 } 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /*@ 697 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 698 699 Collective on Mat 700 701 Input Parameters: 702 + A - FFTW matrix 703 - x - the PETSc vector 704 705 Output Parameters: 706 . y - the FFTW vector 707 708 Options Database Keys: 709 . -mat_fftw_plannerflags - set FFTW planner flags 710 711 Level: intermediate 712 713 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 714 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 715 zeros. This routine does that job by scattering operation. 716 717 .seealso: VecScatterFFTWToPetsc() 718 @*/ 719 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 720 { 721 PetscErrorCode ierr; 722 723 PetscFunctionBegin; 724 ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 729 { 730 PetscErrorCode ierr; 731 MPI_Comm comm; 732 Mat_FFT *fft = (Mat_FFT*)A->data; 733 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 734 PetscInt N =fft->N; 735 PetscInt ndim =fft->ndim,*dim=fft->dim; 736 PetscInt low; 737 PetscMPIInt rank,size; 738 PetscInt vsize,vsize1; 739 ptrdiff_t local_n0,local_0_start; 740 ptrdiff_t local_n1,local_1_start; 741 VecScatter vecscat; 742 IS list1,list2; 743 #if !defined(PETSC_USE_COMPLEX) 744 PetscInt i,j,k,partial_dim; 745 PetscInt *indx1, *indx2, tempindx, tempindx1; 746 PetscInt NM; 747 ptrdiff_t temp; 748 #endif 749 750 PetscFunctionBegin; 751 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 752 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 753 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 754 ierr = VecGetOwnershipRange(y,&low,NULL); 755 756 if (size==1) { 757 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 758 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 759 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 760 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 761 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 762 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 764 ierr = ISDestroy(&list1);CHKERRQ(ierr); 765 } else { 766 switch (ndim) { 767 case 1: 768 #if defined(PETSC_USE_COMPLEX) 769 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 770 771 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 772 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 773 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 774 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 775 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 776 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 777 ierr = ISDestroy(&list1);CHKERRQ(ierr); 778 ierr = ISDestroy(&list2);CHKERRQ(ierr); 779 #else 780 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 781 #endif 782 break; 783 case 2: 784 #if defined(PETSC_USE_COMPLEX) 785 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 786 787 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 788 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 789 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 790 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793 ierr = ISDestroy(&list1);CHKERRQ(ierr); 794 ierr = ISDestroy(&list2);CHKERRQ(ierr); 795 #else 796 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 797 798 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 799 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 800 801 if (dim[1]%2==0) { 802 NM = dim[1]+2; 803 } else { 804 NM = dim[1]+1; 805 } 806 for (i=0; i<local_n0; i++) { 807 for (j=0; j<dim[1]; j++) { 808 tempindx = i*dim[1] + j; 809 tempindx1 = i*NM + j; 810 811 indx1[tempindx]=local_0_start*dim[1]+tempindx; 812 indx2[tempindx]=low+tempindx1; 813 } 814 } 815 816 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 817 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 818 819 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 820 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 821 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 822 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 823 ierr = ISDestroy(&list1);CHKERRQ(ierr); 824 ierr = ISDestroy(&list2);CHKERRQ(ierr); 825 ierr = PetscFree(indx1);CHKERRQ(ierr); 826 ierr = PetscFree(indx2);CHKERRQ(ierr); 827 #endif 828 break; 829 830 case 3: 831 #if defined(PETSC_USE_COMPLEX) 832 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 833 834 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 835 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 836 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 837 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 838 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 839 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 840 ierr = ISDestroy(&list1);CHKERRQ(ierr); 841 ierr = ISDestroy(&list2);CHKERRQ(ierr); 842 #else 843 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 844 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 845 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 846 847 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 848 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 849 850 if (dim[2]%2==0) NM = dim[2]+2; 851 else NM = dim[2]+1; 852 853 for (i=0; i<local_n0; i++) { 854 for (j=0; j<dim[1]; j++) { 855 for (k=0; k<dim[2]; k++) { 856 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 857 tempindx1 = i*dim[1]*NM + j*NM + k; 858 859 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 860 indx2[tempindx]=low+tempindx1; 861 } 862 } 863 } 864 865 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 866 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 867 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 868 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 870 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 871 ierr = ISDestroy(&list1);CHKERRQ(ierr); 872 ierr = ISDestroy(&list2);CHKERRQ(ierr); 873 ierr = PetscFree(indx1);CHKERRQ(ierr); 874 ierr = PetscFree(indx2);CHKERRQ(ierr); 875 #endif 876 break; 877 878 default: 879 #if defined(PETSC_USE_COMPLEX) 880 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 881 882 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 883 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 884 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 885 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 888 ierr = ISDestroy(&list1);CHKERRQ(ierr); 889 ierr = ISDestroy(&list2);CHKERRQ(ierr); 890 #else 891 /* buggy, needs to be fixed. See src/mat/examples/tests/ex158.c */ 892 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 893 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 894 895 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 896 897 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 898 899 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 900 901 partial_dim = fftw->partial_dim; 902 903 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 904 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 905 906 if (dim[ndim-1]%2==0) NM = 2; 907 else NM = 1; 908 909 j = low; 910 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 911 indx1[i] = local_0_start*partial_dim + i; 912 indx2[i] = j; 913 if (k%dim[ndim-1]==0) j+=NM; 914 j++; 915 } 916 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 917 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 918 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 919 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 922 ierr = ISDestroy(&list1);CHKERRQ(ierr); 923 ierr = ISDestroy(&list2);CHKERRQ(ierr); 924 ierr = PetscFree(indx1);CHKERRQ(ierr); 925 ierr = PetscFree(indx2);CHKERRQ(ierr); 926 #endif 927 break; 928 } 929 } 930 PetscFunctionReturn(0); 931 } 932 933 /*@ 934 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 935 936 Collective on Mat 937 938 Input Parameters: 939 + A - FFTW matrix 940 - x - FFTW vector 941 942 Output Parameters: 943 . y - PETSc vector 944 945 Level: intermediate 946 947 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 948 VecScatterFFTWToPetsc removes those extra zeros. 949 950 .seealso: VecScatterPetscToFFTW() 951 @*/ 952 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 953 { 954 PetscErrorCode ierr; 955 956 PetscFunctionBegin; 957 ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 962 { 963 PetscErrorCode ierr; 964 MPI_Comm comm; 965 Mat_FFT *fft = (Mat_FFT*)A->data; 966 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 967 PetscInt N = fft->N; 968 PetscInt ndim = fft->ndim,*dim=fft->dim; 969 PetscInt low; 970 PetscMPIInt rank,size; 971 ptrdiff_t local_n0,local_0_start; 972 ptrdiff_t local_n1,local_1_start; 973 VecScatter vecscat; 974 IS list1,list2; 975 #if !defined(PETSC_USE_COMPLEX) 976 PetscInt i,j,k,partial_dim; 977 PetscInt *indx1, *indx2, tempindx, tempindx1; 978 PetscInt NM; 979 ptrdiff_t temp; 980 #endif 981 982 PetscFunctionBegin; 983 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 984 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 985 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 986 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 987 988 if (size==1) { 989 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 990 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 991 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 994 ierr = ISDestroy(&list1);CHKERRQ(ierr); 995 996 } else { 997 switch (ndim) { 998 case 1: 999 #if defined(PETSC_USE_COMPLEX) 1000 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1001 1002 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 1003 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 1004 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1008 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1009 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1010 #else 1011 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1012 #endif 1013 break; 1014 case 2: 1015 #if defined(PETSC_USE_COMPLEX) 1016 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1017 1018 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1019 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 1020 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1021 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1023 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1024 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1025 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1026 #else 1027 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1028 1029 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1030 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1031 1032 if (dim[1]%2==0) NM = dim[1]+2; 1033 else NM = dim[1]+1; 1034 1035 for (i=0; i<local_n0; i++) { 1036 for (j=0; j<dim[1]; j++) { 1037 tempindx = i*dim[1] + j; 1038 tempindx1 = i*NM + j; 1039 1040 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1041 indx2[tempindx]=low+tempindx1; 1042 } 1043 } 1044 1045 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1046 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1047 1048 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1049 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1051 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1052 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1053 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1054 ierr = PetscFree(indx1);CHKERRQ(ierr); 1055 ierr = PetscFree(indx2);CHKERRQ(ierr); 1056 #endif 1057 break; 1058 case 3: 1059 #if defined(PETSC_USE_COMPLEX) 1060 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1061 1062 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1063 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1064 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1065 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1066 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1067 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1068 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1069 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1070 #else 1071 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1072 1073 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1074 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1075 1076 if (dim[2]%2==0) NM = dim[2]+2; 1077 else NM = dim[2]+1; 1078 1079 for (i=0; i<local_n0; i++) { 1080 for (j=0; j<dim[1]; j++) { 1081 for (k=0; k<dim[2]; k++) { 1082 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1083 tempindx1 = i*dim[1]*NM + j*NM + k; 1084 1085 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1086 indx2[tempindx]=low+tempindx1; 1087 } 1088 } 1089 } 1090 1091 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1092 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1093 1094 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1095 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1097 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1098 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1099 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1100 ierr = PetscFree(indx1);CHKERRQ(ierr); 1101 ierr = PetscFree(indx2);CHKERRQ(ierr); 1102 #endif 1103 break; 1104 default: 1105 #if defined(PETSC_USE_COMPLEX) 1106 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1107 1108 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1109 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1110 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1111 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1113 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1114 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1115 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1116 #else 1117 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1118 1119 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1120 1121 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1122 1123 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1124 1125 partial_dim = fftw->partial_dim; 1126 1127 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1128 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1129 1130 if (dim[ndim-1]%2==0) NM = 2; 1131 else NM = 1; 1132 1133 j = low; 1134 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1135 indx1[i] = local_0_start*partial_dim + i; 1136 indx2[i] = j; 1137 if (k%dim[ndim-1]==0) j+=NM; 1138 j++; 1139 } 1140 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1141 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1142 1143 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1144 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1147 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1148 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1149 ierr = PetscFree(indx1);CHKERRQ(ierr); 1150 ierr = PetscFree(indx2);CHKERRQ(ierr); 1151 #endif 1152 break; 1153 } 1154 } 1155 PetscFunctionReturn(0); 1156 } 1157 1158 /* 1159 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1160 1161 Options Database Keys: 1162 + -mat_fftw_plannerflags - set FFTW planner flags 1163 1164 Level: intermediate 1165 1166 */ 1167 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1168 { 1169 PetscErrorCode ierr; 1170 MPI_Comm comm; 1171 Mat_FFT *fft=(Mat_FFT*)A->data; 1172 Mat_FFTW *fftw; 1173 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1174 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1175 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1176 PetscBool flg; 1177 PetscInt p_flag,partial_dim=1,ctr; 1178 PetscMPIInt size,rank; 1179 ptrdiff_t *pdim; 1180 ptrdiff_t local_n1,local_1_start; 1181 #if !defined(PETSC_USE_COMPLEX) 1182 ptrdiff_t temp; 1183 PetscInt N1,tot_dim; 1184 #else 1185 PetscInt n1; 1186 #endif 1187 1188 PetscFunctionBegin; 1189 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1190 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1191 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1192 1193 fftw_mpi_init(); 1194 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1195 pdim[0] = dim[0]; 1196 #if !defined(PETSC_USE_COMPLEX) 1197 tot_dim = 2*dim[0]; 1198 #endif 1199 for (ctr=1; ctr<ndim; ctr++) { 1200 partial_dim *= dim[ctr]; 1201 pdim[ctr] = dim[ctr]; 1202 #if !defined(PETSC_USE_COMPLEX) 1203 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1204 else tot_dim *= dim[ctr]; 1205 #endif 1206 } 1207 1208 if (size == 1) { 1209 #if defined(PETSC_USE_COMPLEX) 1210 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1211 n = N; 1212 #else 1213 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1214 n = tot_dim; 1215 #endif 1216 1217 } else { 1218 ptrdiff_t local_n0,local_0_start; 1219 switch (ndim) { 1220 case 1: 1221 #if !defined(PETSC_USE_COMPLEX) 1222 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1223 #else 1224 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1225 1226 n = (PetscInt)local_n0; 1227 n1 = (PetscInt)local_n1; 1228 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1229 #endif 1230 break; 1231 case 2: 1232 #if defined(PETSC_USE_COMPLEX) 1233 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1234 /* 1235 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1236 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1237 */ 1238 n = (PetscInt)local_n0*dim[1]; 1239 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1240 #else 1241 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1242 1243 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1244 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1245 #endif 1246 break; 1247 case 3: 1248 #if defined(PETSC_USE_COMPLEX) 1249 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1250 1251 n = (PetscInt)local_n0*dim[1]*dim[2]; 1252 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1253 #else 1254 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1255 1256 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1257 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1258 #endif 1259 break; 1260 default: 1261 #if defined(PETSC_USE_COMPLEX) 1262 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1263 1264 n = (PetscInt)local_n0*partial_dim; 1265 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1266 #else 1267 temp = pdim[ndim-1]; 1268 1269 pdim[ndim-1] = temp/2 + 1; 1270 1271 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1272 1273 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1274 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1275 1276 pdim[ndim-1] = temp; 1277 1278 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1279 #endif 1280 break; 1281 } 1282 } 1283 free(pdim); 1284 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1285 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1286 fft->data = (void*)fftw; 1287 1288 fft->n = n; 1289 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1290 fftw->partial_dim = partial_dim; 1291 1292 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1293 if (size == 1) { 1294 #if defined(PETSC_USE_64BIT_INDICES) 1295 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1296 #else 1297 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1298 #endif 1299 } 1300 1301 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1302 1303 fftw->p_forward = 0; 1304 fftw->p_backward = 0; 1305 fftw->p_flag = FFTW_ESTIMATE; 1306 1307 if (size == 1) { 1308 A->ops->mult = MatMult_SeqFFTW; 1309 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1310 } else { 1311 A->ops->mult = MatMult_MPIFFTW; 1312 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1313 } 1314 fft->matdestroy = MatDestroy_FFTW; 1315 A->assembled = PETSC_TRUE; 1316 A->preallocated = PETSC_TRUE; 1317 1318 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1319 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1320 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1321 1322 /* get runtime options */ 1323 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1324 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1325 if (flg) { 1326 fftw->p_flag = iplans[p_flag]; 1327 } 1328 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 1333 1334 1335