1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 33 34 /* MatMult_SeqFFTW performs forward DFT in parallel 35 Input parameter: 36 A - the matrix 37 x - the vector on which FDFT will be performed 38 39 Output parameter: 40 y - vector that stores result of FDFT 41 */ 42 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 43 { 44 PetscErrorCode ierr; 45 Mat_FFT *fft = (Mat_FFT*)A->data; 46 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 47 const PetscScalar *x_array; 48 PetscScalar *y_array; 49 #if defined(PETSC_USE_COMPLEX) 50 #if defined(PETSC_USE_64BIT_INDICES) 51 fftw_iodim64 *iodims; 52 #else 53 fftw_iodim *iodims; 54 #endif 55 PetscInt i; 56 #endif 57 PetscInt ndim = fft->ndim,*dim = fft->dim; 58 59 PetscFunctionBegin; 60 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 61 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62 if (!fftw->p_forward) { /* create a plan, then excute it */ 63 switch (ndim) { 
64 case 1: 65 #if defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 2: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 3: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 default: 86 #if defined(PETSC_USE_COMPLEX) 87 iodims = fftw->iodims; 88 #if defined(PETSC_USE_64BIT_INDICES) 89 if (ndim) { 90 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 91 iodims[ndim-1].is = iodims[ndim-1].os = 1; 92 for (i=ndim-2; i>=0; --i) { 93 iodims[i].n = (ptrdiff_t)dim[i]; 94 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 95 } 96 } 97 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98 #else 99 if (ndim) { 100 iodims[ndim-1].n = (int)dim[ndim-1]; 101 iodims[ndim-1].is = iodims[ndim-1].os = 1; 102 for (i=ndim-2; i>=0; --i) { 103 iodims[i].n = (int)dim[i]; 104 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105 } 106 } 107 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108 #endif 109 110 #else 111 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 112 #endif 
113 break; 114 } 115 fftw->finarray = (PetscScalar *) x_array; 116 fftw->foutarray = y_array; 117 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119 fftw_execute(fftw->p_forward); 120 } else { /* use existing plan */ 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 #if defined(PETSC_USE_COMPLEX) 123 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 124 #else 125 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 126 #endif 127 } else { 128 fftw_execute(fftw->p_forward); 129 } 130 } 131 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 132 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* MatMultTranspose_SeqFFTW performs serial backward DFT 137 Input parameter: 138 A - the matrix 139 x - the vector on which BDFT will be performed 140 141 Output parameter: 142 y - vector that stores result of BDFT 143 */ 144 145 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 146 { 147 PetscErrorCode ierr; 148 Mat_FFT *fft = (Mat_FFT*)A->data; 149 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 150 const PetscScalar *x_array; 151 PetscScalar *y_array; 152 PetscInt ndim=fft->ndim,*dim=fft->dim; 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_64BIT_INDICES) 155 fftw_iodim64 *iodims=fftw->iodims; 156 #else 157 fftw_iodim *iodims=fftw->iodims; 158 #endif 159 #endif 160 161 PetscFunctionBegin; 162 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 163 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164 if (!fftw->p_backward) { /* create a plan, then excute it */ 165 switch (ndim) { 166 case 1: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 169 #else 170 fftw->p_backward= 
fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 171 #endif 172 break; 173 case 2: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 176 #else 177 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 178 #endif 179 break; 180 case 3: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183 #else 184 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 185 #endif 186 break; 187 default: 188 #if defined(PETSC_USE_COMPLEX) 189 #if defined(PETSC_USE_64BIT_INDICES) 190 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 191 #else 192 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 193 #endif 194 #else 195 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 196 #endif 197 break; 198 } 199 fftw->binarray = (PetscScalar *) x_array; 200 fftw->boutarray = y_array; 201 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 202 } else { /* use existing plan */ 203 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 204 #if defined(PETSC_USE_COMPLEX) 205 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 206 #else 207 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 208 #endif 209 } else { 210 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 211 } 212 } 213 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 214 ierr = 
VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 /* MatMult_MPIFFTW performs forward DFT in parallel 219 Input parameter: 220 A - the matrix 221 x - the vector on which FDFT will be performed 222 223 Output parameter: 224 y - vector that stores result of FDFT 225 */ 226 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 227 { 228 PetscErrorCode ierr; 229 Mat_FFT *fft = (Mat_FFT*)A->data; 230 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 231 const PetscScalar *x_array; 232 PetscScalar *y_array; 233 PetscInt ndim=fft->ndim,*dim=fft->dim; 234 MPI_Comm comm; 235 236 PetscFunctionBegin; 237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 239 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 240 if (!fftw->p_forward) { /* create a plan, then excute it */ 241 switch (ndim) { 242 case 1: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 245 #else 246 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 247 #endif 248 break; 249 case 2: 250 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 251 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 252 #else 253 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 254 #endif 255 break; 256 case 3: 257 #if defined(PETSC_USE_COMPLEX) 258 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 259 #else 260 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 261 #endif 262 break; 263 default: 264 #if defined(PETSC_USE_COMPLEX) 265 
fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 266 #else 267 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 268 #endif 269 break; 270 } 271 fftw->finarray = (PetscScalar *) x_array; 272 fftw->foutarray = y_array; 273 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 274 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 275 fftw_execute(fftw->p_forward); 276 } else { /* use existing plan */ 277 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 278 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 279 } else { 280 fftw_execute(fftw->p_forward); 281 } 282 } 283 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 284 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 289 Input parameter: 290 A - the matrix 291 x - the vector on which BDFT will be performed 292 293 Output parameter: 294 y - vector that stores result of BDFT 295 */ 296 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 297 { 298 PetscErrorCode ierr; 299 Mat_FFT *fft = (Mat_FFT*)A->data; 300 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 301 const PetscScalar *x_array; 302 PetscScalar *y_array; 303 PetscInt ndim=fft->ndim,*dim=fft->dim; 304 MPI_Comm comm; 305 306 PetscFunctionBegin; 307 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 308 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 309 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 310 if (!fftw->p_backward) { /* create a plan, then excute it */ 311 switch (ndim) { 312 case 1: 313 #if defined(PETSC_USE_COMPLEX) 314 fftw->p_backward = 
fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 315 #else 316 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 317 #endif 318 break; 319 case 2: 320 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 321 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 322 #else 323 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 324 #endif 325 break; 326 case 3: 327 #if defined(PETSC_USE_COMPLEX) 328 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 329 #else 330 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 331 #endif 332 break; 333 default: 334 #if defined(PETSC_USE_COMPLEX) 335 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 336 #else 337 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 338 #endif 339 break; 340 } 341 fftw->binarray = (PetscScalar *) x_array; 342 fftw->boutarray = y_array; 343 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344 } else { /* use existing plan */ 345 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 346 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 347 } else { 348 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 349 } 350 } 351 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 352 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 PetscErrorCode 
MatDestroy_FFTW(Mat A) 357 { 358 Mat_FFT *fft = (Mat_FFT*)A->data; 359 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360 PetscErrorCode ierr; 361 362 PetscFunctionBegin; 363 fftw_destroy_plan(fftw->p_forward); 364 fftw_destroy_plan(fftw->p_backward); 365 if (fftw->iodims) { 366 free(fftw->iodims); 367 } 368 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 369 ierr = PetscFree(fft->data);CHKERRQ(ierr); 370 fftw_mpi_cleanup(); 371 PetscFunctionReturn(0); 372 } 373 374 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 375 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 376 { 377 PetscErrorCode ierr; 378 PetscScalar *array; 379 380 PetscFunctionBegin; 381 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 382 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 383 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 384 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 389 { 390 PetscErrorCode ierr; 391 Mat A; 392 393 PetscFunctionBegin; 394 ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 395 ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 400 { 401 PetscErrorCode ierr; 402 Mat A; 403 404 PetscFunctionBegin; 405 ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 406 ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 411 { 412 PetscErrorCode ierr; 413 Mat A; 414 415 PetscFunctionBegin; 416 ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 417 ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 /*@ 422 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. 
with the 423 parallel layout determined by FFTW 424 425 Collective on Mat 426 427 Input Parameter: 428 . A - the matrix 429 430 Output Parameters: 431 + x - (optional) input vector of forward FFTW 432 . y - (optional) output vector of forward FFTW 433 - z - (optional) output vector of backward FFTW 434 435 Level: advanced 436 437 Note: The parallel layout of output of forward FFTW is always same as the input 438 of the backward FFTW. But parallel layout of the input vector of forward 439 FFTW might not be same as the output of backward FFTW. 440 Also note that we need to provide enough space while doing parallel real transform. 441 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 442 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 443 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 444 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 445 zeros if it is odd only one zero is needed. 446 Lastly one needs some scratch space at the end of data set in each process. alloc_local 447 figures out how much space is needed, i.e. it figures out the data+scratch space for 448 each processor and returns that. 
449 450 .seealso: MatCreateFFT() 451 @*/ 452 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 453 { 454 PetscErrorCode ierr; 455 456 PetscFunctionBegin; 457 ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 462 { 463 PetscErrorCode ierr; 464 PetscMPIInt size,rank; 465 MPI_Comm comm; 466 Mat_FFT *fft = (Mat_FFT*)A->data; 467 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 468 PetscInt N = fft->N; 469 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 470 471 PetscFunctionBegin; 472 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 473 PetscValidType(A,1); 474 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 475 476 ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr); 477 ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr); 478 if (size == 1) { /* sequential case */ 479 #if defined(PETSC_USE_COMPLEX) 480 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 481 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 482 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 483 #else 484 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 485 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 486 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 487 #endif 488 } else { /* parallel cases */ 489 ptrdiff_t alloc_local,local_n0,local_0_start; 490 ptrdiff_t local_n1; 491 fftw_complex *data_fout; 492 ptrdiff_t local_1_start; 493 #if defined(PETSC_USE_COMPLEX) 494 fftw_complex *data_fin,*data_bout; 495 #else 496 double *data_finr,*data_boutr; 497 PetscInt n1,N1; 498 ptrdiff_t temp; 499 #endif 500 501 switch (ndim) { 502 case 1: 503 #if !defined(PETSC_USE_COMPLEX) 504 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 505 #else 506 alloc_local = 
fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 507 if (fin) { 508 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 509 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 510 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 511 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 512 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 515 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 517 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 518 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 519 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 520 } 521 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 522 if (bout) { 523 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 524 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 525 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 526 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 527 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 break; 530 #endif 531 case 2: 532 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 533 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 534 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 535 if (fin) { 536 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 538 ierr = 
PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 539 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 540 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 541 } 542 if (fout) { 543 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 544 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 545 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 546 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 547 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 548 } 549 if (bout) { 550 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 551 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 552 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 553 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 554 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 555 } 556 #else 557 /* Get local size */ 558 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 559 if (fin) { 560 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 561 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 562 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 563 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 564 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 565 } 566 if (fout) { 567 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 568 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 569 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 570 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 571 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 572 } 573 if (bout) { 574 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 575 ierr = 
VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 576 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 577 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 578 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 579 } 580 #endif 581 break; 582 case 3: 583 #if !defined(PETSC_USE_COMPLEX) 584 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 585 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 586 if (fin) { 587 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 588 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 589 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 590 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 591 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 592 } 593 if (fout) { 594 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 595 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 596 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 597 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 598 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 599 } 600 if (bout) { 601 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 602 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 603 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 604 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 605 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 606 } 607 #else 608 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 609 if (fin) { 610 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 611 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 
612 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 613 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 614 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 615 } 616 if (fout) { 617 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 618 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 619 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 620 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 621 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 622 } 623 if (bout) { 624 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 625 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 626 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 627 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 628 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 629 } 630 #endif 631 break; 632 default: 633 #if !defined(PETSC_USE_COMPLEX) 634 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 635 636 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 637 638 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 639 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 640 641 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 642 643 if (fin) { 644 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 645 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 646 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 647 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 648 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 if (fout) { 651 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 652 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 653 ierr = 
PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 654 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 655 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 656 } 657 if (bout) { 658 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 659 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 660 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 661 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 662 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 663 } 664 #else 665 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 666 if (fin) { 667 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 668 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 669 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 670 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 671 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 672 } 673 if (fout) { 674 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 675 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 676 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 677 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 678 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 679 } 680 if (bout) { 681 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 682 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 683 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 684 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 685 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 686 } 687 #endif 688 break; 689 } 690 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 691 v->array_allocated=NULL. 
A regular replacearray call won't free the memory and only causes 692 memory leaks. We void these pointers here to avoid acident uses. 693 */ 694 if (fin) (*fin)->ops->replacearray = NULL; 695 if (fout) (*fout)->ops->replacearray = NULL; 696 if (bout) (*bout)->ops->replacearray = NULL; 697 } 698 PetscFunctionReturn(0); 699 } 700 701 /*@ 702 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 703 704 Collective on Mat 705 706 Input Parameters: 707 + A - FFTW matrix 708 - x - the PETSc vector 709 710 Output Parameters: 711 . y - the FFTW vector 712 713 Options Database Keys: 714 . -mat_fftw_plannerflags - set FFTW planner flags 715 716 Level: intermediate 717 718 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 719 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 720 zeros. This routine does that job by scattering operation. 

.seealso: VecScatterFFTWToPetsc()
@*/
PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A (VecScatterPetscToFFTW_FFTW for MATFFTW) */
  ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   VecScatterPetscToFFTW_FFTW - MATFFTW implementation of VecScatterPetscToFFTW().

   Builds (and immediately applies, then destroys) a VecScatter that copies the
   naturally ordered PETSc vector x into the FFTW parallel-layout vector y.

   For complex transforms the PETSc and FFTW layouts coincide, so simple strided
   index sets suffice.  For real transforms FFTW pads each "row" of the last
   dimension to 2*(dim/2+1) entries, so general index sets are built explicitly
   to place each source entry past the padding in the target vector.
*/
PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;                    /* global number of transform points */
  PetscInt       ndim  = fft->ndim,*dim = fft->dim; /* dimensionality and per-dimension extents */
  PetscInt       low;                               /* first locally owned index of y */
  PetscMPIInt    rank,size;
  PetscInt       vsize,vsize1;
  ptrdiff_t      local_n0,local_0_start;            /* FFTW's local slab size and global offset */
  ptrdiff_t      local_n1,local_1_start;            /* transposed-layout counterparts (1D / real transforms) */
  VecScatter     vecscat;
  IS             list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt       i,j,k,partial_dim;
  PetscInt       *indx1, *indx2, tempindx, tempindx1;
  PetscInt       NM;                                /* padded length of the last dimension (real transforms) */
  ptrdiff_t      temp;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = VecGetOwnershipRange(y,&low,NULL);CHKERRQ(ierr);

  if (size==1) {
    /* Sequential: straight copy of all N entries with an identity scatter.
       NOTE(review): vsize and vsize1 are queried but never used afterwards. */
    ierr = VecGetSize(x,&vsize);CHKERRQ(ierr);
    ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
    ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
    ierr = ISDestroy(&list1);CHKERRQ(ierr);
  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D complex: FFTW distributes dim[0] points; scatter each rank's slab to its place in y */
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0,low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* 2D complex: slab decomposition over the first dimension, rows of length dim[1] */
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* 2D real: rows in the FFTW buffer are padded to NM = 2*(dim[1]/2+1) reals */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);

      if (dim[1]%2==0) { /* NM = 2*(dim[1]/2+1): dim[1]+2 when even, dim[1]+1 when odd */
        NM = dim[1]+2;
      } else {
        NM = dim[1]+1;
      }
      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j; /* unpadded (source) offset within this slab */
          tempindx1 = i*NM + j;     /* padded (target) offset within this slab */

          indx1[tempindx]=local_0_start*dim[1]+tempindx;
          indx2[tempindx]=low+tempindx1;
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* 3D complex: slab decomposition over dim[0]; contiguous planes of dim[1]*dim[2] */
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform");
      /* NOTE(review): everything below in this branch is unreachable because SETERRQ returns */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);

      if (dim[2]%2==0) NM = dim[2]+2; /* padded last-dimension length 2*(dim[2]/2+1) */
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k; /* unpadded (source) offset */
            tempindx1 = i*dim[1]*NM + j*NM + k;         /* padded (target) offset */

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3, complex: FFTW distributes the first dimension; partial_dim is the
         product of the remaining dimensions (precomputed in MatCreate_FFTW) */
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* buggy, needs to be fixed. See src/mat/tests/ex158.c */
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform");
      /* NOTE(review): everything below in this branch is unreachable because SETERRQ returns */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; /* temporarily present the r2c extent to FFTW */

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;       /* restore the true last-dimension extent */

      partial_dim = fftw->partial_dim;

      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);

      if (dim[ndim-1]%2==0) NM = 2; /* number of pad entries appended to each last-dimension row */
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM; /* skip the padding at the end of each row of the target */
        j++;
      }
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

/*@
   VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - FFTW vector

   Output Parameters:
.  y - PETSc vector

   Level: intermediate

   Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension.
         VecScatterFFTWToPetsc removes those extra zeros.

.seealso: VecScatterPetscToFFTW()
@*/
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation composed on A (VecScatterFFTWToPetsc_FFTW for MATFFTW) */
  ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   VecScatterFFTWToPetsc_FFTW - MATFFTW implementation of VecScatterFFTWToPetsc().

   Inverse of VecScatterPetscToFFTW_FFTW: copies the FFTW parallel-layout vector x
   back into the naturally ordered PETSc vector y, dropping the per-row padding
   that FFTW inserts for real transforms.  Note the source/destination index sets
   are swapped relative to the PetscToFFTW direction (VecScatterCreate(x,list2,y,list1)
   in the real branches).
*/
PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;                    /* global number of transform points */
  PetscInt       ndim  = fft->ndim,*dim=fft->dim;   /* dimensionality and per-dimension extents */
  PetscInt       low;                               /* first locally owned index of x (the FFTW vector) */
  PetscMPIInt    rank,size;
  ptrdiff_t      local_n0,local_0_start;            /* FFTW's local slab size and global offset */
  ptrdiff_t      local_n1,local_1_start;            /* transposed-layout counterparts (1D / real transforms) */
  VecScatter     vecscat;
  IS             list1,list2;
#if !defined(PETSC_USE_COMPLEX)
  PetscInt       i,j,k,partial_dim;
  PetscInt       *indx1, *indx2, tempindx, tempindx1;
  PetscInt       NM;                                /* padded length of the last dimension (real transforms) */
  ptrdiff_t      temp;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);
  ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr);

  if (size==1) {
    /* Sequential: straight copy of all N entries with an identity scatter */
    ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr);
    ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr);
    ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
    ierr = ISDestroy(&list1);CHKERRQ(ierr);

  } else {
    switch (ndim) {
    case 1:
#if defined(PETSC_USE_COMPLEX)
      /* 1D complex: the backward transform's output distribution is (local_n1, local_1_start) */
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n1,low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT");
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      /* 2D complex: slab decomposition over the first dimension, rows of length dim[1] */
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* 2D real: read padded rows (NM = 2*(dim[1]/2+1) reals) from x, write unpadded rows to y */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);

      if (dim[1]%2==0) NM = dim[1]+2; /* padded row length: dim[1]+2 when even, dim[1]+1 when odd */
      else             NM = dim[1]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          tempindx  = i*dim[1] + j; /* unpadded (target) offset within this slab */
          tempindx1 = i*NM + j;     /* padded (source) offset within this slab */

          indx1[tempindx]=local_0_start*dim[1]+tempindx;
          indx2[tempindx]=low+tempindx1;
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* 3D complex: slab decomposition over dim[0]; contiguous planes of dim[1]*dim[2] */
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* 3D real: read padded rows (NM = 2*(dim[2]/2+1)) from x, write unpadded rows to y.
         NOTE(review): unlike the forward (PetscToFFTW) direction, this branch does not
         SETERRQ — presumably the backward scatter is usable; confirm against ex158.c. */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);

      if (dim[2]%2==0) NM = dim[2]+2; /* padded last-dimension length */
      else             NM = dim[2]+1;

      for (i=0; i<local_n0; i++) {
        for (j=0; j<dim[1]; j++) {
          for (k=0; k<dim[2]; k++) {
            tempindx  = i*dim[1]*dim[2] + j*dim[2] + k; /* unpadded (target) offset */
            tempindx1 = i*dim[1]*NM + j*NM + k;         /* padded (source) offset */

            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      /* ndim > 3, complex: partial_dim is the product of all dimensions but the first */
      fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);

      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* ndim > 3, real: temporarily substitute the r2c extent for the last dimension */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; /* restore the true extent */

      partial_dim = fftw->partial_dim;

      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);

      if (dim[ndim-1]%2==0) NM = 2; /* number of pad entries at the end of each row */
      else                  NM = 1;

      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0) j+=NM; /* skip the padding after each source row */
        j++;
      }
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

/*
    MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW

  Options Database Keys:
+ -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

*/
PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  Mat_FFT        *fft = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw;
  PetscInt       n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim;
  const char     *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"};
  unsigned       iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE};
  PetscBool      flg;
  PetscInt       p_flag,partial_dim=1,ctr;
  PetscMPIInt    size,rank;
  ptrdiff_t      *pdim;                  /* dim[] converted to FFTW's ptrdiff_t */
  ptrdiff_t      local_n1,local_1_start;
#if !defined(PETSC_USE_COMPLEX)
  ptrdiff_t      temp;
  PetscInt       N1,tot_dim;             /* tot_dim: padded global size for real transforms */
#else
  PetscInt       n1;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRMPI(ierr);

  fftw_mpi_init();
  /* NOTE(review): pdim uses raw calloc/free rather than PetscMalloc — it leaks on the
     SETERRQ error path in the parallel 1D real case below */
  pdim    = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t));
  pdim[0] = dim[0];
#if !defined(PETSC_USE_COMPLEX)
  tot_dim = 2*dim[0];
#endif
  for (ctr=1; ctr<ndim; ctr++) {
    partial_dim *= dim[ctr];             /* product of all dimensions except the first */
    pdim[ctr]    = dim[ctr];
#if !defined(PETSC_USE_COMPLEX)
    if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); /* last dimension stored as dim/2+1 complex = padded reals */
    else             tot_dim *= dim[ctr];
#endif
  }

  if (size == 1) {
#if defined(PETSC_USE_COMPLEX)
    ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr);
    n    = N;
#else
    ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr);
    n    = tot_dim;
#endif

  } else {
    ptrdiff_t local_n0,local_0_start;
    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform");
#else
      fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = (PetscInt)local_n0;
      n1 = (PetscInt)local_n1;
      /* NOTE(review): local rows n1 (output distribution) vs local cols n (input
         distribution) — forward and backward 1D distributions can differ; confirm */
      ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr);
#endif
      break;
    case 2:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      /*
       PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]);
       PetscSynchronizedFlush(comm,PETSC_STDOUT);
       */
      n    = (PetscInt)local_n0*dim[1];
      ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
#else
      /* real: sizes include the per-row padding to 2*(dim[1]/2+1) reals */
      fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n    = 2*(PetscInt)local_n0*(dim[1]/2+1);
      ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1));CHKERRQ(ierr);
#endif
      break;
    case 3:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);

      n    = (PetscInt)local_n0*dim[1]*dim[2];
      ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
#else
      /* real: sizes include the per-row padding to 2*(dim[2]/2+1) reals */
      fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n    = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1);
      ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1));CHKERRQ(ierr);
#endif
      break;
    default:
#if defined(PETSC_USE_COMPLEX)
      fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start);

      n    = (PetscInt)local_n0*partial_dim;
      ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr);
#else
      /* real: query sizes with the r2c extent substituted for the last dimension */
      temp = pdim[ndim-1];

      pdim[ndim-1] = temp/2 + 1;

      fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);

      n  = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp;
      N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp);

      pdim[ndim-1] = temp; /* restore the true extent */

      ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr);
#endif
      break;
    }
  }
  free(pdim);
  ierr      = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr);
  ierr      = PetscNewLog(A,&fftw);CHKERRQ(ierr);
  fft->data = (void*)fftw;

  fft->n            = n;
  fftw->ndim_fftw   = (ptrdiff_t)ndim; /* This is dimension of fft */
  fftw->partial_dim = partial_dim;

  ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr);
  if (size == 1) {
    /* iodims are only needed by the sequential guru-interface plans; raw malloc to
       match the free() in the destroy path */
#if defined(PETSC_USE_64BIT_INDICES)
    fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim);
#else
    fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim);
#endif
  }

  for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr];

  /* Plans are created lazily on first MatMult/MatMultTranspose */
  fftw->p_forward  = NULL;
  fftw->p_backward = NULL;
  fftw->p_flag     = FFTW_ESTIMATE;

  if (size == 1) {
    A->ops->mult          = MatMult_SeqFFTW;
    A->ops->multtranspose = MatMultTranspose_SeqFFTW;
  } else {
    A->ops->mult          = MatMult_MPIFFTW;
    A->ops->multtranspose = MatMultTranspose_MPIFFTW;
  }
  fft->matdestroy = MatDestroy_FFTW;
  A->assembled    = PETSC_TRUE;
  A->preallocated = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr);

  /* get runtime options */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr);
  if (flg) {
    fftw->p_flag = iplans[p_flag]; /* map the selected name to the FFTW planner flag */
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}