1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 30 Input parameter: 31 mat - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 37 */ 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMult_SeqFFTW" 40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41 { 42 PetscErrorCode ierr; 43 Mat_FFT *fft = (Mat_FFT*)A->data; 44 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45 PetscScalar *x_array,*y_array; 46 PetscInt ndim=fft->ndim,*dim=fft->dim; 47 48 PetscFunctionBegin; 49 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51 if (!fftw->p_forward){ /* create a plan, then excute it */ 52 switch (ndim){ 53 case 1: 54 #if defined(PETSC_USE_COMPLEX) 55 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 #else 57 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 58 #endif 59 break; 
60 case 2: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 65 #endif 66 break; 67 case 3: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 72 #endif 73 break; 74 default: 75 #if defined(PETSC_USE_COMPLEX) 76 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 77 #else 78 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 79 #endif 80 break; 81 } 82 fftw->finarray = x_array; 83 fftw->foutarray = y_array; 84 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 86 fftw_execute(fftw->p_forward); 87 } else { /* use existing plan */ 88 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90 } else { 91 fftw_execute(fftw->p_forward); 92 } 93 } 94 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatMultTranspose_SeqFFTW performs serial backward DFT 100 101 Input parameter: 102 mat - the matrix 103 x - the vector on which BDFT will be performed 104 105 Output parameter: 106 y - vector that stores result of BDFT 107 108 */ 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113 { 114 PetscErrorCode ierr; 115 Mat_FFT *fft = (Mat_FFT*)A->data; 116 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117 PetscScalar *x_array,*y_array; 118 PetscInt ndim=fft->ndim,*dim=fft->dim; 119 120 PetscFunctionBegin; 121 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123 if (!fftw->p_backward){ /* create a plan, then excute it */ 124 switch (ndim){ 125 case 1: 126 #if defined(PETSC_USE_COMPLEX) 127 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 128 #else 129 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 130 #endif 131 break; 132 case 2: 133 #if defined(PETSC_USE_COMPLEX) 134 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 135 #else 136 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 137 #endif 138 break; 139 case 3: 140 #if defined(PETSC_USE_COMPLEX) 141 fftw->p_backward = 
fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 142 #else 143 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 144 #endif 145 break; 146 default: 147 #if defined(PETSC_USE_COMPLEX) 148 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149 #else 150 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 151 #endif 152 break; 153 } 154 fftw->binarray = x_array; 155 fftw->boutarray = y_array; 156 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157 } else { /* use existing plan */ 158 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160 } else { 161 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162 } 163 } 164 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 /* MatMult_MPIFFTW performs forward DFT in parallel 170 171 Input parameter: 172 mat - the matrix 173 x - the vector on which FDFT will be performed 174 175 Output parameter: 176 y - vector that stores result of FDFT 177 178 */ 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_MPIFFTW" 182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183 { 184 PetscErrorCode ierr; 185 Mat_FFT *fft = (Mat_FFT*)A->data; 186 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187 PetscScalar *x_array,*y_array; 188 PetscInt ndim=fft->ndim,*dim=fft->dim; 189 MPI_Comm comm=((PetscObject)A)->comm; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194 if (!fftw->p_forward){ /* create a plan, then excute it */ 195 switch (ndim){ 196 case 1: 197 #if defined(PETSC_USE_COMPLEX) 198 fftw->p_forward = 
fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 201 #endif 202 break; 203 case 2: 204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 case 3: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 default: 218 #if defined(PETSC_USE_COMPLEX) 219 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 220 #else 221 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 222 #endif 223 break; 224 } 225 fftw->finarray = x_array; 226 fftw->foutarray = y_array; 227 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 229 fftw_execute(fftw->p_forward); 230 } else { /* use existing plan */ 231 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233 } else { 234 fftw_execute(fftw->p_forward); 235 } 236 } 237 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 243 244 Input parameter: 245 mat - the matrix 246 x - the vector on which BDFT will be performed 247 248 Output parameter: 249 y - vector that stores result of BDFT 250 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255 { 256 PetscErrorCode ierr; 257 Mat_FFT *fft = (Mat_FFT*)A->data; 258 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259 PetscScalar *x_array,*y_array; 260 PetscInt ndim=fft->ndim,*dim=fft->dim; 261 MPI_Comm comm=((PetscObject)A)->comm; 262 263 PetscFunctionBegin; 264 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266 if (!fftw->p_backward){ /* create a plan, then excute it */ 267 switch (ndim){ 268 case 1: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 273 #endif 274 break; 275 case 2: 276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = 
fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 case 3: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 default: 290 #if defined(PETSC_USE_COMPLEX) 291 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 292 #else 293 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 294 #endif 295 break; 296 } 297 fftw->binarray = x_array; 298 fftw->boutarray = y_array; 299 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300 } else { /* use existing plan */ 301 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303 } else { 304 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305 } 306 } 307 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatDestroy_FFTW" 314 PetscErrorCode MatDestroy_FFTW(Mat A) 315 { 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 fftw_destroy_plan(fftw->p_forward); 322 fftw_destroy_plan(fftw->p_backward); 323 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324 ierr = PetscFree(fft->data);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329 #undef __FUNCT__ 330 #define __FUNCT__ 
"VecDestroy_MPIFFTW" 331 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332 { 333 PetscErrorCode ierr; 334 PetscScalar *array; 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatGetVecsFFTW" 347 /*@ 348 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 349 parallel layout determined by FFTW 350 351 Collective on Mat 352 353 Input Parameter: 354 . mat - the matrix 355 356 Output Parameter: 357 + fin - (optional) input vector of forward FFTW 358 - fout - (optional) output vector of forward FFTW 359 - bout - (optional) output vector of backward FFTW 360 361 Level: advanced 362 Note: The parallel layout of output of forward FFTW is always same as the input 363 of the backward FFTW. But parallel layout of the input vector of forward 364 FFTW might not be same as the output of backward FFTW. 365 Also note that we need to provide enough space while doing parallel real transform. 366 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 367 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 369 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 370 zeros if it is odd only one zero is needed. 371 Lastly one needs some scratch space at the end of data set in each process. alloc_local 372 figures out how much space is needed, i.e. it figures out the data+scratch space for 373 each processor and returns that. 
374 375 .seealso: MatCreateFFTW() 376 @*/ 377 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 378 { 379 PetscErrorCode ierr; 380 381 PetscFunctionBegin; 382 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 383 PetscFunctionReturn(0); 384 } 385 386 EXTERN_C_BEGIN 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 389 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 390 { 391 PetscErrorCode ierr; 392 PetscMPIInt size,rank; 393 MPI_Comm comm=((PetscObject)A)->comm; 394 Mat_FFT *fft = (Mat_FFT*)A->data; 395 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 396 PetscInt N=fft->N; 397 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 398 399 PetscFunctionBegin; 400 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 401 PetscValidType(A,1); 402 403 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 404 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 405 if (size == 1){ /* sequential case */ 406 #if defined(PETSC_USE_COMPLEX) 407 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 408 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 409 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 410 #else 411 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 412 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 413 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 414 #endif 415 } else { /* parallel cases */ 416 ptrdiff_t alloc_local,local_n0,local_0_start; 417 ptrdiff_t local_n1; 418 fftw_complex *data_fout; 419 ptrdiff_t local_1_start; 420 #if defined(PETSC_USE_COMPLEX) 421 fftw_complex *data_fin,*data_bout; 422 #else 423 double *data_finr,*data_boutr; 424 PetscInt n1,N1,vsize; 425 ptrdiff_t temp; 426 #endif 427 428 switch (ndim){ 429 case 1: 430 #if !defined(PETSC_USE_COMPLEX) 431 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 432 
#else 433 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 434 if (fin) { 435 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 437 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 438 } 439 if (fout) { 440 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 441 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 442 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 443 } 444 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 445 if (bout) { 446 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 447 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 448 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 449 } 450 break; 451 #endif 452 case 2: 453 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 454 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 455 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 456 if (fin) { 457 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 459 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 460 } 461 if (bout) { 462 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 463 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 464 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 465 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 466 } 467 //n1 = 2*local_n1*dim[0]; 468 if (fout) { 469 data_fout=(fftw_complex 
*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 470 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 471 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 472 } 473 #else 474 /* Get local size */ 475 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 476 if (fin) { 477 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 479 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 if (fout) { 482 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 484 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 if (bout) { 487 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 488 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 489 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 490 } 491 #endif 492 break; 493 case 3: 494 #if !defined(PETSC_USE_COMPLEX) 495 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 496 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 497 if (fin) { 498 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 499 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 500 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 501 } 502 if (bout) { 503 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 504 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 505 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 508 if (fout) { 509 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 510 ierr = 
VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 511 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 512 } 513 #else 514 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 515 if (fin) { 516 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 518 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 if (fout) { 521 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 523 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 if (bout) { 526 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 527 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 528 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 529 } 530 #endif 531 break; 532 default: 533 #if !defined(PETSC_USE_COMPLEX) 534 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 535 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 536 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 537 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 538 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 539 540 if (fin) { 541 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 543 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 if (bout) { 546 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 547 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 548 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 549 } 550 //temp = fftw->partial_dim; 551 //fftw->partial_dim = fftw->partial_dim * 
((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 552 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 553 if (fout) { 554 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 555 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 556 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 #else 559 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 560 if (fin) { 561 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 563 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (fout) { 566 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 567 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 568 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 if (bout) { 571 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 572 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 573 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 #endif 576 break; 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 EXTERN_C_END 582 583 #undef __FUNCT__ 584 #define __FUNCT__ "VecScatterPetscToFFTW" 585 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 586 { 587 PetscErrorCode ierr; 588 PetscFunctionBegin; 589 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 /* 594 VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 
595 Input A, x, y 596 A - FFTW matrix 597 x - user data 598 Options Database Keys: 599 + -mat_fftw_plannerflags - set FFTW planner flags 600 601 Level: intermediate 602 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 603 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 604 zeros. This routine does that job by scattering operation. 605 606 607 */ 608 609 EXTERN_C_BEGIN 610 #undef __FUNCT__ 611 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 612 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 613 { 614 PetscErrorCode ierr; 615 MPI_Comm comm=((PetscObject)A)->comm; 616 Mat_FFT *fft = (Mat_FFT*)A->data; 617 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 618 PetscInt N=fft->N; 619 PetscInt ndim=fft->ndim,*dim=fft->dim; 620 PetscInt low; 621 PetscInt rank,size,vsize,vsize1; 622 ptrdiff_t alloc_local,local_n0,local_0_start; 623 ptrdiff_t local_n1,local_1_start; 624 VecScatter vecscat; 625 IS list1,list2; 626 #if !defined(PETSC_USE_COMPLEX) 627 PetscInt i,j,k,partial_dim; 628 PetscInt *indx1, *indx2, tempindx, tempindx1; 629 PetscInt N1, n1 ,NM; 630 ptrdiff_t temp; 631 #endif 632 633 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 634 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 635 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 636 637 if (size==1) 638 { 639 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 640 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 641 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 642 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 643 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 644 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 645 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 646 ierr = ISDestroy(&list1);CHKERRQ(ierr); 647 } 648 649 else{ 650 651 switch (ndim){ 652 case 1: 653 #if 
defined(PETSC_USE_COMPLEX) 654 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 655 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 656 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 657 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 658 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 659 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 660 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 661 ierr = ISDestroy(&list1);CHKERRQ(ierr); 662 ierr = ISDestroy(&list2);CHKERRQ(ierr); 663 #else 664 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 665 #endif 666 break; 667 case 2: 668 #if defined(PETSC_USE_COMPLEX) 669 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 670 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 671 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 672 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 673 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 674 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 675 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 676 ierr = ISDestroy(&list1);CHKERRQ(ierr); 677 ierr = ISDestroy(&list2);CHKERRQ(ierr); 678 #else 679 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 680 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 681 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 682 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 683 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 684 ierr = 
PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr);

      /* Real 2D r2c transform: FFTW pads each row of the output to NM doubles
         (NM = 2*(dim[1]/2+1)), so the natural PETSc index (indx1) and the
         padded FFTW index (indx2) advance at different strides */
      if (dim[1]%2==0)
        NM = dim[1]+2;
      else
        NM = dim[1]+1;

      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          tempindx = i*dim[1] + j;   /* offset within this process's natural block */
          tempindx1 = i*NM + j;      /* same entry in FFTW's padded row layout */
          indx1[tempindx]=local_0_start*dim[1]+tempindx; /* global PETSc-vector index */
          indx2[tempindx]=low+tempindx1;                 /* global FFTW-vector index */
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);

      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    case 3:
#if defined(PETSC_USE_COMPLEX)
      /* Complex 3D: both layouts are contiguous slabs, a strided scatter suffices.
         NOTE(review): these two ISCreateStride calls drop the error code and use
         PETSC_COMM_WORLD instead of comm — confirm intended */
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1);
      ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* Real 3D: last dimension padded to NM doubles per pencil (r2c storage) */
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);

      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr);

      if (dim[2]%2==0)
        NM = dim[2]+2;
      else
        NM = dim[2]+1;

      for (i=0;i<local_n0;i++){
        for (j=0;j<dim[1];j++){
          for (k=0;k<dim[2];k++){
            tempindx = i*dim[1]*dim[2] + j*dim[2] + k;  /* natural (unpadded) offset */
            tempindx1 = i*dim[1]*NM + j*NM + k;         /* padded FFTW offset */
            indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx;
            indx2[tempindx]=low+tempindx1;
          }
        }
      }

      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;

    default:
#if defined(PETSC_USE_COMPLEX)
      /* Arbitrary dimension, complex: contiguous slabs again */
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1);
      ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
#else
      /* Arbitrary dimension, real: temporarily replace the last FFTW dimension
         by its r2c storage size (temp/2+1) to query the local layout, then restore */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;
      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);
      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      partial_dim = fftw->partial_dim;

      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr);
      ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr);

      /* NM = number of padding doubles FFTW appends after each last-dimension row */
      if (dim[ndim-1]%2==0)
        NM = 2;
      else
        NM = 1;

      /* walk the natural ordering; j tracks the padded FFTW index and skips the
         NM padding slots at the end of every last-dimension row */
      j = low;
      for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++)
      {
        indx1[i] = local_0_start*partial_dim + i;
        indx2[i] = j;
        if (k%dim[ndim-1]==0)
        { j+=NM;}
        j++;
      }
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr);
      ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr);
      ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr);
      ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr);
      ierr = ISDestroy(&list1);CHKERRQ(ierr);
      ierr = ISDestroy(&list2);CHKERRQ(ierr);
      ierr = PetscFree(indx1);CHKERRQ(ierr);
      ierr = PetscFree(indx2);CHKERRQ(ierr);
#endif
      break;
    }
  }
  return(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "VecScatterFFTWToPetsc"
/* VecScatterFFTWToPetsc - dispatches to the type-specific implementation
   composed on the matrix under "VecScatterFFTWToPetsc_C" (a no-op if the
   matrix type did not compose one).

   Input:  A - the FFT matrix, x - vector in FFTW layout
   Output: y - vector in PETSc layout */
PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that
user can use. 828 Input A, x, y 829 A - FFTW matrix 830 x - FFTW vector 831 y - PETSc vector that user can read 832 833 Level: intermediate 834 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 835 VecScatterFFTWToPetsc removes those extra zeros. 836 837 */ 838 839 EXTERN_C_BEGIN 840 #undef __FUNCT__ 841 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 842 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 843 { 844 PetscErrorCode ierr; 845 MPI_Comm comm=((PetscObject)A)->comm; 846 Mat_FFT *fft = (Mat_FFT*)A->data; 847 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 848 PetscInt N=fft->N; 849 PetscInt ndim=fft->ndim,*dim=fft->dim; 850 PetscInt low; 851 PetscInt rank,size; 852 ptrdiff_t alloc_local,local_n0,local_0_start; 853 ptrdiff_t local_n1,local_1_start; 854 VecScatter vecscat; 855 IS list1,list2; 856 #if !defined(PETSC_USE_COMPLEX) 857 PetscInt i,j,k,partial_dim; 858 PetscInt *indx1, *indx2, tempindx, tempindx1; 859 PetscInt N1, n1 ,NM; 860 ptrdiff_t temp; 861 #endif 862 863 864 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 865 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 866 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 867 868 if (size==1){ 869 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 870 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 871 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 874 ierr = ISDestroy(&list1);CHKERRQ(ierr); 875 } 876 else{ 877 878 switch (ndim){ 879 case 1: 880 #if defined(PETSC_USE_COMPLEX) 881 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 882 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 883 ierr = 
ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 884 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 885 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 888 ierr = ISDestroy(&list1);CHKERRQ(ierr); 889 ierr = ISDestroy(&list2);CHKERRQ(ierr); 890 #else 891 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 892 #endif 893 break; 894 case 2: 895 #if defined(PETSC_USE_COMPLEX) 896 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 897 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 898 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 899 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 900 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 902 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 903 ierr = ISDestroy(&list1);CHKERRQ(ierr); 904 ierr = ISDestroy(&list2);CHKERRQ(ierr); 905 #else 906 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 907 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 908 909 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 910 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 911 912 if (dim[1]%2==0) 913 NM = dim[1]+2; 914 else 915 NM = dim[1]+1; 916 917 for (i=0;i<local_n0;i++){ 918 for (j=0;j<dim[1];j++){ 919 tempindx = i*dim[1] + j; 920 tempindx1 = i*NM + j; 921 indx1[tempindx]=local_0_start*dim[1]+tempindx; 922 indx2[tempindx]=low+tempindx1; 923 } 924 } 925 926 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 927 ierr = 
ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 928 929 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 930 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933 ierr = ISDestroy(&list1);CHKERRQ(ierr); 934 ierr = ISDestroy(&list2);CHKERRQ(ierr); 935 ierr = PetscFree(indx1);CHKERRQ(ierr); 936 ierr = PetscFree(indx2);CHKERRQ(ierr); 937 #endif 938 break; 939 case 3: 940 #if defined(PETSC_USE_COMPLEX) 941 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 942 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 943 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 944 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 945 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 948 ierr = ISDestroy(&list1);CHKERRQ(ierr); 949 ierr = ISDestroy(&list2);CHKERRQ(ierr); 950 #else 951 952 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 953 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 954 955 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 956 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 957 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 958 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 959 960 if (dim[2]%2==0) 961 NM = dim[2]+2; 962 else 963 NM = dim[2]+1; 964 965 for (i=0;i<local_n0;i++){ 966 for 
(j=0;j<dim[1];j++){ 967 for (k=0;k<dim[2];k++){ 968 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 969 tempindx1 = i*dim[1]*NM + j*NM + k; 970 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 971 indx2[tempindx]=low+tempindx1; 972 } 973 } 974 } 975 976 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 977 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 978 979 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 980 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983 ierr = ISDestroy(&list1);CHKERRQ(ierr); 984 ierr = ISDestroy(&list2);CHKERRQ(ierr); 985 ierr = PetscFree(indx1);CHKERRQ(ierr); 986 ierr = PetscFree(indx2);CHKERRQ(ierr); 987 #endif 988 break; 989 default: 990 #if defined(PETSC_USE_COMPLEX) 991 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 992 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 993 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 994 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 995 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 996 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 997 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 998 ierr = ISDestroy(&list1);CHKERRQ(ierr); 999 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1000 #else 1001 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1002 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1003 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1004 N1 = 
2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1005 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1006 1007 partial_dim = fftw->partial_dim; 1008 1009 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1010 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1011 1012 if (dim[ndim-1]%2==0) 1013 NM = 2; 1014 else 1015 NM = 1; 1016 1017 j = low; 1018 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1019 { 1020 indx1[i] = local_0_start*partial_dim + i; 1021 indx2[i] = j; 1022 if (k%dim[ndim-1]==0) 1023 { j+=NM;} 1024 j++; 1025 } 1026 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1027 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1028 1029 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1030 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1031 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1032 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1033 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1034 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1035 ierr = PetscFree(indx1);CHKERRQ(ierr); 1036 ierr = PetscFree(indx2);CHKERRQ(ierr); 1037 #endif 1038 break; 1039 } 1040 } 1041 return 0; 1042 } 1043 EXTERN_C_END 1044 1045 EXTERN_C_BEGIN 1046 #undef __FUNCT__ 1047 #define __FUNCT__ "MatCreate_FFTW" 1048 /* 1049 MatCreate_FFTW - Creates a matrix object that provides FFT 1050 via the external package FFTW 1051 Options Database Keys: 1052 + -mat_fftw_plannerflags - set FFTW planner flags 1053 1054 Level: intermediate 1055 1056 */ 1057 1058 PetscErrorCode MatCreate_FFTW(Mat A) 1059 { 1060 PetscErrorCode ierr; 1061 MPI_Comm comm=((PetscObject)A)->comm; 1062 Mat_FFT *fft=(Mat_FFT*)A->data; 1063 Mat_FFTW *fftw; 1064 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1065 const char 
*p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1066 PetscBool flg; 1067 PetscInt p_flag,partial_dim=1,ctr; 1068 PetscMPIInt size,rank; 1069 ptrdiff_t *pdim; 1070 ptrdiff_t local_n1,local_1_start; 1071 #if !defined(PETSC_USE_COMPLEX) 1072 ptrdiff_t temp; 1073 PetscInt N1,tot_dim; 1074 #else 1075 PetscInt n1; 1076 #endif 1077 1078 PetscFunctionBegin; 1079 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1080 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1081 1082 fftw_mpi_init(); 1083 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1084 pdim[0] = dim[0]; 1085 #if !defined(PETSC_USE_COMPLEX) 1086 tot_dim = 2*dim[0]; 1087 #endif 1088 for (ctr=1;ctr<ndim;ctr++) 1089 { 1090 partial_dim *= dim[ctr]; 1091 pdim[ctr] = dim[ctr]; 1092 #if !defined(PETSC_USE_COMPLEX) 1093 if (ctr==ndim-1) 1094 tot_dim *= (dim[ctr]/2+1); 1095 else 1096 tot_dim *= dim[ctr]; 1097 #endif 1098 } 1099 1100 if (size == 1) { 1101 #if defined(PETSC_USE_COMPLEX) 1102 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1103 n = N; 1104 #else 1105 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1106 n = tot_dim; 1107 #endif 1108 1109 } else { 1110 ptrdiff_t alloc_local,local_n0,local_0_start; 1111 switch (ndim){ 1112 case 1: 1113 #if !defined(PETSC_USE_COMPLEX) 1114 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1115 #else 1116 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1117 n = (PetscInt)local_n0; 1118 n1 = (PetscInt) local_n1; 1119 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1120 #endif 1121 break; 1122 case 2: 1123 #if defined(PETSC_USE_COMPLEX) 1124 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1125 /* 1126 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1127 
PetscSynchronizedFlush(comm); 1128 */ 1129 n = (PetscInt)local_n0*dim[1]; 1130 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1131 #else 1132 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1133 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1134 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1135 #endif 1136 break; 1137 case 3: 1138 #if defined(PETSC_USE_COMPLEX) 1139 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1140 n = (PetscInt)local_n0*dim[1]*dim[2]; 1141 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1142 #else 1143 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1144 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1145 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1146 #endif 1147 break; 1148 default: 1149 #if defined(PETSC_USE_COMPLEX) 1150 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1151 n = (PetscInt)local_n0*partial_dim; 1152 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1153 #else 1154 temp = pdim[ndim-1]; 1155 pdim[ndim-1] = temp/2 + 1; 1156 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1157 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1158 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1159 pdim[ndim-1] = temp; 1160 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1161 #endif 1162 break; 1163 } 1164 } 1165 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1166 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1167 fft->data = (void*)fftw; 1168 1169 fft->n = n; 1170 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1171 fftw->partial_dim = partial_dim; 1172 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t 
*)&(fftw->dim_fftw));CHKERRQ(ierr); 1173 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1174 1175 fftw->p_forward = 0; 1176 fftw->p_backward = 0; 1177 fftw->p_flag = FFTW_ESTIMATE; 1178 1179 if (size == 1){ 1180 A->ops->mult = MatMult_SeqFFTW; 1181 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1182 } else { 1183 A->ops->mult = MatMult_MPIFFTW; 1184 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1185 } 1186 fft->matdestroy = MatDestroy_FFTW; 1187 A->assembled = PETSC_TRUE; 1188 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1189 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 1190 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1191 1192 /* get runtime options */ 1193 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1194 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1195 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1196 PetscOptionsEnd(); 1197 PetscFunctionReturn(0); 1198 } 1199 EXTERN_C_END 1200 1201 1202 1203 1204