1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 30 Input parameter: 31 mat - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 37 */ 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMult_SeqFFTW" 40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41 { 42 PetscErrorCode ierr; 43 Mat_FFT *fft = (Mat_FFT*)A->data; 44 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45 PetscScalar *x_array,*y_array; 46 PetscInt ndim=fft->ndim,*dim=fft->dim; 47 48 PetscFunctionBegin; 49 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51 if (!fftw->p_forward){ /* create a plan, then excute it */ 52 switch (ndim){ 53 case 1: 54 #if defined(PETSC_USE_COMPLEX) 55 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 #else 57 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 58 #endif 59 break; 60 case 2: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 65 #endif 66 break; 67 case 3: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 72 #endif 73 break; 74 default: 75 #if defined(PETSC_USE_COMPLEX) 76 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 77 #else 78 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 79 #endif 80 break; 81 } 82 fftw->finarray = x_array; 83 fftw->foutarray = y_array; 84 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86 fftw_execute(fftw->p_forward); 87 } else { /* use existing plan */ 88 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90 } else { 91 fftw_execute(fftw->p_forward); 92 } 93 } 94 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatMultTranspose_SeqFFTW performs serial backward DFT 100 101 Input parameter: 102 mat - the matrix 103 x - the vector on which BDFT will be performed 104 105 Output parameter: 106 y - vector that stores result of BDFT 107 108 */ 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113 { 114 PetscErrorCode ierr; 115 Mat_FFT *fft = (Mat_FFT*)A->data; 116 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117 PetscScalar *x_array,*y_array; 118 PetscInt ndim=fft->ndim,*dim=fft->dim; 119 120 PetscFunctionBegin; 121 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123 if (!fftw->p_backward){ /* create a plan, then excute it */ 124 switch (ndim){ 125 case 1: 126 #if defined(PETSC_USE_COMPLEX) 127 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 128 #else 129 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 130 #endif 131 break; 132 case 2: 133 #if defined(PETSC_USE_COMPLEX) 134 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 135 #else 136 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 137 #endif 138 break; 139 case 3: 140 #if defined(PETSC_USE_COMPLEX) 141 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 142 #else 143 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 144 #endif 145 break; 146 default: 147 #if defined(PETSC_USE_COMPLEX) 148 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149 #else 150 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 151 #endif 152 break; 153 } 154 fftw->binarray = x_array; 155 fftw->boutarray = y_array; 156 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157 } else { /* use existing plan */ 158 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160 } else { 161 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162 } 163 } 164 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 /* MatMult_MPIFFTW performs forward DFT in parallel 170 171 Input parameter: 172 mat - the matrix 173 x - the vector on which FDFT will be performed 174 175 Output parameter: 176 y - vector that stores result of FDFT 177 178 */ 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_MPIFFTW" 182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183 { 184 PetscErrorCode ierr; 185 Mat_FFT *fft = (Mat_FFT*)A->data; 186 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187 PetscScalar *x_array,*y_array; 188 PetscInt ndim=fft->ndim,*dim=fft->dim; 189 MPI_Comm comm=((PetscObject)A)->comm; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194 if (!fftw->p_forward){ /* create a plan, then excute it */ 195 switch (ndim){ 196 case 1: 197 #if defined(PETSC_USE_COMPLEX) 198 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 201 #endif 202 break; 203 case 2: 204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 case 3: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 default: 218 #if defined(PETSC_USE_COMPLEX) 219 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 220 #else 221 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 222 #endif 223 break; 224 } 225 fftw->finarray = x_array; 226 fftw->foutarray = y_array; 227 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229 fftw_execute(fftw->p_forward); 230 } else { /* use existing plan */ 231 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233 } else { 234 fftw_execute(fftw->p_forward); 235 } 236 } 237 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 243 244 Input parameter: 245 mat - the matrix 246 x - the vector on which BDFT will be performed 247 248 Output parameter: 249 y - vector that stores result of BDFT 250 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255 { 256 PetscErrorCode ierr; 257 Mat_FFT *fft = (Mat_FFT*)A->data; 258 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259 PetscScalar *x_array,*y_array; 260 PetscInt ndim=fft->ndim,*dim=fft->dim; 261 MPI_Comm comm=((PetscObject)A)->comm; 262 263 PetscFunctionBegin; 264 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266 if (!fftw->p_backward){ /* create a plan, then excute it */ 267 switch (ndim){ 268 case 1: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 273 #endif 274 break; 275 case 2: 276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 case 3: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 default: 290 #if defined(PETSC_USE_COMPLEX) 291 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 292 #else 293 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 294 #endif 295 break; 296 } 297 fftw->binarray = x_array; 298 fftw->boutarray = y_array; 299 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300 } else { /* use existing plan */ 301 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303 } else { 304 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305 } 306 } 307 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatDestroy_FFTW" 314 PetscErrorCode MatDestroy_FFTW(Mat A) 315 { 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 fftw_destroy_plan(fftw->p_forward); 322 fftw_destroy_plan(fftw->p_backward); 323 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324 ierr = PetscFree(fft->data);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329 #undef __FUNCT__ 330 #define __FUNCT__ "VecDestroy_MPIFFTW" 331 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332 { 333 PetscErrorCode ierr; 334 PetscScalar *array; 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatGetVecsFFTW" 347 /*@ 348 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 349 parallel layout determined by FFTW 350 351 Collective on Mat 352 353 Input Parameter: 354 . mat - the matrix 355 356 Output Parameter: 357 + fin - (optional) input vector of forward FFTW 358 - fout - (optional) output vector of forward FFTW 359 - bout - (optional) output vector of backward FFTW 360 361 Level: advanced 362 Note: The parallel layout of output of forward FFTW is always same as the input 363 of the backward FFTW. But parallel layout of the input vector of forward 364 FFTW might not be same as the output of backward FFTW. 365 Also note that we need to provide enough space while doing parallel real transform. 366 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 367 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368 last dimension from n to n/2+1 while invoking this routine. 369 370 .seealso: MatCreateFFTW() 371 @*/ 372 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 EXTERN_C_BEGIN 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 384 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 385 { 386 PetscErrorCode ierr; 387 PetscMPIInt size,rank; 388 MPI_Comm comm=((PetscObject)A)->comm; 389 Mat_FFT *fft = (Mat_FFT*)A->data; 390 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 391 PetscInt N=fft->N; 392 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 393 394 PetscFunctionBegin; 395 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 396 PetscValidType(A,1); 397 398 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 399 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 400 if (size == 1){ /* sequential case */ 401 #if defined(PETSC_USE_COMPLEX) 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 405 #else 406 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 407 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 408 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 409 #endif 410 } else { /* parallel cases */ 411 ptrdiff_t alloc_local,local_n0,local_0_start; 412 ptrdiff_t local_n1; 413 fftw_complex *data_fout; 414 ptrdiff_t local_1_start; 415 #if defined(PETSC_USE_COMPLEX) 416 fftw_complex *data_fin,*data_bout; 417 #else 418 double *data_finr,*data_boutr; 419 PetscInt n1,N1,vsize; 420 ptrdiff_t temp; 421 #endif 422 423 switch (ndim){ 424 case 1: 425 #if !defined(PETSC_USE_COMPLEX) 426 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 427 #else 428 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 429 if (fin) { 430 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 432 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 433 } 434 if (fout) { 435 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 437 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 438 } 439 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 440 if (bout) { 441 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 442 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 443 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 444 } 445 break; 446 #endif 447 case 2: 448 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 449 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 450 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 451 if (fin) { 452 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 454 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 if (bout) { 457 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 //n1 = 2*local_n1*dim[0]; 463 if (fout) { 464 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 465 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 466 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 467 } 468 #else 469 /* Get local size */ 470 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 471 if (fin) { 472 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 474 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (fout) { 477 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 if (bout) { 482 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 484 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 #endif 487 break; 488 case 3: 489 #if !defined(PETSC_USE_COMPLEX) 490 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 491 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 492 if (fin) { 493 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 495 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 if (bout) { 498 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 499 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 500 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 501 } 502 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 503 if (fout) { 504 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 505 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 506 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 #else 509 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 510 if (fin) { 511 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (fout) { 516 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 if (bout) { 521 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 523 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 #endif 526 break; 527 default: 528 #if !defined(PETSC_USE_COMPLEX) 529 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 530 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 531 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 532 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 533 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 534 535 if (fin) { 536 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 538 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 if (bout) { 541 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 //temp = fftw->partial_dim; 546 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 547 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 548 if (fout) { 549 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 #else 554 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 555 if (fin) { 556 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 558 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 if (fout) { 561 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 563 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (bout) { 566 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 567 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 568 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 #endif 571 break; 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 EXTERN_C_END 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "VecScatterPetscToFFTW" 580 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 581 { 582 PetscErrorCode ierr; 583 PetscFunctionBegin; 584 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 /* 589 VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 590 Input A, x, y 591 A - FFTW matrix 592 x - user data 593 Options Database Keys: 594 + -mat_fftw_plannerflags - set FFTW planner flags 595 596 Level: intermediate 597 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 598 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 599 zeros. This routine does that job by scattering operation. 600 601 602 */ 603 604 EXTERN_C_BEGIN 605 #undef __FUNCT__ 606 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 607 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 608 { 609 PetscErrorCode ierr; 610 MPI_Comm comm=((PetscObject)A)->comm; 611 Mat_FFT *fft = (Mat_FFT*)A->data; 612 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 613 PetscInt N=fft->N; 614 PetscInt ndim=fft->ndim,*dim=fft->dim; 615 PetscInt low; 616 PetscInt rank,size,vsize,vsize1; 617 ptrdiff_t alloc_local,local_n0,local_0_start; 618 ptrdiff_t local_n1,local_1_start; 619 VecScatter vecscat; 620 IS list1,list2; 621 #if !defined(PETSC_USE_COMPLEX) 622 PetscInt i,j,k,partial_dim; 623 PetscInt *indx1, *indx2, tempindx, tempindx1; 624 PetscInt N1, n1 ,NM; 625 ptrdiff_t temp; 626 #endif 627 628 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 629 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 630 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 631 632 if (size==1) 633 { 634 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 635 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 636 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 637 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 638 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 639 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 640 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 641 ierr = ISDestroy(&list1);CHKERRQ(ierr); 642 } 643 644 else{ 645 646 switch (ndim){ 647 case 1: 648 #if defined(PETSC_USE_COMPLEX) 649 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 650 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 651 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 652 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 653 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 654 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 655 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 656 ierr = ISDestroy(&list1);CHKERRQ(ierr); 657 ierr = ISDestroy(&list2);CHKERRQ(ierr); 658 #else 659 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 660 #endif 661 break; 662 case 2: 663 #if defined(PETSC_USE_COMPLEX) 664 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 665 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 666 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 667 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 668 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 669 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 670 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 671 ierr = ISDestroy(&list1);CHKERRQ(ierr); 672 ierr = ISDestroy(&list2);CHKERRQ(ierr); 673 #else 674 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 675 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 676 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 677 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 678 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 679 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 680 681 if (dim[1]%2==0) 682 NM = dim[1]+2; 683 else 684 NM = dim[1]+1; 685 686 for (i=0;i<local_n0;i++){ 687 for (j=0;j<dim[1];j++){ 688 tempindx = i*dim[1] + j; 689 tempindx1 = i*NM + j; 690 indx1[tempindx]=local_0_start*dim[1]+tempindx; 691 indx2[tempindx]=low+tempindx1; 692 } 693 } 694 695 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 696 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 697 698 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 699 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 700 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 701 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 702 ierr = ISDestroy(&list1);CHKERRQ(ierr); 703 ierr = ISDestroy(&list2);CHKERRQ(ierr); 704 ierr = PetscFree(indx1);CHKERRQ(ierr); 705 ierr = PetscFree(indx2);CHKERRQ(ierr); 706 #endif 707 break; 708 709 case 3: 710 #if defined(PETSC_USE_COMPLEX) 711 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 712 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 713 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 714 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 715 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 716 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 717 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 718 ierr = ISDestroy(&list1);CHKERRQ(ierr); 719 ierr = ISDestroy(&list2);CHKERRQ(ierr); 720 #else 721 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 722 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 723 724 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 725 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 726 727 if (dim[2]%2==0) 728 NM = dim[2]+2; 729 else 730 NM = dim[2]+1; 731 732 for (i=0;i<local_n0;i++){ 733 for (j=0;j<dim[1];j++){ 734 for (k=0;k<dim[2];k++){ 735 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 736 tempindx1 = i*dim[1]*NM + j*NM + k; 737 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 738 indx2[tempindx]=low+tempindx1; 739 } 740 } 741 } 742 743 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 744 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 745 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 746 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 747 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 749 ierr = ISDestroy(&list1);CHKERRQ(ierr); 750 ierr = ISDestroy(&list2);CHKERRQ(ierr); 751 ierr = PetscFree(indx1);CHKERRQ(ierr); 752 ierr = PetscFree(indx2);CHKERRQ(ierr); 753 #endif 754 break; 755 756 default: 757 #if defined(PETSC_USE_COMPLEX) 758 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 759 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 760 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 761 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 762 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 763 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 764 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 765 ierr = ISDestroy(&list1);CHKERRQ(ierr); 766 ierr = ISDestroy(&list2);CHKERRQ(ierr); 767 #else 768 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 769 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 770 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 771 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 772 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 773 774 partial_dim = fftw->partial_dim; 775 776 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 777 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 778 779 if (dim[ndim-1]%2==0) 780 NM = 2; 781 else 782 NM = 1; 783 784 j = low; 785 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 786 { 787 indx1[i] = local_0_start*partial_dim + i; 788 indx2[i] = j; 789 if (k%dim[ndim-1]==0) 790 { j+=NM;} 791 j++; 792 } 793 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 794 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 795 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 796 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 799 ierr = ISDestroy(&list1);CHKERRQ(ierr); 800 ierr = ISDestroy(&list2);CHKERRQ(ierr); 801 ierr = PetscFree(indx1);CHKERRQ(ierr); 802 ierr = PetscFree(indx2);CHKERRQ(ierr); 803 #endif 804 break; 805 } 806 } 807 return(0); 808 } 809 EXTERN_C_END 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "VecScatterFFTWToPetsc" 813 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 814 { 815 PetscErrorCode ierr; 816 PetscFunctionBegin; 817 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 818 PetscFunctionReturn(0); 819 } 820 821 /* 822 VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 823 Input A, x, y 824 A - FFTW matrix 825 x - FFTW vector 826 y - PETSc vector that user can read 827 828 Level: intermediate 829 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 830 VecScatterFFTWToPetsc removes those extra zeros. 831 832 */ 833 834 EXTERN_C_BEGIN 835 #undef __FUNCT__ 836 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 837 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 838 { 839 PetscErrorCode ierr; 840 MPI_Comm comm=((PetscObject)A)->comm; 841 Mat_FFT *fft = (Mat_FFT*)A->data; 842 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 843 PetscInt N=fft->N; 844 PetscInt ndim=fft->ndim,*dim=fft->dim; 845 PetscInt low; 846 PetscInt rank,size; 847 ptrdiff_t alloc_local,local_n0,local_0_start; 848 ptrdiff_t local_n1,local_1_start; 849 VecScatter vecscat; 850 IS list1,list2; 851 #if !defined(PETSC_USE_COMPLEX) 852 PetscInt i,j,k,partial_dim; 853 PetscInt *indx1, *indx2, tempindx, tempindx1; 854 PetscInt N1, n1 ,NM; 855 ptrdiff_t temp; 856 #endif 857 858 859 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 860 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 861 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 862 863 if (size==1){ 864 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 865 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 866 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 869 ierr = ISDestroy(&list1);CHKERRQ(ierr); 870 } 871 else{ 872 873 switch (ndim){ 874 case 1: 875 #if defined(PETSC_USE_COMPLEX) 876 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 877 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 878 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 879 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 880 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 883 ierr = ISDestroy(&list1);CHKERRQ(ierr); 884 ierr = ISDestroy(&list2);CHKERRQ(ierr); 885 #else 886 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 887 #endif 888 break; 889 case 2: 890 #if defined(PETSC_USE_COMPLEX) 891 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 892 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 893 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 894 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 895 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 898 ierr = ISDestroy(&list1);CHKERRQ(ierr); 899 ierr = ISDestroy(&list2);CHKERRQ(ierr); 900 #else 901 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 902 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 903 904 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 905 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 906 907 if (dim[1]%2==0) 908 NM = dim[1]+2; 909 else 910 NM = dim[1]+1; 911 912 for (i=0;i<local_n0;i++){ 913 for (j=0;j<dim[1];j++){ 914 tempindx = i*dim[1] + j; 915 tempindx1 = i*NM + j; 916 indx1[tempindx]=local_0_start*dim[1]+tempindx; 917 indx2[tempindx]=low+tempindx1; 918 } 919 } 920 921 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 922 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 923 924 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 925 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 928 ierr = ISDestroy(&list1);CHKERRQ(ierr); 929 ierr = ISDestroy(&list2);CHKERRQ(ierr); 930 ierr = PetscFree(indx1);CHKERRQ(ierr); 931 ierr = PetscFree(indx2);CHKERRQ(ierr); 932 #endif 933 break; 934 case 3: 935 #if defined(PETSC_USE_COMPLEX) 936 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 937 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 938 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 939 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 940 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 943 ierr = ISDestroy(&list1);CHKERRQ(ierr); 944 ierr = ISDestroy(&list2);CHKERRQ(ierr); 945 #else 946 947 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 948 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 949 950 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 951 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 952 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 953 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 954 955 if (dim[2]%2==0) 956 NM = dim[2]+2; 957 else 958 NM = dim[2]+1; 959 960 for (i=0;i<local_n0;i++){ 961 for (j=0;j<dim[1];j++){ 962 for (k=0;k<dim[2];k++){ 963 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 964 tempindx1 = i*dim[1]*NM + j*NM + k; 965 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 966 indx2[tempindx]=low+tempindx1; 967 } 968 } 969 } 970 971 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 972 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 973 974 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 975 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 978 ierr = ISDestroy(&list1);CHKERRQ(ierr); 979 ierr = ISDestroy(&list2);CHKERRQ(ierr); 980 ierr = PetscFree(indx1);CHKERRQ(ierr); 981 ierr = PetscFree(indx2);CHKERRQ(ierr); 982 #endif 983 break; 984 default: 985 #if defined(PETSC_USE_COMPLEX) 986 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 987 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 988 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 989 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 990 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 991 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 993 ierr = ISDestroy(&list1);CHKERRQ(ierr); 994 ierr = ISDestroy(&list2);CHKERRQ(ierr); 995 #else 996 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 997 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 998 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 999 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1000 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1001 1002 partial_dim = fftw->partial_dim; 1003 1004 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1005 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1006 1007 if (dim[ndim-1]%2==0) 1008 NM = 2; 1009 else 1010 NM = 1; 1011 1012 j = low; 1013 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1014 { 1015 indx1[i] = local_0_start*partial_dim + i; 1016 indx2[i] = j; 1017 if (k%dim[ndim-1]==0) 1018 { j+=NM;} 1019 j++; 1020 } 1021 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1022 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1023 1024 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1025 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030 ierr = PetscFree(indx1);CHKERRQ(ierr); 1031 ierr = PetscFree(indx2);CHKERRQ(ierr); 1032 #endif 1033 break; 1034 } 1035 } 1036 return 0; 1037 } 1038 EXTERN_C_END 1039 1040 EXTERN_C_BEGIN 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatCreate_FFTW" 1043 /* 1044 MatCreate_FFTW - Creates a matrix object that provides FFT 1045 via the external package FFTW 1046 Options Database Keys: 1047 + -mat_fftw_plannerflags - set FFTW planner flags 1048 1049 Level: intermediate 1050 1051 */ 1052 1053 PetscErrorCode MatCreate_FFTW(Mat A) 1054 { 1055 PetscErrorCode ierr; 1056 MPI_Comm comm=((PetscObject)A)->comm; 1057 Mat_FFT *fft=(Mat_FFT*)A->data; 1058 Mat_FFTW *fftw; 1059 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1060 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1061 PetscBool flg; 1062 PetscInt p_flag,partial_dim=1,ctr; 1063 PetscMPIInt size,rank; 1064 ptrdiff_t *pdim; 1065 ptrdiff_t local_n1,local_1_start; 1066 #if !defined(PETSC_USE_COMPLEX) 1067 ptrdiff_t temp; 1068 PetscInt N1,tot_dim; 1069 #else 1070 PetscInt n1; 1071 #endif 1072 1073 PetscFunctionBegin; 1074 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1075 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1076 1077 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1078 pdim[0] = dim[0]; 1079 #if !defined(PETSC_USE_COMPLEX) 1080 tot_dim = 2*dim[0]; 1081 #endif 1082 for (ctr=1;ctr<ndim;ctr++) 1083 { 1084 partial_dim *= dim[ctr]; 1085 pdim[ctr] = dim[ctr]; 1086 #if !defined(PETSC_USE_COMPLEX) 1087 if (ctr==ndim-1) 1088 tot_dim *= (dim[ctr]/2+1); 1089 else 1090 tot_dim *= dim[ctr]; 1091 #endif 1092 } 1093 1094 if (size == 1) { 1095 #if defined(PETSC_USE_COMPLEX) 1096 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1097 n = N; 1098 #else 1099 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1100 n = tot_dim; 1101 #endif 1102 1103 } else { 1104 ptrdiff_t alloc_local,local_n0,local_0_start; 1105 switch (ndim){ 1106 case 1: 1107 #if !defined(PETSC_USE_COMPLEX) 1108 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1109 #else 1110 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1111 n = (PetscInt)local_n0; 1112 n1 = (PetscInt) local_n1; 1113 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1114 #endif 1115 break; 1116 case 2: 1117 #if defined(PETSC_USE_COMPLEX) 1118 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1119 /* 1120 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1121 PetscSynchronizedFlush(comm); 1122 */ 1123 n = (PetscInt)local_n0*dim[1]; 1124 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1125 #else 1126 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1127 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1128 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1129 #endif 1130 break; 1131 case 3: 1132 #if defined(PETSC_USE_COMPLEX) 1133 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1134 n = (PetscInt)local_n0*dim[1]*dim[2]; 1135 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1136 #else 1137 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1138 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1139 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1140 #endif 1141 break; 1142 default: 1143 #if defined(PETSC_USE_COMPLEX) 1144 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1145 n = (PetscInt)local_n0*partial_dim; 1146 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1147 #else 1148 temp = pdim[ndim-1]; 1149 pdim[ndim-1] = temp/2 + 1; 1150 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1151 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1152 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1153 pdim[ndim-1] = temp; 1154 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1155 #endif 1156 break; 1157 } 1158 } 1159 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1160 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1161 fft->data = (void*)fftw; 1162 1163 fft->n = n; 1164 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1165 fftw->partial_dim = partial_dim; 1166 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1167 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1168 1169 fftw->p_forward = 0; 1170 fftw->p_backward = 0; 1171 fftw->p_flag = FFTW_ESTIMATE; 1172 1173 if (size == 1){ 1174 A->ops->mult = MatMult_SeqFFTW; 1175 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1176 } else { 1177 A->ops->mult = MatMult_MPIFFTW; 1178 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1179 } 1180 fft->matdestroy = MatDestroy_FFTW; 1181 A->assembled = PETSC_TRUE; 1182 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1183 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 1184 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1185 1186 /* get runtime options */ 1187 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1188 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1189 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1190 PetscOptionsEnd(); 1191 PetscFunctionReturn(0); 1192 } 1193 EXTERN_C_END 1194 1195 1196 1197 1198