1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward) { /* create a plan, then excute it */ 50 switch (ndim) { 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 } 80 fftw->finarray = x_array; 81 fftw->foutarray = y_array; 82 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84 fftw_execute(fftw->p_forward); 85 } else { /* use existing plan */ 86 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88 } else { 89 fftw_execute(fftw->p_forward); 90 } 91 } 92 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatMultTranspose_SeqFFTW performs serial backward DFT 98 Input parameter: 99 A - the matrix 100 x - the vector on which BDFT will be performed 101 102 Output parameter: 103 y - vector that stores result of BDFT 104 */ 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109 { 110 PetscErrorCode ierr; 111 Mat_FFT *fft = (Mat_FFT*)A->data; 112 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113 PetscScalar *x_array,*y_array; 114 PetscInt ndim=fft->ndim,*dim=fft->dim; 115 116 PetscFunctionBegin; 117 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119 if (!fftw->p_backward) { /* create a plan, then excute it */ 120 switch (ndim) { 121 case 1: 122 #if defined(PETSC_USE_COMPLEX) 123 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 124 #else 125 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 126 #endif 127 break; 128 case 2: 129 #if defined(PETSC_USE_COMPLEX) 130 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 131 #else 132 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 133 #endif 134 break; 135 case 3: 136 #if defined(PETSC_USE_COMPLEX) 137 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 138 #else 139 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 140 #endif 141 break; 142 default: 143 #if defined(PETSC_USE_COMPLEX) 144 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 145 #else 146 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 147 #endif 148 break; 149 } 150 fftw->binarray = x_array; 151 fftw->boutarray = y_array; 152 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153 } else { /* use existing plan */ 154 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156 } else { 157 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158 } 159 } 160 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /* MatMult_MPIFFTW performs forward DFT in parallel 166 Input parameter: 167 A - the matrix 168 x - the vector on which FDFT will be performed 169 170 Output parameter: 171 y - vector that stores result of FDFT 172 */ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_MPIFFTW" 175 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176 { 177 PetscErrorCode ierr; 178 Mat_FFT *fft = (Mat_FFT*)A->data; 179 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180 PetscScalar *x_array,*y_array; 181 PetscInt ndim=fft->ndim,*dim=fft->dim; 182 MPI_Comm comm=((PetscObject)A)->comm; 183 184 PetscFunctionBegin; 185 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 186 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 187 if (!fftw->p_forward) { /* create a plan, then excute it */ 188 switch (ndim) { 189 case 1: 190 #if defined(PETSC_USE_COMPLEX) 191 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 192 #else 193 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 194 #endif 195 break; 196 case 2: 197 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 198 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 201 #endif 202 break; 203 case 3: 204 #if defined(PETSC_USE_COMPLEX) 205 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 default: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 } 218 fftw->finarray = x_array; 219 fftw->foutarray = y_array; 220 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 221 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 222 fftw_execute(fftw->p_forward); 223 } else { /* use existing plan */ 224 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 225 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 226 } else { 227 fftw_execute(fftw->p_forward); 228 } 229 } 230 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 231 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 236 Input parameter: 237 A - the matrix 238 x - the vector on which BDFT will be performed 239 240 Output parameter: 241 y - vector that stores result of BDFT 242 */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 245 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 246 { 247 PetscErrorCode ierr; 248 Mat_FFT *fft = (Mat_FFT*)A->data; 249 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 250 PetscScalar *x_array,*y_array; 251 PetscInt ndim=fft->ndim,*dim=fft->dim; 252 MPI_Comm comm=((PetscObject)A)->comm; 253 254 PetscFunctionBegin; 255 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 256 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 257 if (!fftw->p_backward) { /* create a plan, then excute it */ 258 switch (ndim) { 259 case 1: 260 #if defined(PETSC_USE_COMPLEX) 261 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 262 #else 263 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 264 #endif 265 break; 266 case 2: 267 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 268 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 269 #else 270 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 271 #endif 272 break; 273 case 3: 274 #if defined(PETSC_USE_COMPLEX) 275 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 276 #else 277 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 278 #endif 279 break; 280 default: 281 #if defined(PETSC_USE_COMPLEX) 282 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 283 #else 284 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 285 #endif 286 break; 287 } 288 fftw->binarray = x_array; 289 fftw->boutarray = y_array; 290 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 291 } else { /* use existing plan */ 292 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 293 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 294 } else { 295 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296 } 297 } 298 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 299 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatDestroy_FFTW" 305 PetscErrorCode MatDestroy_FFTW(Mat A) 306 { 307 Mat_FFT *fft = (Mat_FFT*)A->data; 308 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 fftw_destroy_plan(fftw->p_forward); 313 fftw_destroy_plan(fftw->p_backward); 314 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 315 ierr = PetscFree(fft->data);CHKERRQ(ierr); 316 fftw_mpi_cleanup(); 317 PetscFunctionReturn(0); 318 } 319 320 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 321 #undef __FUNCT__ 322 #define __FUNCT__ "VecDestroy_MPIFFTW" 323 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 324 { 325 PetscErrorCode ierr; 326 PetscScalar *array; 327 328 PetscFunctionBegin; 329 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 330 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 331 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 332 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatGetVecsFFTW" 338 /*@ 339 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 340 parallel layout determined by FFTW 341 342 Collective on Mat 343 344 Input Parameter: 345 . A - the matrix 346 347 Output Parameter: 348 + x - (optional) input vector of forward FFTW 349 - y - (optional) output vector of forward FFTW 350 - z - (optional) output vector of backward FFTW 351 352 Level: advanced 353 354 Note: The parallel layout of output of forward FFTW is always same as the input 355 of the backward FFTW. But parallel layout of the input vector of forward 356 FFTW might not be same as the output of backward FFTW. 357 Also note that we need to provide enough space while doing parallel real transform. 358 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 359 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 360 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 361 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 362 zeros if it is odd only one zero is needed. 363 Lastly one needs some scratch space at the end of data set in each process. alloc_local 364 figures out how much space is needed, i.e. it figures out the data+scratch space for 365 each processor and returns that. 366 367 .seealso: MatCreateFFTW() 368 @*/ 369 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 370 { 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 378 EXTERN_C_BEGIN 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 381 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 382 { 383 PetscErrorCode ierr; 384 PetscMPIInt size,rank; 385 MPI_Comm comm = ((PetscObject)A)->comm; 386 Mat_FFT *fft = (Mat_FFT*)A->data; 387 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 388 PetscInt N = fft->N; 389 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 390 391 PetscFunctionBegin; 392 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 393 PetscValidType(A,1); 394 395 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 396 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 397 if (size == 1) { /* sequential case */ 398 #if defined(PETSC_USE_COMPLEX) 399 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 400 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 401 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 402 #else 403 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 404 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 405 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 406 #endif 407 } else { /* parallel cases */ 408 ptrdiff_t alloc_local,local_n0,local_0_start; 409 ptrdiff_t local_n1; 410 fftw_complex *data_fout; 411 ptrdiff_t local_1_start; 412 #if defined(PETSC_USE_COMPLEX) 413 fftw_complex *data_fin,*data_bout; 414 #else 415 double *data_finr,*data_boutr; 416 PetscInt n1,N1; 417 ptrdiff_t temp; 418 #endif 419 420 switch (ndim) { 421 case 1: 422 #if !defined(PETSC_USE_COMPLEX) 423 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 424 #else 425 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 426 if (fin) { 427 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 428 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 429 430 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 431 } 432 if (fout) { 433 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 434 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 435 436 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 437 } 438 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 439 if (bout) { 440 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 441 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 442 443 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 444 } 445 break; 446 #endif 447 case 2: 448 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 449 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 450 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 451 if (fin) { 452 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 454 455 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 456 } 457 if (bout) { 458 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 459 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 460 461 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 if (fout) { 464 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 465 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 466 467 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 468 } 469 #else 470 /* Get local size */ 471 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 472 if (fin) { 473 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 474 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 475 476 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 477 } 478 if (fout) { 479 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 480 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 481 482 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 483 } 484 if (bout) { 485 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 486 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 487 488 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 489 } 490 #endif 491 break; 492 case 3: 493 #if !defined(PETSC_USE_COMPLEX) 494 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 495 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 496 if (fin) { 497 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 498 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 499 500 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 501 } 502 if (bout) { 503 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 504 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 505 506 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 509 if (fout) { 510 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 512 513 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 #else 516 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 517 if (fin) { 518 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 519 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 520 521 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 522 } 523 if (fout) { 524 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 526 527 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (bout) { 530 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 532 533 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 #endif 536 break; 537 default: 538 #if !defined(PETSC_USE_COMPLEX) 539 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 540 541 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 542 543 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 544 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 545 546 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 547 548 if (fin) { 549 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 550 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 551 552 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 553 } 554 if (bout) { 555 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 556 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 557 558 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 561 if (fout) { 562 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 563 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 564 565 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 566 } 567 #else 568 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 569 if (fin) { 570 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 571 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 572 573 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 if (fout) { 576 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 578 579 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 580 } 581 if (bout) { 582 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 583 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 584 585 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 586 } 587 #endif 588 break; 589 } 590 } 591 PetscFunctionReturn(0); 592 } 593 EXTERN_C_END 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "VecScatterPetscToFFTW" 597 /*@ 598 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 599 600 Collective on Mat 601 602 Input Parameters: 603 + A - FFTW matrix 604 - x - the PETSc vector 605 606 Output Parameters: 607 . y - the FFTW vector 608 609 Options Database Keys: 610 . -mat_fftw_plannerflags - set FFTW planner flags 611 612 Level: intermediate 613 614 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 615 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 616 zeros. This routine does that job by scattering operation. 617 618 .seealso: VecScatterFFTWToPetsc() 619 @*/ 620 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 621 { 622 PetscErrorCode ierr; 623 624 PetscFunctionBegin; 625 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 632 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 633 { 634 PetscErrorCode ierr; 635 MPI_Comm comm =((PetscObject)A)->comm; 636 Mat_FFT *fft = (Mat_FFT*)A->data; 637 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 638 PetscInt N =fft->N; 639 PetscInt ndim =fft->ndim,*dim=fft->dim; 640 PetscInt low; 641 PetscInt rank,size,vsize,vsize1; 642 ptrdiff_t alloc_local,local_n0,local_0_start; 643 ptrdiff_t local_n1,local_1_start; 644 VecScatter vecscat; 645 IS list1,list2; 646 #if !defined(PETSC_USE_COMPLEX) 647 PetscInt i,j,k,partial_dim; 648 PetscInt *indx1, *indx2, tempindx, tempindx1; 649 PetscInt N1, n1,NM; 650 ptrdiff_t temp; 651 #endif 652 653 PetscFunctionBegin; 654 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 655 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 656 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 657 658 if (size==1) { 659 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 660 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 661 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 662 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 663 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 664 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 665 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 666 ierr = ISDestroy(&list1);CHKERRQ(ierr); 667 } else { 668 switch (ndim) { 669 case 1: 670 #if defined(PETSC_USE_COMPLEX) 671 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 672 673 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 674 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 675 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 676 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 677 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 678 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 679 ierr = ISDestroy(&list1);CHKERRQ(ierr); 680 ierr = ISDestroy(&list2);CHKERRQ(ierr); 681 #else 682 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 683 #endif 684 break; 685 case 2: 686 #if defined(PETSC_USE_COMPLEX) 687 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 688 689 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 690 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 691 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 692 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 693 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 694 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 695 ierr = ISDestroy(&list1);CHKERRQ(ierr); 696 ierr = ISDestroy(&list2);CHKERRQ(ierr); 697 #else 698 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 699 700 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 701 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 702 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 703 704 if (dim[1]%2==0) { 705 NM = dim[1]+2; 706 } else { 707 NM = dim[1]+1; 708 } 709 for (i=0; i<local_n0; i++) { 710 for (j=0; j<dim[1]; j++) { 711 tempindx = i*dim[1] + j; 712 tempindx1 = i*NM + j; 713 714 indx1[tempindx]=local_0_start*dim[1]+tempindx; 715 indx2[tempindx]=low+tempindx1; 716 } 717 } 718 719 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 720 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 721 722 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 723 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 724 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 725 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 726 ierr = ISDestroy(&list1);CHKERRQ(ierr); 727 ierr = ISDestroy(&list2);CHKERRQ(ierr); 728 ierr = PetscFree(indx1);CHKERRQ(ierr); 729 ierr = PetscFree(indx2);CHKERRQ(ierr); 730 #endif 731 break; 732 733 case 3: 734 #if defined(PETSC_USE_COMPLEX) 735 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 736 737 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 738 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 739 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 740 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 741 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 743 ierr = ISDestroy(&list1);CHKERRQ(ierr); 744 ierr = ISDestroy(&list2);CHKERRQ(ierr); 745 #else 746 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 747 748 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 749 750 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 751 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 752 753 if (dim[2]%2==0) NM = dim[2]+2; 754 else NM = dim[2]+1; 755 756 for (i=0; i<local_n0; i++) { 757 for (j=0; j<dim[1]; j++) { 758 for (k=0; k<dim[2]; k++) { 759 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 760 tempindx1 = i*dim[1]*NM + j*NM + k; 761 762 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 763 indx2[tempindx]=low+tempindx1; 764 } 765 } 766 } 767 768 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 769 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 770 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 771 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 772 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 773 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 774 ierr = ISDestroy(&list1);CHKERRQ(ierr); 775 ierr = ISDestroy(&list2);CHKERRQ(ierr); 776 ierr = PetscFree(indx1);CHKERRQ(ierr); 777 ierr = PetscFree(indx2);CHKERRQ(ierr); 778 #endif 779 break; 780 781 default: 782 #if defined(PETSC_USE_COMPLEX) 783 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 784 785 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 786 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 787 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 788 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 789 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 791 ierr = ISDestroy(&list1);CHKERRQ(ierr); 792 ierr = ISDestroy(&list2);CHKERRQ(ierr); 793 #else 794 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 795 796 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 797 798 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 799 800 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 801 802 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 803 804 partial_dim = fftw->partial_dim; 805 806 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 807 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 808 809 if (dim[ndim-1]%2==0) NM = 2; 810 else NM = 1; 811 812 j = low; 813 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 814 indx1[i] = local_0_start*partial_dim + i; 815 indx2[i] = j; 816 if (k%dim[ndim-1]==0) j+=NM; 817 j++; 818 } 819 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 820 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 821 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 822 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 823 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 824 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 825 ierr = ISDestroy(&list1);CHKERRQ(ierr); 826 ierr = ISDestroy(&list2);CHKERRQ(ierr); 827 ierr = PetscFree(indx1);CHKERRQ(ierr); 828 ierr = PetscFree(indx2);CHKERRQ(ierr); 829 #endif 830 break; 831 } 832 } 833 PetscFunctionReturn(0); 834 } 835 EXTERN_C_END 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "VecScatterFFTWToPetsc" 839 /*@ 840 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 841 842 Collective on Mat 843 844 Input Parameters: 845 + A - FFTW matrix 846 - x - FFTW vector 847 848 Output Parameters: 849 . y - PETSc vector 850 851 Level: intermediate 852 853 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 854 VecScatterFFTWToPetsc removes those extra zeros. 855 856 .seealso: VecScatterPetscToFFTW() 857 @*/ 858 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 859 { 860 PetscErrorCode ierr; 861 862 PetscFunctionBegin; 863 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 EXTERN_C_BEGIN 868 #undef __FUNCT__ 869 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 870 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 871 { 872 PetscErrorCode ierr; 873 MPI_Comm comm = ((PetscObject)A)->comm; 874 Mat_FFT *fft = (Mat_FFT*)A->data; 875 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 876 PetscInt N = fft->N; 877 PetscInt ndim = fft->ndim,*dim=fft->dim; 878 PetscInt low; 879 PetscInt rank,size; 880 ptrdiff_t alloc_local,local_n0,local_0_start; 881 ptrdiff_t local_n1,local_1_start; 882 VecScatter vecscat; 883 IS list1,list2; 884 #if !defined(PETSC_USE_COMPLEX) 885 PetscInt i,j,k,partial_dim; 886 PetscInt *indx1, *indx2, tempindx, tempindx1; 887 PetscInt N1, n1,NM; 888 ptrdiff_t temp; 889 #endif 890 891 PetscFunctionBegin; 892 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 893 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 894 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 895 896 if (size==1) { 897 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 898 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 899 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 902 ierr = ISDestroy(&list1);CHKERRQ(ierr); 903 904 } else { 905 switch (ndim) { 906 case 1: 907 #if defined(PETSC_USE_COMPLEX) 908 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 909 910 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 911 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 912 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 913 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 915 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 916 ierr = ISDestroy(&list1);CHKERRQ(ierr); 917 ierr = ISDestroy(&list2);CHKERRQ(ierr); 918 #else 919 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 920 #endif 921 break; 922 case 2: 923 #if defined(PETSC_USE_COMPLEX) 924 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 925 926 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 927 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 928 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 929 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 930 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 932 ierr = ISDestroy(&list1);CHKERRQ(ierr); 933 ierr = ISDestroy(&list2);CHKERRQ(ierr); 934 #else 935 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 936 937 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 938 939 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 940 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 941 942 if (dim[1]%2==0) NM = dim[1]+2; 943 else NM = dim[1]+1; 944 945 for (i=0; i<local_n0; i++) { 946 for (j=0; j<dim[1]; j++) { 947 tempindx = i*dim[1] + j; 948 tempindx1 = i*NM + j; 949 950 indx1[tempindx]=local_0_start*dim[1]+tempindx; 951 indx2[tempindx]=low+tempindx1; 952 } 953 } 954 955 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 956 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 957 958 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 959 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 962 ierr = ISDestroy(&list1);CHKERRQ(ierr); 963 ierr = ISDestroy(&list2);CHKERRQ(ierr); 964 ierr = PetscFree(indx1);CHKERRQ(ierr); 965 ierr = PetscFree(indx2);CHKERRQ(ierr); 966 #endif 967 break; 968 case 3: 969 #if defined(PETSC_USE_COMPLEX) 970 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 971 972 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 973 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 974 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 975 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 978 ierr = ISDestroy(&list1);CHKERRQ(ierr); 979 ierr = ISDestroy(&list2);CHKERRQ(ierr); 980 #else 981 982 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 983 984 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 985 986 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 987 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 988 989 if (dim[2]%2==0) NM = dim[2]+2; 990 else NM = dim[2]+1; 991 992 for (i=0; i<local_n0; i++) { 993 for (j=0; j<dim[1]; j++) { 994 for (k=0; k<dim[2]; k++) { 995 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 996 tempindx1 = i*dim[1]*NM + j*NM + k; 997 998 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 999 indx2[tempindx]=low+tempindx1; 1000 } 1001 } 1002 } 1003 1004 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1005 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1006 1007 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1008 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1011 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1012 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1013 ierr = PetscFree(indx1);CHKERRQ(ierr); 1014 ierr = PetscFree(indx2);CHKERRQ(ierr); 1015 #endif 1016 break; 1017 default: 1018 #if defined(PETSC_USE_COMPLEX) 1019 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1020 1021 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1022 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1023 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1029 #else 1030 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1031 1032 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1033 1034 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1035 1036 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1037 1038 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1039 1040 partial_dim = fftw->partial_dim; 1041 1042 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1043 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1044 1045 if (dim[ndim-1]%2==0) NM = 2; 1046 else NM = 1; 1047 1048 j = low; 1049 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1050 indx1[i] = local_0_start*partial_dim + i; 1051 indx2[i] = j; 1052 if (k%dim[ndim-1]==0) j+=NM; 1053 j++; 1054 } 1055 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1056 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1057 1058 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1059 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1060 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1061 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1062 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1063 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1064 ierr = PetscFree(indx1);CHKERRQ(ierr); 1065 ierr = PetscFree(indx2);CHKERRQ(ierr); 1066 #endif 1067 break; 1068 } 1069 } 1070 PetscFunctionReturn(0); 1071 } 1072 EXTERN_C_END 1073 1074 EXTERN_C_BEGIN 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatCreate_FFTW" 1077 /* 1078 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1079 1080 Options Database Keys: 1081 + -mat_fftw_plannerflags - set FFTW planner flags 1082 1083 Level: intermediate 1084 1085 */ 1086 PetscErrorCode MatCreate_FFTW(Mat A) 1087 { 1088 PetscErrorCode ierr; 1089 MPI_Comm comm=((PetscObject)A)->comm; 1090 Mat_FFT *fft=(Mat_FFT*)A->data; 1091 Mat_FFTW *fftw; 1092 PetscInt n =fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1093 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1094 PetscBool flg; 1095 PetscInt p_flag,partial_dim=1,ctr; 1096 PetscMPIInt size,rank; 1097 ptrdiff_t *pdim; 1098 ptrdiff_t local_n1,local_1_start; 1099 #if !defined(PETSC_USE_COMPLEX) 1100 ptrdiff_t temp; 1101 PetscInt N1,tot_dim; 1102 #else 1103 PetscInt n1; 1104 #endif 1105 1106 PetscFunctionBegin; 1107 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1108 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1109 1110 fftw_mpi_init(); 1111 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1112 pdim[0] = dim[0]; 1113 #if !defined(PETSC_USE_COMPLEX) 1114 tot_dim = 2*dim[0]; 1115 #endif 1116 for (ctr=1; ctr<ndim; ctr++) { 1117 partial_dim *= dim[ctr]; 1118 pdim[ctr] = dim[ctr]; 1119 #if !defined(PETSC_USE_COMPLEX) 1120 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1121 else tot_dim *= dim[ctr]; 1122 #endif 1123 } 1124 1125 if (size == 1) { 1126 #if defined(PETSC_USE_COMPLEX) 1127 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1128 n = N; 1129 #else 1130 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1131 n = tot_dim; 1132 #endif 1133 1134 } else { 1135 ptrdiff_t alloc_local,local_n0,local_0_start; 1136 switch (ndim) { 1137 case 1: 1138 #if !defined(PETSC_USE_COMPLEX) 1139 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1140 #else 1141 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1142 1143 n = (PetscInt)local_n0; 1144 n1 = (PetscInt)local_n1; 1145 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1146 #endif 1147 break; 1148 case 2: 1149 #if defined(PETSC_USE_COMPLEX) 1150 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1151 /* 1152 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1153 PetscSynchronizedFlush(comm); 1154 */ 1155 n = (PetscInt)local_n0*dim[1]; 1156 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1157 #else 1158 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1159 1160 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1161 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1162 #endif 1163 break; 1164 case 3: 1165 #if defined(PETSC_USE_COMPLEX) 1166 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1167 1168 n = (PetscInt)local_n0*dim[1]*dim[2]; 1169 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1170 #else 1171 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1172 1173 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1174 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1175 #endif 1176 break; 1177 default: 1178 #if defined(PETSC_USE_COMPLEX) 1179 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1180 1181 n = (PetscInt)local_n0*partial_dim; 1182 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1183 #else 1184 temp = pdim[ndim-1]; 1185 1186 pdim[ndim-1] = temp/2 + 1; 1187 1188 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1189 1190 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1191 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1192 1193 pdim[ndim-1] = temp; 1194 1195 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1196 #endif 1197 break; 1198 } 1199 } 1200 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1201 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1202 fft->data = (void*)fftw; 1203 1204 fft->n = n; 1205 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1206 fftw->partial_dim = partial_dim; 1207 1208 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t*)&(fftw->dim_fftw));CHKERRQ(ierr); 1209 1210 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1211 1212 fftw->p_forward = 0; 1213 fftw->p_backward = 0; 1214 fftw->p_flag = FFTW_ESTIMATE; 1215 1216 if (size == 1) { 1217 A->ops->mult = MatMult_SeqFFTW; 1218 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1219 } else { 1220 A->ops->mult = MatMult_MPIFFTW; 1221 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1222 } 1223 fft->matdestroy = MatDestroy_FFTW; 1224 A->assembled = PETSC_TRUE; 1225 A->preallocated = PETSC_TRUE; 1226 1227 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1230 1231 /* get runtime options */ 1232 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1233 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1234 if (flg) fftw->p_flag = (unsigned)p_flag; 1235 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 EXTERN_C_END 1239 1240 1241 1242 1243