1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42 if (!fftw->p_forward){ /* create a plan, then excute it */ 43 switch (ndim){ 44 case 1: 45 #if defined(PETSC_USE_COMPLEX) 46 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47 #else 48 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 49 #endif 50 break; 51 case 2: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 3: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 default: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 } 73 fftw->finarray = x_array; 74 fftw->foutarray = y_array; 75 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77 fftw_execute(fftw->p_forward); 78 } else { /* use existing plan */ 79 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81 } else { 82 fftw_execute(fftw->p_forward); 83 } 84 } 85 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93 { 94 PetscErrorCode ierr; 95 Mat_FFT *fft = (Mat_FFT*)A->data; 96 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97 PetscScalar *x_array,*y_array; 98 PetscInt ndim=fft->ndim,*dim=fft->dim; 99 100 PetscFunctionBegin; 101 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103 if (!fftw->p_backward){ /* create a plan, then excute it */ 104 switch (ndim){ 105 case 1: 106 #if defined(PETSC_USE_COMPLEX) 107 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 #else 109 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 110 #endif 111 break; 112 case 2: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 3: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 default: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 } 134 fftw->binarray = x_array; 135 fftw->boutarray = y_array; 136 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137 } else { /* use existing plan */ 138 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140 } else { 141 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142 } 143 } 144 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_MPIFFTW" 151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152 { 153 PetscErrorCode ierr; 154 Mat_FFT *fft = (Mat_FFT*)A->data; 155 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156 PetscScalar *x_array,*y_array; 157 PetscInt ndim=fft->ndim,*dim=fft->dim; 158 MPI_Comm comm=((PetscObject)A)->comm; 159 160 PetscFunctionBegin; 161 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_forward){ /* create a plan, then excute it */ 164 switch (ndim){ 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168 #else 169 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 175 #else 176 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 #else 183 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 189 #else 190 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 191 #endif 192 break; 193 } 194 fftw->finarray = x_array; 195 fftw->foutarray = y_array; 196 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198 fftw_execute(fftw->p_forward); 199 } else { /* use existing plan */ 200 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_forward); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214 { 215 PetscErrorCode ierr; 216 Mat_FFT *fft = (Mat_FFT*)A->data; 217 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218 PetscScalar *x_array,*y_array; 219 PetscInt ndim=fft->ndim,*dim=fft->dim; 220 MPI_Comm comm=((PetscObject)A)->comm; 221 222 PetscFunctionBegin; 223 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225 if (!fftw->p_backward){ /* create a plan, then excute it */ 226 switch (ndim){ 227 case 1: 228 #if defined(PETSC_USE_COMPLEX) 229 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230 #else 231 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 232 #endif 233 break; 234 case 2: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #else 238 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 239 #endif 240 break; 241 case 3: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 244 #else 245 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 246 #endif 247 break; 248 default: 249 #if defined(PETSC_USE_COMPLEX) 250 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 251 #else 252 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 } 256 fftw->binarray = x_array; 257 fftw->boutarray = y_array; 258 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259 } else { /* use existing plan */ 260 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262 } else { 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } 265 } 266 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_FFTW" 273 PetscErrorCode MatDestroy_FFTW(Mat A) 274 { 275 Mat_FFT *fft = (Mat_FFT*)A->data; 276 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 fftw_destroy_plan(fftw->p_forward); 281 fftw_destroy_plan(fftw->p_backward); 282 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283 ierr = PetscFree(fft->data);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288 #undef __FUNCT__ 289 #define __FUNCT__ "VecDestroy_MPIFFTW" 290 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291 { 292 PetscErrorCode ierr; 293 PetscScalar *array; 294 295 PetscFunctionBegin; 296 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatGetVecsFFTW_1DC" 305 /* 306 - Get Vectors(s) compatible with matrix, i.e. with the 307 parallel layout determined by FFTW-1D 308 309 */ 310 PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311 { 312 PetscErrorCode ierr; 313 PetscMPIInt size,rank; 314 MPI_Comm comm=((PetscObject)A)->comm; 315 Mat_FFT *fft = (Mat_FFT*)A->data; 316 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317 PetscInt N=fft->N; 318 PetscInt ndim=fft->ndim,*dim=fft->dim; 319 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320 ptrdiff_t f_local_n1,f_local_1_end; 321 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322 ptrdiff_t b_local_n1,b_local_1_end; 323 fftw_complex *data_fin,*data_fout,*data_bout; 324 325 PetscFunctionBegin; 326 #if !defined(PETSC_USE_COMPLEX) 327 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328 #endif 329 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331 if (size == 1){ 332 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333 } 334 else { 335 if (ndim>1){ 336 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337 else { 338 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340 if (fin) { 341 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344 } 345 if (fout) { 346 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349 } 350 if (bout) { 351 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354 } 355 } 356 if (fin){ 357 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358 } 359 if (fout){ 360 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361 } 362 if (bout){ 363 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatGetVecsFFTW" 373 /*@ 374 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 375 parallel layout determined by FFTW 376 377 Collective on Mat 378 379 Input Parameter: 380 . mat - the matrix 381 382 Output Parameter: 383 + fin - (optional) input vector of forward FFTW 384 - fout - (optional) output vector of forward FFTW 385 386 Level: advanced 387 388 .seealso: MatCreateFFTW() 389 @*/ 390 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 391 { 392 PetscErrorCode ierr; 393 PetscFunctionBegin; 394 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 EXTERN_C_BEGIN 399 #undef __FUNCT__ 400 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 401 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 402 { 403 PetscErrorCode ierr; 404 PetscMPIInt size,rank; 405 MPI_Comm comm=((PetscObject)A)->comm; 406 Mat_FFT *fft = (Mat_FFT*)A->data; 407 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 408 PetscInt N=fft->N; 409 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 410 411 PetscFunctionBegin; 412 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 413 PetscValidType(A,1); 414 415 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 416 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 417 if (size == 1){ /* sequential case */ 418 printf("Routine is getting called\n"); 419 #if defined(PETSC_USE_COMPLEX) 420 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 421 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 422 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 423 #else 424 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 425 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 426 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 427 #endif 428 } else { 429 ptrdiff_t alloc_local,local_n0,local_0_start; 430 ptrdiff_t local_n1; 431 fftw_complex *data_fout; 432 ptrdiff_t local_1_start; 433 #if defined(PETSC_USE_COMPLEX) 434 fftw_complex *data_fin,*data_bout; 435 #else 436 double *data_finr,*data_boutr; 437 PetscInt n1,N1,vsize; 438 ptrdiff_t temp; 439 #endif 440 441 switch (ndim){ 442 case 1: 443 #if !defined(PETSC_USE_COMPLEX) 444 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 445 #else 446 //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 447 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 448 if (fin) { 449 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 450 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 451 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 452 } 453 if (fout) { 454 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 455 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 456 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 457 } 458 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 459 if (bout) { 460 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 461 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 462 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 463 } 464 break; 465 #endif 466 case 2: 467 #if !defined(PETSC_USE_COMPLEX) 468 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 469 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 470 if (fin) { 471 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 472 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 473 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 474 } 475 if (bout) { 476 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 477 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 478 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 479 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 //n1 = 2*local_n1*dim[0]; 482 if (fout) { 483 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 484 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 485 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 486 } 487 #else 488 /* Get local size */ 489 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 490 if (fin) { 491 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 493 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 494 } 495 if (fout) { 496 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 497 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 498 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 499 } 500 if (bout) { 501 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 502 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 503 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 504 } 505 #endif 506 break; 507 case 3: 508 #if !defined(PETSC_USE_COMPLEX) 509 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 510 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 511 if (fin) { 512 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 513 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 514 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 515 } 516 if (bout) { 517 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 518 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 519 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 520 } 521 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 522 if (fout) { 523 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 524 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 525 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 526 } 527 #else 528 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 529 if (fin) { 530 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 532 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 533 } 534 if (fout) { 535 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 536 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 537 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 538 } 539 if (bout) { 540 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 541 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 542 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 543 } 544 #endif 545 break; 546 default: 547 #if !defined(PETSC_USE_COMPLEX) 548 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 549 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 550 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 551 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 552 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 553 554 if (fin) { 555 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 556 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 557 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 if (bout) { 560 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 561 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 562 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 563 } 564 //temp = fftw->partial_dim; 565 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 566 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 567 if (fout) { 568 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 569 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 570 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 571 } 572 #else 573 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 574 if (fin) { 575 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 576 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 577 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 578 } 579 if (fout) { 580 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 581 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 582 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 583 } 584 if (bout) { 585 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 586 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 587 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 588 } 589 #endif 590 break; 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 EXTERN_C_END 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatGetVecs_FFTW" 599 /* 600 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 601 parallel layout determined by FFTW 602 603 Collective on Mat 604 605 Input Parameter: 606 . mat - the matrix 607 608 Output Parameter: 609 + fin - (optional) input vector of forward FFTW 610 - fout - (optional) output vector of forward FFTW 611 612 Level: advanced 613 614 .seealso: MatCreateFFTW() 615 */ 616 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 617 { 618 PetscErrorCode ierr; 619 PetscMPIInt size,rank; 620 MPI_Comm comm=((PetscObject)A)->comm; 621 Mat_FFT *fft = (Mat_FFT*)A->data; 622 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 623 PetscInt N=fft->N; 624 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 625 626 PetscFunctionBegin; 627 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 628 PetscValidType(A,1); 629 630 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 632 if (size == 1){ /* sequential case */ 633 #if defined(PETSC_USE_COMPLEX) 634 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 635 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 636 #else 637 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 638 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 639 #endif 640 } else { /* mpi case */ 641 ptrdiff_t alloc_local,local_n0,local_0_start; 642 ptrdiff_t local_n1,local_1_end; 643 fftw_complex *data_fin,*data_fout; 644 #if !defined(PETSC_USE_COMPLEX) 645 double *data_finr ; 646 ptrdiff_t local_1_start,temp; 647 PetscInt vsize,n1,N1; 648 #endif 649 650 // PetscInt ctr; 651 // ptrdiff_t ndim1,*pdim; 652 // ndim1=(ptrdiff_t) ndim; 653 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 654 655 // for(ctr=0;ctr<ndim;ctr++) 656 // {k 657 // pdim[ctr] = dim[ctr]; 658 // } 659 660 661 662 switch (ndim){ 663 case 1: 664 /* Get local size */ 665 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 666 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 667 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 668 printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 669 if (fin) { 670 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 671 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 672 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 673 } 674 if (fout) { 675 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 676 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 677 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 678 } 679 break; 680 case 2: 681 #if !defined(PETSC_USE_COMPLEX) 682 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 683 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 684 if (fin) { 685 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 686 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 687 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 688 //printf("The code comes here with vector size %d\n",vsize); 689 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 690 } 691 n1 = 2*local_n1*(dim[0]); 692 //n1 = 2*local_n1*dim[1]; 693 printf("The values are %ld\n",local_n1); 694 if (fout) { 695 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 696 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 697 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 698 } 699 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 700 701 #else 702 /* Get local size */ 703 //printf("Hope this does not come here"); 704 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 705 if (fin) { 706 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 707 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 708 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 709 } 710 if (fout) { 711 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 712 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 713 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 714 } 715 //printf("Hope this does not come here"); 716 #endif 717 break; 718 case 3: 719 /* Get local size */ 720 #if !defined(PETSC_USE_COMPLEX) 721 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 722 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 723 if (fin) { 724 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 725 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 726 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 727 //printf("The code comes here with vector size %d\n",vsize); 728 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 729 } 730 printf("The value is %ld",local_n1); 731 n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 732 if (fout) { 733 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 734 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 735 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 736 } 737 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 738 739 740 #else 741 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 742 // printf("The quantity n is %d",n); 743 if (fin) { 744 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 745 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 746 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 747 } 748 if (fout) { 749 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 750 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 751 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 752 } 753 #endif 754 break; 755 default: 756 /* Get local size */ 757 #if !defined(PETSC_USE_COMPLEX) 758 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 759 printf("The value of temp is %ld\n",temp); 760 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 761 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 762 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 763 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 764 if (fin) { 765 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 766 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 767 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 768 //printf("The code comes here with vector size %d\n",vsize); 769 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 770 } 771 printf("The value is %ld. Global length is %d \n",local_n1,N1); 772 temp = fftw->partial_dim; 773 fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 774 775 n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 776 if (fout) { 777 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 778 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 779 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 780 } 781 782 #else 783 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 784 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 785 // printf("The value of alloc local is %d",alloc_local); 786 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 787 // for(i=0;i<ndim;i++) 788 // { 789 // pdim[i]=dim[i];printf("%d",pdim[i]); 790 // } 791 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 792 // printf("The quantity n is %d",n); 793 if (fin) { 794 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 795 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 796 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 797 } 798 if (fout) { 799 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 800 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 801 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 802 } 803 #endif 804 break; 805 } 806 } 807 // if (fin){ 808 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 809 // } 810 // if (fout){ 811 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 812 // } 813 PetscFunctionReturn(0); 814 } 815 816 #undef __FUNCT__ 817 #define __FUNCT__ "InputTransformFFT" 818 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 819 { 820 PetscErrorCode ierr; 821 PetscFunctionBegin; 822 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 823 PetscFunctionReturn(0); 824 } 825 826 /* 827 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 828 parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 829 changing dimension. 830 Input A, x, y 831 A - FFTW matrix 832 x - user data 833 Options Database Keys: 834 + -mat_fftw_plannerflags - set FFTW planner flags 835 836 Level: intermediate 837 838 */ 839 840 EXTERN_C_BEGIN 841 #undef __FUNCT__ 842 #define __FUNCT__ "InputTransformFFT_FTTW" 843 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 844 { 845 PetscErrorCode ierr; 846 MPI_Comm comm=((PetscObject)A)->comm; 847 Mat_FFT *fft = (Mat_FFT*)A->data; 848 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 849 PetscInt N=fft->N; 850 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 851 PetscInt low; 852 PetscInt rank,size; 853 ptrdiff_t alloc_local,local_n0,local_0_start; 854 ptrdiff_t local_n1,local_1_start; 855 VecScatter vecscat; 856 IS list1,list2; 857 #if !defined(PETSC_USE_COMPLEX) 858 PetscInt i,j,k,partial_dim; 859 PetscInt *indx1, *indx2, tempindx, tempindx1; 860 PetscInt N1, n1 ,NM; 861 ptrdiff_t temp; 862 #endif 863 864 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 865 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 866 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 867 868 if (size==1) 869 { 870 /* switch (ndim){ 871 case 1: 872 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 873 for (i=0;i<dim[0];i++) 874 { 875 indx1[i] = i; 876 } 877 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 878 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 879 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 882 ierr = ISDestroy(&list1);CHKERRQ(ierr); 883 ierr = PetscFree(indx1);CHKERRQ(ierr); 884 break; 885 886 case 2: 887 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 888 for (i=0;i<dim[0];i++){ 889 for (j=0;j<dim[1];j++){ 890 indx1[i*dim[1]+j] = i*dim[1] + j; 891 } 892 } 893 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 894 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 895 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 898 ierr = ISDestroy(&list1);CHKERRQ(ierr); 899 ierr = PetscFree(indx1);CHKERRQ(ierr); 900 break; 901 case 3: 902 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 903 for (i=0;i<dim[0];i++){ 904 for (j=0;j<dim[1];j++){ 905 for (k=0;k<dim[2];k++){ 906 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 907 } 908 } 909 } 910 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 911 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 912 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 914 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 915 ierr = ISDestroy(&list1);CHKERRQ(ierr); 916 ierr = PetscFree(indx1);CHKERRQ(ierr); 917 break; 918 default: 919 */ 920 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 921 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 922 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 924 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 925 ierr = ISDestroy(&list1);CHKERRQ(ierr); 926 // break; 927 // } 928 } 929 930 else{ 931 932 switch (ndim){ 933 case 1: 934 #if defined(PETSC_USE_COMPLEX) 935 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 936 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 937 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 938 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 939 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 942 ierr = ISDestroy(&list1);CHKERRQ(ierr); 943 ierr = ISDestroy(&list2);CHKERRQ(ierr); 944 #else 945 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 946 #endif 947 break; 948 case 2: 949 #if defined(PETSC_USE_COMPLEX) 950 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 951 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 952 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 953 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 954 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 957 ierr = ISDestroy(&list1);CHKERRQ(ierr); 958 ierr = ISDestroy(&list2);CHKERRQ(ierr); 959 #else 960 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 961 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 962 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 963 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 964 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 965 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 966 967 if (dim[1]%2==0) 968 NM = dim[1]+2; 969 else 970 NM = dim[1]+1; 971 972 for (i=0;i<local_n0;i++){ 973 for (j=0;j<dim[1];j++){ 974 tempindx = i*dim[1] + j; 975 tempindx1 = i*NM + j; 976 indx1[tempindx]=local_0_start*dim[1]+tempindx; 977 indx2[tempindx]=low+tempindx1; 978 } 979 } 980 981 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 982 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 983 984 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 985 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 988 ierr = ISDestroy(&list1);CHKERRQ(ierr); 989 ierr = ISDestroy(&list2);CHKERRQ(ierr); 990 ierr = PetscFree(indx1);CHKERRQ(ierr); 991 ierr = PetscFree(indx2);CHKERRQ(ierr); 992 #endif 993 break; 994 995 case 3: 996 #if defined(PETSC_USE_COMPLEX) 997 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 998 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 999 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1000 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1001 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1002 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1003 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1005 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1006 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1007 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1008 #else 1009 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1010 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1011 1012 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1013 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1014 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1015 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1016 1017 if (dim[2]%2==0) 1018 NM = dim[2]+2; 1019 else 1020 NM = dim[2]+1; 1021 1022 for (i=0;i<local_n0;i++){ 1023 for (j=0;j<dim[1];j++){ 1024 for (k=0;k<dim[2];k++){ 1025 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1026 tempindx1 = i*dim[1]*NM + j*NM + k; 1027 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1028 indx2[tempindx]=low+tempindx1; 1029 } 1030 } 1031 } 1032 1033 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1034 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1035 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1036 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1039 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1040 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1041 ierr = PetscFree(indx1);CHKERRQ(ierr); 1042 ierr = PetscFree(indx2);CHKERRQ(ierr); 1043 #endif 1044 break; 1045 1046 default: 1047 #if defined(PETSC_USE_COMPLEX) 1048 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1049 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1050 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1051 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1052 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1055 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1057 #else 1058 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1059 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1060 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1061 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1062 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1063 1064 partial_dim = fftw->partial_dim; 1065 1066 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1067 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1068 1069 if (dim[ndim-1]%2==0) 1070 NM = 2; 1071 else 1072 NM = 1; 1073 1074 j = low; 1075 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1076 { 1077 indx1[i] = local_0_start*partial_dim + i; 1078 indx2[i] = j; 1079 if (k%dim[ndim-1]==0) 1080 { j+=NM;} 1081 j++; 1082 } 1083 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1084 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1085 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1086 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1088 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1089 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1090 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1091 ierr = PetscFree(indx1);CHKERRQ(ierr); 1092 ierr = PetscFree(indx2);CHKERRQ(ierr); 1093 #endif 1094 break; 1095 } 1096 } 1097 1098 return 0; 1099 } 1100 EXTERN_C_END 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "OutputTransformFFT" 1104 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 1105 { 1106 PetscErrorCode ierr; 1107 PetscFunctionBegin; 1108 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 /* 1113 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 1114 Input A, x, y 1115 A - FFTW matrix 1116 x - FFTW vector 1117 y - PETSc vector that user can read 1118 Options Database Keys: 1119 + -mat_fftw_plannerflags - set FFTW planner flags 1120 1121 Level: intermediate 1122 1123 */ 1124 1125 EXTERN_C_BEGIN 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "OutputTransformFFT_FTTW" 1128 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 1129 { 1130 PetscErrorCode ierr; 1131 MPI_Comm comm=((PetscObject)A)->comm; 1132 Mat_FFT *fft = (Mat_FFT*)A->data; 1133 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1134 PetscInt N=fft->N; 1135 PetscInt ndim=fft->ndim,*dim=fft->dim; 1136 PetscInt low; 1137 PetscInt rank,size; 1138 ptrdiff_t alloc_local,local_n0,local_0_start; 1139 ptrdiff_t local_n1,local_1_start; 1140 VecScatter vecscat; 1141 IS list1,list2; 1142 #if !defined(PETSC_USE_COMPLEX) 1143 PetscInt i,j,k,partial_dim; 1144 PetscInt *indx1, *indx2, tempindx, tempindx1; 1145 PetscInt N1, n1 ,NM; 1146 ptrdiff_t temp; 1147 #endif 1148 1149 1150 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1151 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1152 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 1153 1154 if (size==1){ 1155 /* 1156 switch (ndim){ 1157 case 1: 1158 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1159 for (i=0;i<dim[0];i++) 1160 { 1161 indx1[i] = i; 1162 } 1163 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1164 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1165 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1167 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1168 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1169 ierr = PetscFree(indx1);CHKERRQ(ierr); 1170 break; 1171 1172 case 2: 1173 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1174 for (i=0;i<dim[0];i++){ 1175 for (j=0;j<dim[1];j++){ 1176 indx1[i*dim[1]+j] = i*dim[1] + j; 1177 } 1178 } 1179 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1180 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1181 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1183 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1184 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1185 ierr = PetscFree(indx1);CHKERRQ(ierr); 1186 break; 1187 case 3: 1188 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1189 for (i=0;i<dim[0];i++){ 1190 for (j=0;j<dim[1];j++){ 1191 for (k=0;k<dim[2];k++){ 1192 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1193 } 1194 } 1195 } 1196 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1197 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1198 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1200 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1201 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1202 ierr = PetscFree(indx1);CHKERRQ(ierr); 1203 break; 1204 default: 1205 */ 1206 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 1207 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1208 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1209 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1211 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1212 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1213 // break; 1214 // } 1215 } 1216 else{ 1217 1218 switch (ndim){ 1219 case 1: 1220 #if defined(PETSC_USE_COMPLEX) 1221 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1222 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1223 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 1224 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1225 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1226 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1227 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1229 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1230 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1231 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1232 #else 1233 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1234 #endif 1235 break; 1236 case 2: 1237 #if defined(PETSC_USE_COMPLEX) 1238 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1239 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1240 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1241 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1242 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1244 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1245 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1246 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1247 #else 1248 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1249 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1250 1251 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1252 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1253 1254 if (dim[1]%2==0) 1255 NM = dim[1]+2; 1256 else 1257 NM = dim[1]+1; 1258 1259 for (i=0;i<local_n0;i++){ 1260 for (j=0;j<dim[1];j++){ 1261 tempindx = i*dim[1] + j; 1262 tempindx1 = i*NM + j; 1263 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1264 indx2[tempindx]=low+tempindx1; 1265 } 1266 } 1267 1268 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1269 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1270 1271 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1272 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1274 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1275 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1276 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1277 ierr = PetscFree(indx1);CHKERRQ(ierr); 1278 ierr = PetscFree(indx2);CHKERRQ(ierr); 1279 #endif 1280 break; 1281 case 3: 1282 #if defined(PETSC_USE_COMPLEX) 1283 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1284 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1285 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1286 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1287 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1288 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1289 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1290 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1291 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1292 #else 1293 1294 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1295 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1296 1297 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1298 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1299 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1300 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1301 1302 if (dim[2]%2==0) 1303 NM = dim[2]+2; 1304 else 1305 NM = dim[2]+1; 1306 1307 for (i=0;i<local_n0;i++){ 1308 for (j=0;j<dim[1];j++){ 1309 for (k=0;k<dim[2];k++){ 1310 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1311 tempindx1 = i*dim[1]*NM + j*NM + k; 1312 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1313 indx2[tempindx]=low+tempindx1; 1314 } 1315 // printf("Val tempindx1 = %d\n",tempindx1); 1316 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1317 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1318 // printf("-------------------------\n",indx2[tempindx],rank); 1319 } 1320 } 1321 1322 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1323 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1324 1325 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1326 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1329 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1330 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1331 ierr = PetscFree(indx1);CHKERRQ(ierr); 1332 ierr = PetscFree(indx2);CHKERRQ(ierr); 1333 #endif 1334 break; 1335 default: 1336 #if defined(PETSC_USE_COMPLEX) 1337 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1338 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1339 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1340 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1341 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1342 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1343 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1344 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1345 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1346 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1347 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1348 #else 1349 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1350 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1351 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1352 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1353 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1354 1355 partial_dim = fftw->partial_dim; 1356 1357 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1358 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1359 1360 if (dim[ndim-1]%2==0) 1361 NM = 2; 1362 else 1363 NM = 1; 1364 1365 j = low; 1366 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1367 { 1368 indx1[i] = local_0_start*partial_dim + i; 1369 indx2[i] = j; 1370 if (k%dim[ndim-1]==0) 1371 { j+=NM;} 1372 j++; 1373 } 1374 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1375 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1376 1377 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1378 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1380 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1381 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1382 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1383 ierr = PetscFree(indx1);CHKERRQ(ierr); 1384 ierr = PetscFree(indx2);CHKERRQ(ierr); 1385 #endif 1386 break; 1387 } 1388 } 1389 return 0; 1390 } 1391 EXTERN_C_END 1392 1393 EXTERN_C_BEGIN 1394 #undef __FUNCT__ 1395 #define __FUNCT__ "MatCreate_FFTW" 1396 /* 1397 MatCreate_FFTW - Creates a matrix object that provides FFT 1398 via the external package FFTW 1399 Options Database Keys: 1400 + -mat_fftw_plannerflags - set FFTW planner flags 1401 1402 Level: intermediate 1403 1404 */ 1405 1406 PetscErrorCode MatCreate_FFTW(Mat A) 1407 { 1408 PetscErrorCode ierr; 1409 MPI_Comm comm=((PetscObject)A)->comm; 1410 Mat_FFT *fft=(Mat_FFT*)A->data; 1411 Mat_FFTW *fftw; 1412 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1413 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1414 PetscBool flg; 1415 PetscInt p_flag,partial_dim=1,ctr,n1; 1416 PetscMPIInt size,rank; 1417 ptrdiff_t *pdim; 1418 ptrdiff_t local_n1,local_1_start; 1419 #if !defined(PETSC_USE_COMPLEX) 1420 ptrdiff_t temp; 1421 PetscInt N1; 1422 #endif 1423 1424 1425 PetscFunctionBegin; 1426 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1427 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1428 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1429 1430 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1431 pdim[0] = dim[0]; 1432 for (ctr=1;ctr<ndim;ctr++) 1433 { 1434 partial_dim *= dim[ctr]; 1435 pdim[ctr] = dim[ctr]; 1436 } 1437 1438 if (size == 1) { 1439 #if defined(PETSC_USE_COMPLEX) 1440 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1441 n = N; 1442 #else 1443 PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1444 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1445 n = tot_dim; 1446 #endif 1447 1448 } else { 1449 ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1450 switch (ndim){ 1451 case 1: 1452 #if !defined(PETSC_USE_COMPLEX) 1453 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1454 #else 1455 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1456 n = (PetscInt)local_n0; 1457 n1 = (PetscInt) local_n1; 1458 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1459 #endif 1460 break; 1461 case 2: 1462 #if defined(PETSC_USE_COMPLEX) 1463 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1464 /* 1465 PetscMPIInt rank; 1466 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1467 PetscSynchronizedFlush(comm); 1468 */ 1469 n = (PetscInt)local_n0*dim[1]; 1470 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1471 #else 1472 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1473 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1474 // n1 = 2*(PetscInt)local_n1*(dim[0]); 1475 // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1476 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1477 #endif 1478 break; 1479 case 3: 1480 #if defined(PETSC_USE_COMPLEX) 1481 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1482 n = (PetscInt)local_n0*dim[1]*dim[2]; 1483 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1484 #else 1485 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1486 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1487 // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1488 // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1489 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1490 #endif 1491 break; 1492 default: 1493 #if defined(PETSC_USE_COMPLEX) 1494 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1495 n = (PetscInt)local_n0*partial_dim; 1496 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1497 #else 1498 temp = pdim[ndim-1]; 1499 pdim[ndim-1] = temp/2 + 1; 1500 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1501 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1502 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1503 pdim[ndim-1] = temp; 1504 // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1505 // n1 = 2*local_n1*temp; 1506 // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1507 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1508 #endif 1509 break; 1510 } 1511 } 1512 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1513 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1514 fft->data = (void*)fftw; 1515 1516 fft->n = n; 1517 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1518 fftw->partial_dim = partial_dim; 1519 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1520 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1521 1522 fftw->p_forward = 0; 1523 fftw->p_backward = 0; 1524 fftw->p_flag = FFTW_ESTIMATE; 1525 1526 if (size == 1){ 1527 A->ops->mult = MatMult_SeqFFTW; 1528 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1529 } else { 1530 A->ops->mult = MatMult_MPIFFTW; 1531 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1532 } 1533 fft->matdestroy = MatDestroy_FFTW; 1534 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1535 //A->ops->getvecs = MatGetVecs_FFTW; 1536 A->assembled = PETSC_TRUE; 1537 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1538 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1539 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1540 1541 /* get runtime options */ 1542 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1543 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1544 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1545 PetscOptionsEnd(); 1546 PetscFunctionReturn(0); 1547 } 1548 EXTERN_C_END 1549 1550 1551 1552 1553