xref: /petsc/src/sys/fileio/fretrieve.c (revision dd91dc9deb5defeb04e406ee3787a84bccc26ca6)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7   #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12   #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15   #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20   #include <sys/systeminfo.h>
21 #endif
22 #include <petsc/private/petscimpl.h>
23 
24 /*
25    Private routine to delete tmp/shared storage
26 
27    This is called by MPI, not by users, when communicator attributes are deleted
28 
29    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
30 
31 */
32 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state)
33 {
34   PetscFunctionBegin;
35   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
36   PetscCallMPI(PetscFree(count_val));
37   PetscFunctionReturn(MPI_SUCCESS);
38 }
39 
40 // "Unknown section 'Environmental Variables'"
41 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
42 /*@C
43   PetscGetTmp - Gets the name of the "tmp" directory, often this is `/tmp`
44 
45   Collective
46 
47   Input Parameters:
48 + comm - MPI_Communicator that may share tmp
49 - len  - length of string to hold name
50 
51   Output Parameter:
52 . dir - directory name
53 
54   Options Database Keys:
55 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
56 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
57 - -tmp tmpdir     - name of the directory you wish to use as tmp
58 
59   Environmental Variables:
60 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
61 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
62 - `PETSC_TMP`            - name of the directory you wish to use as tmp
63 
64   Level: developer
65 
66 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
67 @*/
68 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len)
69 {
70   PetscBool flg;
71 
72   PetscFunctionBegin;
73   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
74   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
75   PetscFunctionReturn(PETSC_SUCCESS);
76 }
77 
78 // "Unknown section 'Environmental Variables'"
79 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
80 /*@C
81   PetscSharedTmp - Determines if all processors in a communicator share a
82   tmp directory or have different ones.
83 
84   Collective
85 
86   Input Parameter:
87 . comm - MPI_Communicator that may share tmp
88 
89   Output Parameter:
90 . shared - `PETSC_TRUE` or `PETSC_FALSE`
91 
92   Options Database Keys:
93 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
94 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
95 - -tmp tmpdir     - name of the directory you wish to use as tmp
96 
97   Environmental Variables:
98 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
99 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
100 - `PETSC_TMP`            - name of the directory you wish to use as tmp
101 
102   Level: developer
103 
104   Notes:
105   Stores the status as a MPI attribute so it does not have
106   to be redetermined each time.
107 
108   Assumes that all processors in a communicator either
109   1) have a common tmp or
110   2) each has a separate tmp
111   eventually we can write a fancier one that determines which processors
112   share a common tmp.
113 
114   This will be very slow on runs with a large number of processors since
115   it requires O(p*p) file opens.
116 
117   If the environmental variable `PETSC_TMP` is set it will use this directory
118   as the "tmp" directory.
119 
120 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
121 @*/
122 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared)
123 {
124   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
125   PetscBool   flg, iflg;
126   FILE       *fd;
127   int         err;
128 
129   PetscFunctionBegin;
130   PetscCallMPI(MPI_Comm_size(comm, &size));
131   if (size == 1) {
132     *shared = PETSC_TRUE;
133     PetscFunctionReturn(PETSC_SUCCESS);
134   }
135 
136   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
137   if (flg) {
138     *shared = PETSC_TRUE;
139     PetscFunctionReturn(PETSC_SUCCESS);
140   }
141 
142   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
143   if (flg) {
144     *shared = PETSC_FALSE;
145     PetscFunctionReturn(PETSC_SUCCESS);
146   }
147 
148   if (Petsc_SharedTmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedTmp_keyval, NULL));
149 
150   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedTmp_keyval, (void **)&tagvalp, (int *)&iflg));
151   if (!iflg) {
152     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
153 
154     /* This communicator does not yet have a shared tmp attribute */
155     PetscCall(PetscMalloc1(1, &tagvalp));
156     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedTmp_keyval, tagvalp));
157 
158     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
159     if (!iflg) {
160       PetscCall(PetscStrncpy(filename, "/tmp", sizeof(filename)));
161     } else {
162       PetscCall(PetscStrncpy(filename, tmpname, sizeof(filename)));
163     }
164 
165     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
166     PetscCallMPI(MPI_Comm_rank(comm, &rank));
167 
168     /* each processor creates a /tmp file and all the later ones check */
169     /* this makes sure no subset of processors is shared */
170     *shared = PETSC_FALSE;
171     for (i = 0; i < size - 1; i++) {
172       if (rank == i) {
173         fd = fopen(filename, "w");
174         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
175         err = fclose(fd);
176         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
177       }
178       PetscCallMPI(MPI_Barrier(comm));
179       if (rank >= i) {
180         fd = fopen(filename, "r");
181         if (fd) cnt = 1;
182         else cnt = 0;
183         if (fd) {
184           err = fclose(fd);
185           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
186         }
187       } else cnt = 0;
188 
189       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
190       if (rank == i) unlink(filename);
191 
192       if (sum == size) {
193         *shared = PETSC_TRUE;
194         break;
195       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
196     }
197     *tagvalp = (int)*shared;
198     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
199   } else *shared = (PetscBool)*tagvalp;
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 // "Unknown section 'Environmental Variables'"
204 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
205 /*@C
206   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
207 
208   Collective
209 
210   Input Parameter:
211 . comm - MPI_Communicator that may share working directory
212 
213   Output Parameter:
214 . shared - `PETSC_TRUE` or `PETSC_FALSE`
215 
216   Options Database Keys:
217 + -shared_working_directory     - indicates the directory is known to be shared among the MPI processes
218 - -not_shared_working_directory - indicates the directory is known to be not shared among the MPI processes
219 
220   Environmental Variables:
221 + `PETSC_SHARED_WORKING_DIRECTORY`     - indicates the directory is known to be shared among the MPI processes
222 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is known to be not shared among the MPI processes
223 
224   Level: developer
225 
226   Notes:
227   Stores the status as a MPI attribute so it does not have to be redetermined each time.
228 
229   Assumes that all processors in a communicator either
230 .vb
231    1) have a common working directory or
232    2) each has a separate working directory
233 .ve
234   eventually we can write a fancier one that determines which processors share a common working directory.
235 
236   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
237 
238 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
239 @*/
240 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared)
241 {
242   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
243   PetscBool   flg, iflg;
244   FILE       *fd;
245   int         err;
246 
247   PetscFunctionBegin;
248   PetscCallMPI(MPI_Comm_size(comm, &size));
249   if (size == 1) {
250     *shared = PETSC_TRUE;
251     PetscFunctionReturn(PETSC_SUCCESS);
252   }
253 
254   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
255   if (flg) {
256     *shared = PETSC_TRUE;
257     PetscFunctionReturn(PETSC_SUCCESS);
258   }
259 
260   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
261   if (flg) {
262     *shared = PETSC_FALSE;
263     PetscFunctionReturn(PETSC_SUCCESS);
264   }
265 
266   if (Petsc_SharedWD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedWD_keyval, NULL));
267 
268   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedWD_keyval, (void **)&tagvalp, (int *)&iflg));
269   if (!iflg) {
270     char filename[PETSC_MAX_PATH_LEN];
271 
272     /* This communicator does not yet have a shared  attribute */
273     PetscCall(PetscMalloc1(1, &tagvalp));
274     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedWD_keyval, tagvalp));
275 
276     PetscCall(PetscGetWorkingDirectory(filename, sizeof(filename) - 16));
277     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
278     PetscCallMPI(MPI_Comm_rank(comm, &rank));
279 
280     /* each processor creates a  file and all the later ones check */
281     /* this makes sure no subset of processors is shared */
282     *shared = PETSC_FALSE;
283     for (i = 0; i < size - 1; i++) {
284       if (rank == i) {
285         fd = fopen(filename, "w");
286         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
287         err = fclose(fd);
288         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
289       }
290       PetscCallMPI(MPI_Barrier(comm));
291       if (rank >= i) {
292         fd = fopen(filename, "r");
293         if (fd) cnt = 1;
294         else cnt = 0;
295         if (fd) {
296           err = fclose(fd);
297           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
298         }
299       } else cnt = 0;
300 
301       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
302       if (rank == i) unlink(filename);
303 
304       if (sum == size) {
305         *shared = PETSC_TRUE;
306         break;
307       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
308     }
309     *tagvalp = (int)*shared;
310   } else *shared = (PetscBool)*tagvalp;
311   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
312   PetscFunctionReturn(PETSC_SUCCESS);
313 }
314 
315 /*@C
316   PetscFileRetrieve - Obtains a file from a URL or a compressed file
317   and copies into local disk space as uncompressed.
318 
319   Collective
320 
321   Input Parameters:
322 + comm - processors accessing the file
323 . url  - name of file, including entire URL (with or without .gz)
324 - llen - length of `localname`
325 
326   Output Parameters:
327 + localname - name of local copy of file - valid on only process zero
328 - found     - if found or retrieved the file - valid on all processes
329 
330   Level: developer
331 
332   Note:
333   if the file already exists locally this function just returns without downloading it.
334 
335 .seealso: `PetscDLLibraryRetrieve()`
336 @*/
337 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found)
338 {
339   char        buffer[PETSC_MAX_PATH_LEN], *par = NULL, *tlocalname = NULL, name[PETSC_MAX_PATH_LEN];
340   FILE       *fp;
341   PetscMPIInt rank;
342   size_t      len = 0;
343   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
344 
345   PetscFunctionBegin;
346   PetscCallMPI(MPI_Comm_rank(comm, &rank));
347   if (rank == 0) {
348     *found = PETSC_FALSE;
349 
350     PetscCall(PetscStrstr(url, ".gz", &par));
351     if (par) {
352       PetscCall(PetscStrlen(par, &len));
353       if (len == 3) compressed = PETSC_TRUE;
354     }
355 
356     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
357     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
358     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
359     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
360     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
361 
362     if (!download && !compressed) {
363       PetscCall(PetscStrncpy(localname, url, llen));
364       PetscCall(PetscTestFile(url, 'r', found));
365       if (*found) {
366         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
367       } else {
368         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
369       }
370       goto done;
371     }
372 
373     /* look for uncompressed file in requested directory */
374     if (compressed) {
375       PetscCall(PetscStrncpy(localname, url, llen));
376       PetscCall(PetscStrstr(localname, ".gz", &par));
377       *par = 0; /* remove .gz extension */
378       PetscCall(PetscTestFile(localname, 'r', found));
379       if (*found) goto done;
380     }
381 
382     /* look for file in current directory */
383     PetscCall(PetscStrrchr(url, '/', &tlocalname));
384     PetscCall(PetscStrncpy(localname, tlocalname, llen));
385     if (compressed) {
386       PetscCall(PetscStrstr(localname, ".gz", &par));
387       *par = 0; /* remove .gz extension */
388     }
389     PetscCall(PetscTestFile(localname, 'r', found));
390     if (*found) goto done;
391 
392     if (download) {
393       /* local file is not already here so use curl to get it */
394       PetscCall(PetscStrncpy(localname, tlocalname, llen));
395       PetscCall(PetscStrncpy(buffer, "curl --fail --silent --show-error ", sizeof(buffer)));
396       PetscCall(PetscStrlcat(buffer, url, sizeof(buffer)));
397       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
398       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
399 #if defined(PETSC_HAVE_POPEN)
400       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
401       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
402 #else
403       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
404 #endif
405       PetscCall(PetscTestFile(localname, 'r', found));
406       if (*found) {
407         FILE *fd;
408         char  buf[1024], *str, *substring;
409 
410         /* check if the file didn't exist so it downloaded an HTML message instead */
411         fd = fopen(localname, "r");
412         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
413         str = fgets(buf, sizeof(buf) - 1, fd);
414         while (str) {
415           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
416           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
417           PetscCall(PetscStrstr(buf, "Not Found", &substring));
418           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
419           str = fgets(buf, sizeof(buf) - 1, fd);
420         }
421         fclose(fd);
422       }
423     } else if (compressed) {
424       PetscCall(PetscTestFile(url, 'r', found));
425       if (!*found) goto done;
426       PetscCall(PetscStrncpy(localname, url, llen));
427     }
428     if (compressed) {
429       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
430       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
431       PetscCall(PetscStrstr(name, ".gz", &par));
432       *par = 0; /* remove .gz extension */
433       /* uncompress file */
434       PetscCall(PetscStrncpy(buffer, "gzip -c -d ", sizeof(buffer)));
435       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
436       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
437       PetscCall(PetscStrlcat(buffer, name, sizeof(buffer)));
438 #if defined(PETSC_HAVE_POPEN)
439       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
440       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
441 #else
442       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
443 #endif
444       PetscCall(PetscStrncpy(localname, name, llen));
445       PetscCall(PetscTestFile(localname, 'r', found));
446     }
447   }
448 done:
449   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
450   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
451   PetscFunctionReturn(PETSC_SUCCESS);
452 }
453