#!/bin/bash
#
# Test harness preamble.
#
# Running this file:
#   1. changes into the directory that holds the script, (re)creates a run
#      directory named after the script (script name minus ".sh"), copies any
#      ${runfiles} into it and changes into it;
#   2. parses the command-line options documented in print_usage;
#   3. defines the petsc_* helper functions used to run a command, emit TAP
#      lines, write a counts summary and wrap a command in a memory checker.
#
# None of the petsc_* functions is called in this file; they are presumably
# invoked by test-specific code that follows or includes this harness
# (NOTE(review): confirm against the script generator).
#
# Variables honoured if already set by the caller (each defaults to empty):
#   timeoutfactor filter filter_output exec executable petsc_dir
#   testlogtapfile testlogerrfile label runfiles
scriptname=$(basename "$0")
rundir=${scriptname%.sh}
TIMEOUT=60
timeoutfactor=${timeoutfactor:=}
filter=${filter:=}
filter_output=${filter_output:=}
exec=${exec:=}
executable=${executable:=}
petsc_dir=${petsc_dir:=}
testlogtapfile=${testlogtapfile:=}
testlogerrfile=${testlogerrfile:=}
label=${label:=}

# Always work from the directory that contains this script.  The previous
# guard, test "$PWD"!="...", lacked spaces around != and so was always true;
# the unconditional form keeps that behaviour and guarantees that
# abspath_scriptdir is set for the compile step below.
cd "$(dirname "$0")" || exit
abspath_scriptdir=$PWD

# Drop stale outputs of a previous run, then (re)create the run directory.
if test -d "${rundir}" && test -n "${rundir}"; then
  rm -f "${rundir}"/*.tmp "${rundir}"/*.err "${rundir}"/*.out
fi
mkdir -p "${rundir}"
if test -n "${runfiles:=}"; then
  # runfiles is a whitespace-separated list: word splitting is intentional.
  for runfile in ${runfiles}; do
    subdir=$(dirname "${runfile}")
    mkdir -p "${rundir}"/"${subdir}"
    cp -r "${runfile}" "${rundir}"/"${subdir}"
  done
fi
cd "${rundir}" || exit

#
# Method to print out general and script specific options
#
# Arguments: $1 - program name shown on the "Usage:" line
# Outputs:   usage text on stderr, followed by the output of extrausage if a
#            function of that name is defined
# Exits with status 1; never returns.
#
print_usage() {

cat >&2 <<EOF
Usage: $1 [options]

OPTIONS
  -a <args> ......... Override default arguments
  -c ................ Cleanup (remove generated files)
  -C ................ Compile
  -d ................ Launch in debugger
  -e <args> ......... Add extra arguments to default
  -E <args> ......... Add final arguments to default
  -f ................ force attempt to run test that would otherwise be skipped
  -h ................ help: print this message
  -n <integer> ...... Override the number of processors to use
  -j ................ Pass -j to petscdiff (just use diff)
  -J <arg> .......... Pass -J to petscdiff (just use diff with arg)
  -m ................ Update results using petscdiff
  -M ................ Update alt files using petscdiff
  -o <arg> .......... Output format: 'interactive', 'err_only'
  -p ................ Print command:  Print first command and exit
  -t ................ Override the default timeout (default=$TIMEOUT sec)
  -U ................ run cUda-memcheck
  -V ................ run Valgrind
  -v ................ Verbose: Print commands
EOF

  if declare -f extrausage > /dev/null; then extrausage; fi
  exit 1
}
###
##  Arguments for overriding things
#
output_fmt="interactive"
verbose=false
cleanup=false
compile=false
debugger=false
printcmd=false
mpiexec_function=false
force=false
diff_flags=""
while getopts "a:cCde:E:fhjJ:mMn:o:pt:UvV" arg
do
  case $arg in
    a ) args="$OPTARG"       ;;
    c ) cleanup=true         ;;
    C ) compile=true         ;;
    d ) debugger=true        ;;
    e ) extra_args="$OPTARG" ;;
    E ) final_args="$OPTARG" ;;
    f ) force=true           ;;
    h ) print_usage "$0"     ;;
    n ) nsize="$OPTARG"      ;;
    j ) diff_flags=$diff_flags" -j"      ;;
    J ) diff_flags=$diff_flags" -J $OPTARG" ;;
    m ) diff_flags=$diff_flags" -m"      ;;
    M ) diff_flags=$diff_flags" -M"      ;;
    o ) output_fmt=$OPTARG   ;;
    p ) printcmd=true        ;;
    t ) TIMEOUT=$OPTARG      ;;
    # -U / -V prepend a wrapper function to the launcher command line.
    U ) mpiexec="petsc_mpiexec_cudamemcheck $mpiexec"
        mpiexec_function=true
        ;;
    V ) mpiexec="petsc_mpiexec_valgrind $mpiexec"
        mpiexec_function=true
        ;;
    v ) verbose=true         ;;
    * ) # To take care of any extra args.
        # For an unknown option getopts sets arg to '?', which is not a valid
        # variable name; only assign when arg is a real identifier, and do it
        # with printf -v so that OPTARG is never re-evaluated by the shell.
        if [[ "$arg" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
          if test -n "$OPTARG"; then
            printf -v "$arg" '%s' "$OPTARG"
          else
            printf -v "$arg" '%s' found
          fi
        fi
        ;;
  esac
done
shift $(( OPTIND - 1 ))

# Individual tests can extend the default.
# An empty timeoutfactor is treated as 1; referenced bare inside $(( )) it
# would evaluate to 0 and wipe out the timeout altogether.
export MPIEXEC_TIMEOUT=$(( TIMEOUT * ${timeoutfactor:-1} ))
STARTTIME=$(date +%s)

if test -n "$extra_args"; then
  args="$extra_args $args"
fi
if test -n "$final_args"; then
  args="$args $final_args"
fi
if $debugger; then
  args="-start_in_debugger $args"
fi
# The filters are appended as $'...' text; diff_flags is a command-line
# fragment, not an argv array.
if test -n "$filter"; then
  diff_flags=$diff_flags" -F \$'$filter'"
fi
if test -n "$filter_output"; then
  diff_flags=$diff_flags" -f \$'$filter_output'"
fi

# Init
success=0; failed=0; failures=""; rmfiles=""
total=0
todo=-1; skip=-1
job_level=0

if $compile; then
  curexec=$(basename "${exec}")
  fullexec=${abspath_scriptdir}/${curexec}
  # Make target = executable path with the leading ${petsc_dir}/ removed.
  maketarget=$(echo "${fullexec}" | sed "s#${petsc_dir}/*##")
  (cd "$petsc_dir" && make -f gmakefile.test "${maketarget}")
fi

###
##   Rest of code is functions
#
# NOTE: the functions below deliberately keep their working variables global
# (cmd, cmd_res, tlabel, comment, ...), as before, in case code outside this
# file reads them.

#
# Emit one TAP result line and log it.
#   $1 - "not" for a failure, empty for success
#   $2 - test label
#   $3 - optional comment, appended as " # <comment>"
# The line is always appended to ${testlogtapfile}.  With output_fmt=err_only
# only failures are printed (and appended to ${testlogerrfile}); otherwise
# every line is printed to stdout.
#
function petsc_report_tapoutput() {
  notornot=$1
  test_label=$2
  comment=$3
  if test -n "$comment"; then
    comment=" # ${comment}"
  fi

  tap_message="${notornot} ok ${test_label}${comment}"

  # Log messages
  printf '%s\n' "${tap_message}" >> "${testlogtapfile}"

  if test "${output_fmt}" == "err_only"; then
    if test -n "${notornot}"; then
      printf '%s\n' "${tap_message}" | tee -a "${testlogerrfile}"
    fi
  else
    printf '%s\n' "${tap_message}"
  fi
}

#
# Print command that can be run from PETSC_DIR, then exit the script.
#   $1 - full command line, including its redirections
# The first ".." is replaced by the parent of the current directory relative
# to ${petsc_dir}, and everything from the first ">" on is dropped.
#
function printcmd() {
  cmd="$1"
  basedir=$(dirname "${PWD}" | sed "s#${petsc_dir}/##")
  # No '%' doubling here: the result is printed with printf '%s', so a doubled
  # '%%' would show up literally in the printed command.
  modcmd=$(printf '%s\n' "${cmd}" | sed -e "s#\.\.#${basedir}#" -e 's#>.*##')
  if $mpiexec_function; then
    # Have to expand valgrind/cudamemcheck
    modcmd=$(eval "$modcmd")
  fi
  printf '%s\n' "${modcmd}"
  exit
}

#
# Run one command, record its result and report it as a TAP line.
#   $1 - basic command
#   $2 - stdout file
#   $3 - stderr file
#   $4 - label for reporting (also the name of the generated <label>.sh)
#   $5 - optional; when non-empty stderr is merged into the stdout file and
#        the exit code of the command is ignored (error-output tests)
# Updates success / failed / failures / total / rmfiles and appends the
# "time -p" report to timing.out.
# Returns the (possibly adjusted) exit code of the command.
#
function petsc_testrun() {
  rmfiles="${rmfiles} $2 $3"
  tlabel=$4
  error=$5
  # Forget a timeout seen by an earlier call; otherwise every later failure
  # would also be reported as "Exceeded timeout limit".
  timed_out=""
  cmd="$1 > $2 2> $3"
  if test -n "$error"; then
    cmd="$1 1> $2 2>&1"
  fi
  # Leave a re-runnable copy of the command next to the outputs.
  printf '%s\n' "$cmd" > "${tlabel}".sh; chmod 755 "${tlabel}".sh
  if $printcmd; then
    printcmd "$cmd"
  fi

  eval "{ time -p $cmd ; } 2>> timing.out"
  cmd_res=$?
  # If testing the error output then we don't test the error code itself
  if test -n "$error"; then
    cmd_res=0
  fi
  # If it is a lack of GPU resources or MPI failure (Intel) then try once more
  #   See: src/sys/error/err.c
  # Error #134 added to handle problems with the Radeon card for hip testing
  # Error #144 added to handle problems with the MPI [ch3:sock] received packet of unknown type (1852472100)
  case "$cmd_res" in
    96|97|98|134|144)
      printf "# retrying %s\n" "${tlabel}" | tee -a "${testlogerrfile}"
      sleep 3
      eval "{ time -p $cmd ; } 2>> timing.out"
      cmd_res=$?
      ;;
  esac
  # Make sure both output files exist before they are grepped and printed.
  touch "$2" "$3"
  # It appears current MPICH and Open MPI just shut down the job execution and do not return an error code to the executable
  # ETIMEDOUT=110 was used by Open MPI 3.0.  MPICH used 255
  # Earlier Open MPI versions returned 1 and the error string
  # Here we only grep for error strings in output
  #if [ $cmd_res -eq 110 -o $cmd_res -eq 255 ] || \
  if \
    grep -F -q -s 'I_MPI_JOB_TIMEOUT' "$2" "$3" || \
    grep -F -q -s 'APPLICATION TIMED OUT' "$2" "$3" || \
    grep -F -q -s MPIEXEC_TIMEOUT "$2" "$3" || \
    grep -F -q -s 'APPLICATION TERMINATED WITH THE EXIT STRING: job ending due to timeout' "$2" "$3" || \
    grep -q -s "Timeout after [0-9]* seconds. Terminating job" "$2" "$3"; then
    timed_out=1
    # If timed out, then ensure non-zero error code
    if [ $cmd_res -eq 0 ]; then
      cmd_res=1
    fi
  fi

  # Report errors
  comment=""
  if test $cmd_res == 0; then
    if "${verbose}"; then
      comment="${cmd}"
    fi
    petsc_report_tapoutput "" "$tlabel" "$comment"
    (( success=success+1 ))
  else
    if [ -n "$timed_out" ]; then
      comment="Exceeded timeout limit of $MPIEXEC_TIMEOUT s"
    else
      comment="Error code: ${cmd_res}"
    fi
    petsc_report_tapoutput "not" "$tlabel" "$comment"

    # Report errors in detail
    if [ -z "$timed_out" ]; then
      # We've had tests fail but stderr->stdout, as well as having
      # mpi_abort go to stderr which throws this test off.  Show both
      # with stdout first
      awk '{print "#\t" $0}' < "$2" | tee -a "${testlogerrfile}"
      # if statement is for diff tests
      if test "$2" != "$3"; then
        awk '{print "#\t" $0}' < "$3" | tee -a "${testlogerrfile}"
      fi
    fi
    (( failed=failed+1 ))
    failures="$failures $tlabel"
  fi
  (( total=success+failed ))
  return $cmd_res
}

#
# Write the counts summary and optionally remove generated files.
#   $1 - directory under which counts/${label}.counts is written
# The summary holds total/success/failed/failures, todo and skip when they
# are positive, and a "time" line derived from the user/sys entries of
# timing.out (largest single value, then the sum).
# When cleanup is true, the files recorded in rmfiles are deleted.
#
function petsc_testend() {
  logfile=$1/counts/${label}.counts
  logdir=$(dirname "$logfile")
  mkdir -p "$logdir"
  printf "total %s\n" "$total" > "$logfile"
  printf "success %s\n" "$success" >> "$logfile"
  printf "failed %s\n" "$failed" >> "$logfile"
  printf "failures %s\n" "$failures" >> "$logfile"
  if test ${todo} -gt 0; then
    printf "todo %s\n" "$todo" >> "$logfile"
  fi
  if test ${skip} -gt 0; then
    printf "skip %s\n" "$skip" >> "$logfile"
  fi
  ENDTIME=$(date +%s)
  timing=$(touch timing.out && grep -E '(user|sys)' timing.out | awk '{if( sum1 == "" || $2 > sum1 ) { sum1=sprintf("%.2f",$2) } ; sum2 += sprintf("%.2f",$2)} END {printf "%.2f %.2f\n",sum1,sum2}')
  printf "time %s\n" "$timing" >> "$logfile"
  if $cleanup; then
    echo "Cleaning up"
    # rmfiles is a whitespace-separated list built up by petsc_testrun, so it
    # has to be word-split; quoting it would hand rm one bogus file name.
    # shellcheck disable=SC2086
    /bin/rm -f $rmfiles
  fi
}

#
# Launcher wrapper that inserts the CUDA memory checker in front of the test
# executable.
#   "$@" - launcher command line; the first argument matching the regex
#          ${executable} is taken to be the test executable
# Environment overrides: PETSC_CUDAMEMCHECK_COMMAND, PETSC_CUDAMEMCHECK_ARGS.
# With printcmd=true the resulting command line is printed instead of run.
#
function petsc_mpiexec_cudamemcheck() {
  local -a default_args_to_check probe_words memcheck_words
  # first check if compute-sanitizer exists, since cuda-memcheck is deprecated
  # from CUDA 11-ish onwards
  if command -v compute-sanitizer &> /dev/null; then
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-compute-sanitizer}"
    default_args_to_check=('--target-processes all' '--track-stream-ordered-races all')
  else
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-cuda-memcheck}"
    default_args_to_check=('--flush-to-disk yes')
  fi
  if [[ -z ${PETSC_CUDAMEMCHECK_ARGS} ]]; then
    # if user has not set the memcheck args themselves loop over the predefined
    # default arguments and check if they can be used
    memcheck_args='--leak-check full --report-api-errors no '
    for option in "${default_args_to_check[@]}"; do
      # Split into real argv words: passed as one quoted string the checker
      # would see "--leak-check full ..." as a single unknown argument.
      read -r -a probe_words <<< "${memcheck_cmd} ${memcheck_args} ${option}"
      if "${probe_words[@]}" &> /dev/null; then
        memcheck_args+="${option} "
      fi
    done
  else
    memcheck_args="${PETSC_CUDAMEMCHECK_ARGS}"
  fi
  pre_args=()
  # regex to detect where the test lives in the command line. This
  # marks the end of the options to mpiexec, and hence where we should insert
  # the cuda-memcheck command
  re="${executable}"
  for i in "$@"; do
    # first occurrence of the presence of petsc_arch is the executable,
    # except when we install MPI ourselves
    if [[ $i =~ ${re} ]]; then
      # found it, put cuda memcheck command in (one array element per word so
      # the launcher receives a runnable command, not one long program name)
      read -r -a memcheck_words <<< "${memcheck_cmd} ${memcheck_args}"
      pre_args+=("${memcheck_words[@]}")
      break
    fi
    pre_args+=("$i")
    # keep "$@" in step: after the loop it starts at the executable
    shift
  done
  # run command, but filter out
  #  ===== CUDA-MEMCHECK or ==== COMPUTE-SANITIZER
  # and
  #  ===== ERROR SUMMARY: 0 errors
  if ${printcmd}; then
    echo "${pre_args[@]}" "$@"
  else
    "${pre_args[@]}" "$@" \
      | grep -v 'CUDA-MEMCHECK' \
      | grep -v 'COMPUTE-SANITIZER' \
      | grep -v 'LEAK SUMMARY: 0 bytes leaked in 0 allocations' \
      | grep -v 'ERROR SUMMARY: 0 errors' || [[ $? == 1 ]]
  fi
  # last or is needed to suppress grep exiting with error code 1 if it doesn't
  # find a match
}

#
# Launcher wrapper that inserts valgrind in front of the test executable.
#   "$@" - launcher command line; the first argument matching the regex
#          ${executable} is taken to be the test executable
# Reads PETSC_DIR to locate the suppressions file.
# With printcmd=true the resulting command line is printed instead of run.
#
function petsc_mpiexec_valgrind() {
  # Keep the checker as an argv array so that the whole command can be
  # expanded quoted; launcher arguments are then never re-split or globbed.
  local -a valgrind_words=(
    valgrind -q --tool=memcheck --leak-check=yes --num-callers=20
    --track-origins=yes --keep-debuginfo=yes
    "--suppressions=${PETSC_DIR}/share/petsc/suppressions/valgrind"
    --error-exitcode=10
  )
  # String form kept because valgrind_cmd has always been a global.
  valgrind_cmd="${valgrind_words[*]}"
  pre_args=()
  re="${executable}"
  for i in "$@"; do
    if [[ $i =~ ${re} ]]; then
      pre_args+=("${valgrind_words[@]}")
      break
    fi
    pre_args+=("$i")
    # keep "$@" in step: after the loop it starts at the executable
    shift
  done
  if ${printcmd}; then
    echo "${pre_args[@]}" "$@"
  else
    "${pre_args[@]}" "$@"
  fi
}
export LC_ALL=C