:
##########################################################################
# Shellscript:  geturl - get WWW page specified by URL
# Version    :  0.1 (beta)
# Author     :  Heiner Steven (heiner.steven@odn.de)
# Date       :  20.02.1997
# Category   :  Internet
# SCCS-Id.   :  $Id: geturl,v 1.1.1.1 1999/06/15 19:29:05 heiner Exp $
##########################################################################
# Note
#    o  Needs the non-standard program
#           nc ("net cat")
#       to do the actual work
#    o  There should be a way to see the header
##########################################################################

PN=${0##*/}                     # Program name
VER='0.2 (beta)'

# Temp. file for HTTP response header.  Prefer an unpredictable name
# created by mktemp; fall back to the historic PID-based name on systems
# that have no mktemp.
: "${TMPDIR:=/tmp}"
Header=$(mktemp "$TMPDIR/gu.XXXXXX" 2>/dev/null) || Header="$TMPDIR/gu$$"

# Remove the temporary file at signal or exit.  Installed right after the
# file is created so that an early exit (e.g. via Usage) does not leak it.
trap 'rm -f "$Header" >/dev/null 2>&1' 0
trap "exit 2" 1 2 3 15

CMD=GET                         # HTTP request ("GET" or "HEAD")
verbose=no                      # yes/no
printheader=no
check=no

# Usage - print a usage message to standard error and exit with status 1.
Usage () {
    cat >&2 <<EOF
$PN - get WWW page specified by URL, $VER (hs '97)
usage: $PN [-chv] url [...]
    -c:  only check the url, do not get the contents
    -h:  only print header, no contents
    -v:  verbose mode

An url has the form
    http:[//HOST][:PORT]PATH
i.e. http://panic.bintec.de:80/index.html
EOF
    exit 1
}

# Msg line... - write each argument as one line to standard error,
# prefixed with the program name.
Msg () {
    for MsgLine
    do
        printf '%s: %s\n' "$PN" "$MsgLine" >&2
    done
}

# Fatal line... - like Msg, then terminate the script with status 1.
Fatal () { Msg "$@"; exit 1; }

# ParseUrl url
#   Split "url" (format PROTOCOL:[//HOST[:PORT]]PATH) and return the parts
#   in the global variables Protocol, Host, Port, Path.
#   All four variables are cleared first, so components missing from the
#   URL are returned empty instead of keeping values from an earlier call.
#   A URL of the form PROTOCOL://:PORT/PATH yields an empty Host with
#   Port set.
# Returns:
#   0  URL could be parsed
#   1  no (alphanumeric) protocol in front of the first ":"
#   2  no argument given
ParseUrl () {
    [ $# -ge 1 ] || return 2

    Protocol= Host= Port= Path=

    # The protocol is the alphanumeric word in front of the first colon
    case $1 in
        *:*)    ;;
        *)      return 1;;
    esac
    _pu_proto=${1%%:*}
    case $_pu_proto in
        ""|*[!a-zA-Z0-9]*)  return 1;;
    esac
    Protocol=$_pu_proto
    _pu_rest=${1#*:}                            # Remove protocol

    # Optional "//hostname:portnumber" part, ends at the next "/"
    case $_pu_rest in
        //[!/]*)
            _pu_rest=${_pu_rest#//}
            _pu_hostport=${_pu_rest%%/*}
            _pu_rest=${_pu_rest#"$_pu_hostport"}    # Remove hostname:port
            Host=${_pu_hostport%%:*}
            case $_pu_hostport in
                *:*)
                    # Port is the run of digits following the first colon
                    _pu_port=${_pu_hostport#*:}
                    Port=${_pu_port%%[!0-9]*}
                    ;;
            esac
            ;;
    esac

    Path=$_pu_rest
    return 0
}

# Strip header from HTTP response, and write it to a file.
# A header is terminated by an empty line
#
# StripHeader [file ...]
#   Read an HTTP response from the given files (or standard input),
#   write the response header including the terminating empty line to
#   the file "$Header", and copy everything after it to standard output.
#   Always returns 0.
StripHeader () {
    # Line terminator may be CR LF (instead of LF), so a line consisting
    # of a single CR counts as empty, too.
    _sh_cr=$(printf '\r')

    cat "$@" | (
        # The shell's "read" consumes exactly one line from the pipe,
        # leaving the rest of the stream (the body) for the final "cat".
        # "IFS=" and "-r" keep whitespace and backslashes intact.
        while IFS= read -r _sh_line || [ -n "$_sh_line" ]
        do
            printf '%s\n' "$_sh_line"
            case $_sh_line in
                ""|"$_sh_cr")   break;;
            esac
        done > "$Header"
        cat
    )
    return 0
}

# Process command line options.  The leading ":" in the option string
# silences getopts' own diagnostics; unknown options print the usage.
while getopts :hcv Opt
do
    case "$Opt" in
        c)  CMD=HEAD; check=yes;;           # only check the file
        h)  CMD=HEAD; printheader=yes;;     # only print the header
        v)  verbose=yes;;
        *)  Usage;;
    esac
done
shift $((OPTIND - 1))

[ $# -ge 1 ] || Usage                       # at least one URL is required

# Remove temporary files at signal or exit
trap 'rm -f "$Header" >/dev/null 2>&1' 0
trap "exit 2" 1 2 3 15

# ExitCode becomes non-zero as soon as any URL fails (malformed URL,
# unsupported protocol, no response, or - with -c - a status other
# than 200) and then stays non-zero.
ExitCode=0
for Url
do
    ParseUrl "$Url" || {
        Msg "malformed URL: $Url"
        ExitCode=1
        continue
    }
    [ "${Protocol:-INVALID}" = http ] || {
        Msg "unknown protocol \"$Protocol\" (only \"http\" allowed)"
        ExitCode=1
        continue
    }

    # Set default values
    : "${CMD:=GET}"
    : "${Host:=127.0.0.1}"
    : "${Port:=80}"
    : "${Path:=/}"

    [ "$verbose" = yes ] && Msg "getting http://$Host:$Port$Path"

    # Send the request line followed by an empty line, both terminated
    # by CR LF.  printf is used because "echo" does not portably expand
    # "\r".
    printf '%s %s HTTP/1.0\r\n\r\n' "$CMD" "$Path" |
        nc "$Host" "$Port" |
        StripHeader

    # StripHeader always succeeds, so the pipeline status says nothing
    # about nc.  An empty header file means no response was received.
    [ -s "$Header" ] || {
        Msg "could not connect to $Host:$Port"
        ExitCode=1
        continue
    }

    if [ "$printheader" = yes ]
    then
        cat "$Header"
    fi

    if [ "$check" = yes ]
    then
        # Search first line of response header for state "200",
        # e.g. "HTTP/1.0 200 OK"
        if sed q "$Header" |
            grep -E '^HTTP/[0-9.]+[[:space:]]+200([[:space:]]|$)' >/dev/null 2>&1
        then
            Msg "URL o.k."
        else
            Msg "URL not o.k."
            ExitCode=1
        fi
    fi
done

exit $ExitCode