[153475] contrib/mirror-utils
larryv at macports.org
larryv at macports.org
Sun Oct 2 05:51:59 CEST 2016
Revision: 153475
https://trac.macports.org/changeset/153475
Author: larryv at macports.org
Date: 2016-10-01 20:51:59 -0700 (Sat, 01 Oct 2016)
Log Message:
-----------
mirror-utils: Add new utility for listing mirrors
fetch-mirror-sites extracts URLs from select online mirror lists and
outputs them in a format suitable for mirror_sites.tcl.
Added Paths:
-----------
contrib/mirror-utils/fetch-mirror-sites
contrib/mirror-utils/fetch-mirror-sites.xslt
Added: contrib/mirror-utils/fetch-mirror-sites
===================================================================
--- contrib/mirror-utils/fetch-mirror-sites (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites 2016-10-02 03:51:59 UTC (rev 153475)
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+
+# Download a list of mirrors in HTML format, extract the URLs of FTP and
+# HTTP(S) mirrors, and print them to standard output, terminated by
+# a single slash and a newline.
+# -----
+
+msg() (unset IFS; printf '%s: %s\n' "$0" "$*" >&2)
+err() { msg error: "$@"; }
+warn() { msg warning: "$@"; }
+
+usage() {
+ cat >&2 <<EOF
+usage: $script [-a] [-K <curl config>] [--] <source>
+
+Available sources:
+ ctan / tex / tex_ctan
+ gentoo
+ gnu
+ xorg
+
+See the mirror-utils README for more details.
+EOF
+}
+
+# Avoid $0 (http://mywiki.wooledge.org/BashFAQ/028).
+script=$(basename "$BASH_SOURCE")
+parent=$(dirname "$BASH_SOURCE")
+cd "$parent" || {
+ err "cannot cd(1) into '$parent'"
+ exit 1
+}
+
+# Parse options. Remember to update the mirror-utils README, usage
+# message, and argument validation after changing these.
+unset all_urls curl_config
+while getopts aK: opt; do
+ case $opt in
+ a) all_urls=1 ;;
+ K) curl_config=$OPTARG ;;
+ '?') usage; exit 2 ;;
+ esac
+done
+shift $((OPTIND - 1))
+readonly all_urls curl_config
+
+# Validate arguments.
+if (( $# < 1 )); then
+ err 'source was not specified'
+ false
+elif [[ -z ${curl_config-_} ]]; then
+ err 'path to curl config cannot be empty'
+ false
+fi || { usage; exit 2; }
+
+# Keep these URLs synced with those in the XSLT stylesheet.
+case $1 in
+ ctan|tex|tex_ctan) url=https://ctan.org/mirrors ;;
+ gentoo) url=https://gentoo.org/downloads/mirrors ;;
+ gnu) url=https://gnu.org/prep/ftp.html ;;
+ xorg) url=https://x.org/wiki/Releases/Download ;;
+ *)
+ err "invalid source '$1'"
+ usage
+ exit 2
+ ;;
+esac
+readonly url
+
+if (( $# > 1 )); then
+ warn "using source '$1'; ignoring extra arguments"
+fi
+
+# The real work.
+#
+# - Only pass --config to curl if -K was specified for this script
+# (http://mywiki.wooledge.org/BashFAQ/050). Do not modify the eval
+# command unless you know what you're doing.
+# - The XSLT stylesheet expects the "url" parameter to be a URL sans
+# protocol.
+# - The awk script treats "all_urls" as a Boolean.
+eval curl --compressed --location --silent --show-error \
+ "${curl_config+'--config' \"\$curl_config\"}" -- '"$url"' \
+ | xsltproc --html --stringparam url "${url#*://}" "$script.xslt" - \
+ | awk -F '/+' -v all_urls="$all_urls" '
+ /^(ftp|https?):/ {
+ # Terminate with exactly one slash.
+ sub("/*$", "/")
+
+ # If all_urls is false, allow only one URL per FQDN,
+ # favoring HTTPS over HTTP over FTP.
+ key = all_urls ? $0 : $2
+ if ($1 == "ftp:" && urls[key] ~ /^https?:/)
+ next
+ if ($1 == "http:" && urls[key] ~ /^https:/)
+ next
+ urls[key] = $0
+ order[key] = count++
+ }
+
+ END {
+ # There should always be *some* input.
+ if (!NR)
+ exit 1
+
+ # Respect the ordering of the original list.
+ for (key in order)
+ orderedurls[order[key]] = urls[key]
+ for (i = 0; i < count; ++i)
+ if (i in orderedurls)
+ printf " %s\n", orderedurls[i]
+ }'
Property changes on: contrib/mirror-utils/fetch-mirror-sites
___________________________________________________________________
Added: svn:executable
+ *
Added: contrib/mirror-utils/fetch-mirror-sites.xslt
===================================================================
--- contrib/mirror-utils/fetch-mirror-sites.xslt (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites.xslt 2016-10-02 03:51:59 UTC (rev 153475)
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ URL extraction for fetch-mirror-sites.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:output method="text" />
+
+ <xsl:template match="/">
+ <!--
+ Keep the URLs synced with the driver script.
+ -->
+ <xsl:choose>
+
+ <!-- CTAN (TeX) -->
+ <xsl:when test="$url='ctan.org/mirrors'">
+ <xsl:apply-templates select="//main/div[@class='left']/a" />
+ <xsl:apply-templates select="//main/div[@class='right']/ul/li/a" />
+ </xsl:when>
+
+ <!-- Gentoo -->
+ <xsl:when test="$url='gentoo.org/downloads/mirrors'">
+ <xsl:apply-templates
+ select="id('content')//td[last()]/a[code]" />
+ </xsl:when>
+
+ <!-- GNU -->
+ <xsl:when test="$url='gnu.org/prep/ftp.html'">
+ <xsl:apply-templates
+ select="id('content')//li[not(contains(., '(alpha)'))]/a" />
+ </xsl:when>
+
+ <!-- X.Org -->
+ <xsl:when test="$url='x.org/wiki/Releases/Download'">
+ <xsl:apply-templates select="id('content')//li/a" />
+ </xsl:when>
+
+ </xsl:choose>
+ </xsl:template>
+
+ <!--
+ Assume we're always going to be interested in href and not the
+ link text, which is often something useless like "FTP".
+ -->
+ <xsl:template match="a[@href]">
+ <xsl:apply-templates select="@href" />
+ </xsl:template>
+
+ <xsl:template match="text()|@*">
+ <xsl:value-of select="normalize-space(.)" />
+ <!-- Terminate each URL with a newline. -->
+ <xsl:text>
+</xsl:text>
+ </xsl:template>
+</xsl:stylesheet>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macports.org/pipermail/macports-changes/attachments/20161001/488221c3/attachment-0002.html>
More information about the macports-changes
mailing list