[153475] contrib/mirror-utils

larryv at macports.org larryv at macports.org
Sun Oct 2 05:51:59 CEST 2016


Revision: 153475
          https://trac.macports.org/changeset/153475
Author:   larryv at macports.org
Date:     2016-10-01 20:51:59 -0700 (Sat, 01 Oct 2016)
Log Message:
-----------
mirror-utils: Add new utility for listing mirrors

fetch-mirror-sites extracts URLs from select online mirror lists and
outputs them in a format suitable for mirror_sites.tcl.

Added Paths:
-----------
    contrib/mirror-utils/fetch-mirror-sites
    contrib/mirror-utils/fetch-mirror-sites.xslt

Added: contrib/mirror-utils/fetch-mirror-sites
===================================================================
--- contrib/mirror-utils/fetch-mirror-sites	                        (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites	2016-10-02 03:51:59 UTC (rev 153475)
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+
+# Download a list of mirrors in HTML format, extract the URLs of FTP and
+# HTTP(S) mirrors, and print them to standard output, terminated by
+# a single slash and a newline.
+# -----
+
+msg() (unset IFS; printf '%s: %s\n' "$0" "$*" >&2)
+err() { msg error: "$@"; }
+warn() { msg warning: "$@"; }
+
+usage() {
+    cat >&2 <<EOF
+usage: $script [-a] [-K <curl config>] [--] <source>
+
+Available sources:
+    ctan / tex / tex_ctan
+    gentoo
+    gnu
+    xorg
+
+See the mirror-utils README for more details.
+EOF
+}
+
+# Avoid $0 (http://mywiki.wooledge.org/BashFAQ/028).
+script=$(basename "$BASH_SOURCE")
+parent=$(dirname "$BASH_SOURCE")
+cd "$parent" || {
+    err "cannot cd(1) into '$parent'"
+    exit 1
+}
+
+# Parse options. Remember to update the mirror-utils README, usage
+# message, and argument validation after changing these.
+unset all_urls curl_config
+while getopts aK: opt; do
+    case $opt in
+        a) all_urls=1 ;;
+        K) curl_config=$OPTARG ;;
+        '?') usage; exit 2 ;;
+    esac
+done
+shift $((OPTIND - 1))
+readonly all_urls curl_config
+
+# Validate arguments.
+if (( $# < 1 )); then
+    err 'source was not specified'
+    false
+elif [[ -z ${curl_config-_} ]]; then
+    err 'path to curl config cannot be empty'
+    false
+fi || { usage; exit 2; }
+
+# Keep these URLs synced with those in the XSLT stylesheet.
+case $1 in
+    ctan|tex|tex_ctan) url=https://ctan.org/mirrors ;;
+    gentoo) url=https://gentoo.org/downloads/mirrors ;;
+    gnu)    url=https://gnu.org/prep/ftp.html ;;
+    xorg)   url=https://x.org/wiki/Releases/Download ;;
+    *)
+        err "invalid source '$1'"
+        usage
+        exit 2
+        ;;
+esac
+readonly url
+
+if (( $# > 1 )); then
+    warn "using source '$1'; ignoring extra arguments"
+fi
+
+# The real work.
+#
+# - Only pass --config to curl if -K was specified for this script
+#   (http://mywiki.wooledge.org/BashFAQ/050). Do not modify the eval
+#   command unless you know what you're doing.
+# - The XSLT stylesheet expects the "url" parameter to be a URL sans
+#   protocol.
+# - The awk script treats "all_urls" as a Boolean.
+eval curl --compressed --location --silent --show-error \
+        "${curl_config+'--config' \"\$curl_config\"}" -- '"$url"' \
+    | xsltproc --html --stringparam url "${url#*://}" "$script.xslt" - \
+    | awk -F '/+' -v all_urls="$all_urls" '
+            /^(ftp|https?):/ {
+                # Terminate with exactly one slash.
+                sub("/*$", "/")
+
+                # If all_urls is false, allow only one URL per FQDN,
+                # favoring HTTPS over HTTP over FTP.
+                key = all_urls ? $0 : $2
+                if ($1 == "ftp:" && urls[key] ~ /^https?:/)
+                    next
+                if ($1 == "http:" && urls[key] ~ /^https:/)
+                    next
+                urls[key] = $0
+                order[key] = count++
+            }
+
+            END {
+                # There should always be *some* input.
+                if (!NR)
+                    exit 1
+
+                # Respect the ordering of the original list.
+                for (key in order)
+                    orderedurls[order[key]] = urls[key]
+                for (i = 0; i < count; ++i)
+                    if (i in orderedurls)
+                        printf "    %s\n", orderedurls[i]
+            }'


Property changes on: contrib/mirror-utils/fetch-mirror-sites
___________________________________________________________________
Added: svn:executable
   + *

Added: contrib/mirror-utils/fetch-mirror-sites.xslt
===================================================================
--- contrib/mirror-utils/fetch-mirror-sites.xslt	                        (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites.xslt	2016-10-02 03:51:59 UTC (rev 153475)
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    URL extraction for fetch-mirror-sites.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+    <xsl:output method="text" />
+
+    <xsl:template match="/">
+        <!--
+            Keep the URLs synced with the driver script.
+        -->
+        <xsl:choose>
+
+            <!-- CTAN (TeX) -->
+            <xsl:when test="$url='ctan.org/mirrors'">
+                <xsl:apply-templates select="//main/div[@class='left']/a" />
+                <xsl:apply-templates select="//main/div[@class='right']/ul/li/a" />
+            </xsl:when>
+
+            <!-- Gentoo -->
+            <xsl:when test="$url='gentoo.org/downloads/mirrors'">
+                <xsl:apply-templates
+                    select="id('content')//td[last()]/a[code]" />
+            </xsl:when>
+
+            <!-- GNU -->
+            <xsl:when test="$url='gnu.org/prep/ftp.html'">
+                <xsl:apply-templates
+                    select="id('content')//li[not(contains(., '(alpha)'))]/a" />
+            </xsl:when>
+
+            <!-- X.Org -->
+            <xsl:when test="$url='x.org/wiki/Releases/Download'">
+                <xsl:apply-templates select="id('content')//li/a" />
+            </xsl:when>
+
+        </xsl:choose>
+    </xsl:template>
+
+    <!--
+        Assume we're always going to be interested in href and not the
+        link text, which is often something useless like "FTP".
+    -->
+    <xsl:template match="a[@href]">
+        <xsl:apply-templates select="@href" />
+    </xsl:template>
+
+    <xsl:template match="text()|@*">
+        <xsl:value-of select="normalize-space(.)" />
+        <!-- Terminate each URL with a newline. -->
+        <xsl:text>&#xA;</xsl:text>
+    </xsl:template>
+</xsl:stylesheet>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macports.org/pipermail/macports-changes/attachments/20161001/488221c3/attachment-0002.html>


More information about the macports-changes mailing list