<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[153475] contrib/mirror-utils</title>
<style type="text/css"><!--
/* Commit summary (#msg): metadata list, headings, and the log message box. */
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
/* Rich-text log message (#logmsg): paragraphs, headings, lists, quotes, tables. */
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }
#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
/* Page header and footer banners. */
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff view (#patch): one bordered box per file, with added lines in ins and removed lines in del. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
</head>
<body>
<div id="msg">
<!-- Commit summary: revision metadata, the log message, and the list of added paths. -->
<dl class="meta">
<dt>Revision</dt> <dd><a href="https://trac.macports.org/changeset/153475">153475</a></dd>
<dt>Author</dt> <dd>larryv@macports.org</dd>
<dt>Date</dt> <dd>2016-10-01 20:51:59 -0700 (Sat, 01 Oct 2016)</dd>
</dl>
<h3>Log Message</h3>
<pre>mirror-utils: Add new utility for listing mirrors
fetch-mirror-sites extracts URLs from select online mirror lists and
outputs them in a format suitable for mirror_sites.tcl.</pre>
<h3>Added Paths</h3>
<!-- Each entry links to the anchor placed before that file's diff in the patch section. -->
<ul>
<li><a href="#contribmirrorutilsfetchmirrorsites">contrib/mirror-utils/fetch-mirror-sites</a></li>
<li><a href="#contribmirrorutilsfetchmirrorsitesxslt">contrib/mirror-utils/fetch-mirror-sites.xslt</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<!-- Diff of the added script contrib/mirror-utils/fetch-mirror-sites. Markup-significant characters in the diff text are written as character references so that they display literally instead of being parsed as tags. -->
<a id="contribmirrorutilsfetchmirrorsites"></a>
<div class="addfile"><h4>Added: contrib/mirror-utils/fetch-mirror-sites (0 =&gt; 153475)</h4>
<pre class="diff"><span>
<span class="info">--- contrib/mirror-utils/fetch-mirror-sites (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites 2016-10-02 03:51:59 UTC (rev 153475)
</span><span class="lines">@@ -0,0 +1,112 @@
</span><ins>+#!/usr/bin/env bash
+
+# Download a list of mirrors in HTML format, extract the URLs of FTP and
+# HTTP(S) mirrors, and print them to standard output, terminated by
+# a single slash and a newline.
+# -----
+
+msg() (unset IFS; printf '%s: %s\n' "$0" "$*" &gt;&amp;2)
+err() { msg error: "$@"; }
+warn() { msg warning: "$@"; }
+
+usage() {
+ cat &gt;&amp;2 &lt;&lt;EOF
+usage: $script [-a] [-K &lt;curl config&gt;] [--] &lt;source&gt;
+
+Available sources:
+ ctan / tex / tex_ctan
+ gentoo
+ gnu
+ xorg
+
+See the mirror-utils README for more details.
+EOF
+}
+
+# Avoid $0 (http://mywiki.wooledge.org/BashFAQ/028).
+script=$(basename "$BASH_SOURCE")
+parent=$(dirname "$BASH_SOURCE")
+cd "$parent" || {
+ err "cannot cd(1) into '$parent'"
+ exit 1
+}
+
+# Parse options. Remember to update the mirror-utils README, usage
+# message, and argument validation after changing these.
+unset all_urls curl_config
+while getopts aK: opt; do
+ case $opt in
+ a) all_urls=1 ;;
+ K) curl_config=$OPTARG ;;
+ '?') usage; exit 2 ;;
+ esac
+done
+shift $((OPTIND - 1))
+readonly all_urls curl_config
+
+# Validate arguments.
+if (( $# &lt; 1 )); then
+ err 'source was not specified'
+ false
+elif [[ -z ${curl_config-_} ]]; then
+ err 'path to curl config cannot be empty'
+ false
+fi || { usage; exit 2; }
+
+# Keep these URLs synced with those in the XSLT stylesheet.
+case $1 in
+ ctan|tex|tex_ctan) url=https://ctan.org/mirrors ;;
+ gentoo) url=https://gentoo.org/downloads/mirrors ;;
+ gnu) url=https://gnu.org/prep/ftp.html ;;
+ xorg) url=https://x.org/wiki/Releases/Download ;;
+ *)
+ err "invalid source '$1'"
+ usage
+ exit 2
+ ;;
+esac
+readonly url
+
+if (( $# &gt; 1 )); then
+ warn "using source '$1'; ignoring extra arguments"
+fi
+
+# The real work.
+#
+# - Only pass --config to curl if -K was specified for this script
+# (http://mywiki.wooledge.org/BashFAQ/050). Do not modify the eval
+# command unless you know what you're doing.
+# - The XSLT stylesheet expects the "url" parameter to be a URL sans
+# protocol.
+# - The awk script treats "all_urls" as a Boolean.
+eval curl --compressed --location --silent --show-error \
+ "${curl_config+'--config' \"\$curl_config\"}" -- '"$url"' \
+ | xsltproc --html --stringparam url "${url#*://}" "$script.xslt" - \
+ | awk -F '/+' -v all_urls="$all_urls" '
+ /^(ftp|https?):/ {
+ # Terminate with exactly one slash.
+ sub("/*$", "/")
+
+ # If all_urls is false, allow only one URL per FQDN,
+ # favoring HTTPS over HTTP over FTP.
+ key = all_urls ? $0 : $2
+ if ($1 == "ftp:" &amp;&amp; urls[key] ~ /^https?:/)
+ next
+ if ($1 == "http:" &amp;&amp; urls[key] ~ /^https:/)
+ next
+ urls[key] = $0
+ order[key] = count++
+ }
+
+ END {
+ # There should always be *some* input.
+ if (!NR)
+ exit 1
+
+ # Respect the ordering of the original list.
+ for (key in order)
+ orderedurls[order[key]] = urls[key]
+ for (i = 0; i &lt; count; ++i)
+ if (i in orderedurls)
+ printf " %s\n", orderedurls[i]
+ }'
</ins><span class="cx">Property changes on: contrib/mirror-utils/fetch-mirror-sites
</span><span class="cx">___________________________________________________________________
</span></span></pre></div>
<!-- Property change recorded for the file above: svn:executable was added. -->
<a id="svnexecutable"></a>
<div class="addfile"><h4>Added: svn:executable</h4></div>
<!-- Diff of the added stylesheet contrib/mirror-utils/fetch-mirror-sites.xslt. The XSLT source is written with character references so that it is shown as text instead of being parsed as elements and comments. -->
<a id="contribmirrorutilsfetchmirrorsitesxslt"></a>
<div class="addfile"><h4>Added: contrib/mirror-utils/fetch-mirror-sites.xslt (0 =&gt; 153475)</h4>
<pre class="diff"><span>
<span class="info">--- contrib/mirror-utils/fetch-mirror-sites.xslt (rev 0)
+++ contrib/mirror-utils/fetch-mirror-sites.xslt 2016-10-02 03:51:59 UTC (rev 153475)
</span><span class="lines">@@ -0,0 +1,53 @@
</span><ins>+&lt;?xml version="1.0" encoding="UTF-8"?&gt;
+&lt;!--
+ URL extraction for fetch-mirror-sites.
+--&gt;
+&lt;xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"&gt;
+ &lt;xsl:output method="text" /&gt;
+
+ &lt;xsl:template match="/"&gt;
+ &lt;!--
+ Keep the URLs synced with the driver script.
+ --&gt;
+ &lt;xsl:choose&gt;
+
+ &lt;!-- CTAN (TeX) --&gt;
+ &lt;xsl:when test="$url='ctan.org/mirrors'"&gt;
+ &lt;xsl:apply-templates select="//main/div[@class='left']/a" /&gt;
+ &lt;xsl:apply-templates select="//main/div[@class='right']/ul/li/a" /&gt;
+ &lt;/xsl:when&gt;
+
+ &lt;!-- Gentoo --&gt;
+ &lt;xsl:when test="$url='gentoo.org/downloads/mirrors'"&gt;
+ &lt;xsl:apply-templates
+ select="id('content')//td[last()]/a[code]" /&gt;
+ &lt;/xsl:when&gt;
+
+ &lt;!-- GNU --&gt;
+ &lt;xsl:when test="$url='gnu.org/prep/ftp.html'"&gt;
+ &lt;xsl:apply-templates
+ select="id('content')//li[not(contains(., '(alpha)'))]/a" /&gt;
+ &lt;/xsl:when&gt;
+
+ &lt;!-- X.Org --&gt;
+ &lt;xsl:when test="$url='x.org/wiki/Releases/Download'"&gt;
+ &lt;xsl:apply-templates select="id('content')//li/a" /&gt;
+ &lt;/xsl:when&gt;
+
+ &lt;/xsl:choose&gt;
+ &lt;/xsl:template&gt;
+
+ &lt;!--
+ Assume we're always going to be interested in href and not the
+ link text, which is often something useless like "FTP".
+ --&gt;
+ &lt;xsl:template match="a[@href]"&gt;
+ &lt;xsl:apply-templates select="@href" /&gt;
+ &lt;/xsl:template&gt;
+
+ &lt;xsl:template match="text()|@*"&gt;
+ &lt;xsl:value-of select="normalize-space(.)" /&gt;
+ &lt;!-- Terminate each URL with a newline. --&gt;
+ &lt;xsl:text&gt;
&lt;/xsl:text&gt;
+ &lt;/xsl:template&gt;
+&lt;/xsl:stylesheet&gt;
</ins></span></pre>
</div>
</div>
</body>
</html>