diff --git a/src/script/nix-prefetch-git b/src/script/nix-prefetch-git index 13acb003..34dfe67d 100755 --- a/src/script/nix-prefetch-git +++ b/src/script/nix-prefetch-git @@ -1,90 +1,110 @@ -#! /bin/sh -e +#! /usr/bin/env bash -url=$1 -rev=$2 -expHash=$3 +set -e -o pipefail + +url= +rev= +expHash= hashType=$NIX_HASH_ALGO deepClone=$NIX_PREFETCH_GIT_DEEP_CLONE leaveDotGit=$NIX_PREFETCH_GIT_LEAVE_DOT_GIT +fetchSubmodules= builder= +branchName=$NIX_PREFETCH_GIT_BRANCH_NAME + +# ENV params +out=${out:-} +http_proxy=${http_proxy:-} + +# populated by clone_user_rev() +fullRev= +humanReadableRev= +commitDate= +commitDateStrict8601= if test -n "$deepClone"; then - deepClone=true + deepClone=true else - deepClone=false + deepClone=false fi if test "$leaveDotGit" != 1; then - leaveDotGit= + leaveDotGit= else - leaveDotGit=true + leaveDotGit=true fi - -argi=0 -argfun="" -for arg; do - if test -z "$argfun"; then - case $arg in - --out) argfun=set_out;; - --url) argfun=set_url;; - --rev) argfun=set_rev;; - --hash) argfun=set_hashType;; - --deepClone) deepClone=true;; - --no-deepClone) deepClone=false;; - --leave-dotGit) leaveDotGit=true;; - --builder) builder=true;; - *) - argi=$(($argi + 1)) - case $argi in - 1) url=$arg;; - 2) rev=$arg;; - 3) expHash=$arg;; - *) exit 1;; - esac - ;; - esac - else - case $argfun in - set_*) - var=$(echo $argfun | sed 's,^set_,,') - eval $var=$arg - ;; - esac - argfun="" - fi -done - usage(){ echo >&2 "syntax: nix-prefetch-git [options] [URL [REVISION [EXPECTED-HASH]]] Options: --out path Path where the output would be stored. - --url url Any url understand by 'git clone'. + --url url Any url understood by 'git clone'. --rev ref Any sha1 or references (such as refs/heads/master) --hash h Expected hash. - --deepClone Clone submodules recursively. - --no-deepClone Do not clone submodules. + --deepClone Clone the entire repository. + --no-deepClone Make a shallow clone of just the required ref. --leave-dotGit Keep the .git directories. + --fetch-submodules Fetch submodules. --builder Clone as fetchgit does, but url, rev, and out option are mandatory. + --quiet Only print the final json summary. " exit 1 } +argi=0 +argfun="" +for arg; do + if test -z "$argfun"; then + case $arg in + --out) argfun=set_out;; + --url) argfun=set_url;; + --rev) argfun=set_rev;; + --hash) argfun=set_hashType;; + --branch-name) argfun=set_branchName;; + --deepClone) deepClone=true;; + --quiet) QUIET=true;; + --no-deepClone) deepClone=false;; + --leave-dotGit) leaveDotGit=true;; + --fetch-submodules) fetchSubmodules=true;; + --builder) builder=true;; + --help) usage; exit;; + *) + : $((++argi)) + case $argi in + 1) url=$arg;; + 2) rev=$arg;; + 3) expHash=$arg;; + *) exit 1;; + esac + ;; + esac + else + case $argfun in + set_*) + var=${argfun#set_} + eval $var=$arg + ;; + esac + argfun="" + fi +done + if test -z "$url"; then - usage + usage fi init_remote(){ - local url=$1; - git init; - git remote add origin $url; + local url=$1 + git init + git remote add origin "$url" + ( [ -n "$http_proxy" ] && git config http.proxy "$http_proxy" ) || true } # Return the reference of an hash if it exists on the remote repository. ref_from_hash(){ - local hash=$1; + local hash=$1 git ls-remote origin | sed -n "\,$hash\t, { s,\(.*\)\t\(.*\),\2,; p; q}" } @@ -94,43 +114,60 @@ hash_from_ref(){ git ls-remote origin | sed -n "\,\t$ref, { s,\(.*\)\t\(.*\),\1,; p; q}" } +# Returns a name based on the url and reference +# +# This function needs to be in sync with nix's fetchgit implementation +# of urlToName() to re-use the same nix store paths. +url_to_name(){ + local url=$1 + local ref=$2 + local base + base=$(basename "$url" .git | cut -d: -f2) + + if [[ $ref =~ ^[a-z0-9]+$ ]]; then + echo "$base-${ref:0:7}" + else + echo "$base" + fi +} + # Fetch everything and checkout the right sha1 checkout_hash(){ - local hash="$1"; - local ref="$2"; + local hash="$1" + local ref="$2" if test -z "$hash"; then - hash=$(hash_from_ref $ref); - fi; + hash=$(hash_from_ref "$ref") + fi - git fetch ${builder:+--progress} origin || return 1 - git checkout -b fetchgit $hash || return 1 + git fetch -t ${builder:+--progress} origin || return 1 + git checkout -b "$branchName" "$hash" || return 1 } # Fetch only a branch/tag and checkout it. checkout_ref(){ - local hash="$1"; - local ref="$2"; + local hash="$1" + local ref="$2" if "$deepClone"; then - # The caller explicitly asked for a deep clone. Deep clones - # allow "git describe" and similar tools to work. See - # http://thread.gmane.org/gmane.linux.distributions.nixos/3569 - # for a discussion. - return 1 + # The caller explicitly asked for a deep clone. Deep clones + # allow "git describe" and similar tools to work. See + # http://thread.gmane.org/gmane.linux.distributions.nixos/3569 + # for a discussion. + return 1 fi if test -z "$ref"; then - ref=$(ref_from_hash $hash); - fi; + ref=$(ref_from_hash "$hash") + fi if test -n "$ref"; then # --depth option is ignored on http repository. git fetch ${builder:+--progress} --depth 1 origin +"$ref" || return 1 - git checkout -b fetchgit FETCH_HEAD || return 1 + git checkout -b "$branchName" FETCH_HEAD || return 1 else - return 1; - fi; + return 1 + fi } # Update submodules @@ -140,130 +177,243 @@ init_submodules(){ # list submodule directories and their hashes git submodule status | - while read l; do - # checkout each submodule - local hash=$(echo $l | sed 's,^-\([0-9a-f]*\) \(.*\)$,\1,'); - local dir=$(echo $l | sed 's,^-\([0-9a-f]*\) \(.*\)$,\2,'); - local url=$(sed -n "\,$dir, { :loop; n; s,^.*url = ,,; T loop; p; q }" .git/config); + while read -r l; do + local hash + local dir + local name + local url - clone "$dir" "$url" "$hash" ""; - done; + # checkout each submodule + hash=$(echo "$l" | awk '{print $1}' | tr -d '-') + dir=$(echo "$l" | sed -n 's/^.[0-9a-f]\+ \(.*[^)]*\)\( (.*)\)\?$/\1/p') + name=$( + git config -f .gitmodules --get-regexp submodule\..*\.path | + sed -n "s,^\(.*\)\.path $dir\$,\\1,p") + url=$(git config --get "${name}.url") + + clone "$dir" "$url" "$hash" "" + done } clone(){ - local top=$(pwd) + local top=$PWD local dir="$1" local url="$2" local hash="$3" local ref="$4" - cd $dir; + cd "$dir" # Initialize the repository. - init_remote "$url"; + init_remote "$url" # Download data from the repository. checkout_ref "$hash" "$ref" || checkout_hash "$hash" "$ref" || ( - echo 1>&2 "Unable to checkout $hash$ref from $url."; - exit 1; + echo 1>&2 "Unable to checkout $hash$ref from $url." + exit 1 ) # Checkout linked sources. - init_submodules; - - if [ -z "$builder" -a -f .topdeps ]; then - if tg help 2>&1 > /dev/null - then - echo "populating TopGit branches..." - tg remote --populate origin - else - echo "WARNING: would populate TopGit branches but TopGit is not available" >&2 - echo "WARNING: install TopGit to fix the problem" >&2 - fi + if test -n "$fetchSubmodules"; then + init_submodules fi - cd $top; + if [ -z "$builder" ] && [ -f .topdeps ]; then + if tg help &>/dev/null; then + echo "populating TopGit branches..." + tg remote --populate origin + else + echo "WARNING: would populate TopGit branches but TopGit is not available" >&2 + echo "WARNING: install TopGit to fix the problem" >&2 + fi + fi + + cd "$top" } -clone_user_rev() { +# Remove all remote branches, remove tags not reachable from HEAD, do a full +# repack and then garbage collect unreferenced objects. +make_deterministic_repo(){ + local repo="$1" + + # run in sub-shell to not touch current working directory + ( + cd "$repo" + # Remove files that contain timestamps or otherwise have non-deterministic + # properties. + rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \ + .git/refs/remotes/origin/HEAD .git/config + + # Remove all remote branches. + git branch -r | while read -r branch; do + git branch -rD "$branch" >&2 + done + + # Remove tags not reachable from HEAD. If we're exactly on a tag, don't + # delete it. + maybe_tag=$(git tag --points-at HEAD) + git tag --contains HEAD | while read -r tag; do + if [ "$tag" != "$maybe_tag" ]; then + git tag -d "$tag" >&2 + fi + done + + # Do a full repack. Must run single-threaded, or else we lose determinism. + git config pack.threads 1 + git repack -A -d -f + rm -f .git/config + + # Garbage collect unreferenced objects. + git gc --prune=all + ) +} + + +_clone_user_rev() { local dir="$1" local url="$2" - local rev="$3" + local rev="${3:-HEAD}" # Perform the checkout. case "$rev" in HEAD|refs/*) clone "$dir" "$url" "" "$rev" 1>&2;; - [0-9a-f]*) - if test -z "$(echo $rev | tr -d 0123456789abcdef)"; then - clone "$dir" "$url" "$rev" "" 1>&2; + *) + if test -z "$(echo "$rev" | tr -d 0123456789abcdef)"; then + clone "$dir" "$url" "$rev" "" 1>&2 else - echo 1>&2 "Bad commit hash or bad reference."; - exit 1; + # if revision is not hexadecimal it might be a tag + clone "$dir" "$url" "" "refs/tags/$rev" 1>&2 fi;; - "") - clone "$dir" "$url" "" "HEAD" 1>&2;; esac + pushd "$dir" >/dev/null + fullRev=$( (git rev-parse "$rev" 2>/dev/null || git rev-parse "refs/heads/$branchName") | tail -n1) + humanReadableRev=$(git describe "$fullRev" 2> /dev/null || git describe --tags "$fullRev" 2> /dev/null || echo -- none --) + commitDate=$(git show -1 --no-patch --pretty=%ci "$fullRev") + commitDateStrict8601=$(git show -1 --no-patch --pretty=%cI "$fullRev") + popd >/dev/null + # Allow doing additional processing before .git removal eval "$NIX_PREFETCH_GIT_CHECKOUT_HOOK" if test -z "$leaveDotGit"; then - echo "removing \`.git'..." >&2 - find $dir -name .git | xargs rm -rf + echo "removing \`.git'..." >&2 + find "$dir" -name .git -print0 | xargs -0 rm -rf + else + find "$dir" -name .git | while read -r gitdir; do + make_deterministic_repo "$(readlink -f "$gitdir/..")" + done fi } -if test -n "$builder"; then - test -n "$out" -a -n "$url" -a -n "$rev" || usage - mkdir $out - clone_user_rev "$out" "$url" "$rev" -else - if test -z "$hashType"; then - hashType=sha256 - fi +clone_user_rev() { + if ! test -n "$QUIET"; then + _clone_user_rev "$@" + else + errfile="$(mktemp "${TMPDIR:-/tmp}/git-checkout-err-XXXXXXXX")" + # shellcheck disable=SC2064 + trap "rm -rf \"$errfile\"" EXIT + _clone_user_rev "$@" 2> "$errfile" || ( + status="$?" + cat "$errfile" >&2 + exit "$status" + ) + fi +} - # If the hash was given, a file with that hash may already be in the - # store. - if test -n "$expHash"; then - finalPath=$(nix-store --print-fixed-path --recursive "$hashType" "$expHash" source) - if ! nix-store --check-validity "$finalPath" 2> /dev/null; then - finalPath= - fi - hash=$expHash - fi +json_escape() { + local s="$1" + s="${s//\\/\\\\}" # \ + s="${s//\"/\\\"}" # " + s="${s//^H/\\\b}" # \b (backspace) + s="${s//^L/\\\f}" # \f (form feed) + s="${s// +/\\\n}" # \n (newline) + s="${s//^M/\\\r}" # \r (carriage return) + s="${s// /\\t}" # \t (tab) + echo "$s" +} - # If we don't know the hash or a path with that hash doesn't exist, - # download the file and add it to the store. - if test -z "$finalPath"; then +print_results() { + hash="$1" + if ! test -n "$QUIET"; then + echo "" >&2 + echo "git revision is $fullRev" >&2 + if test -n "$finalPath"; then + echo "path is $finalPath" >&2 + fi + echo "git human-readable version is $humanReadableRev" >&2 + echo "Commit date is $commitDate" >&2 + if test -n "$hash"; then + echo "hash is $hash" >&2 + fi + fi + if test -n "$hash"; then + cat <&2; fi - - # Add the downloaded file to the Nix store. - finalPath=$(nix-store --add-fixed --recursive "$hashType" $tmpFile) - - if test -n "$expHash" -a "$expHash" != "$hash"; then - echo "hash mismatch for URL \`$url'" - exit 1 - fi - fi - - if ! test -n "$QUIET"; then echo "path is $finalPath" >&2; fi - - echo $hash - - if test -n "$PRINT_PATH"; then - echo $finalPath - fi +if test -z "$branchName"; then + branchName=fetchgit +fi + +if test -n "$builder"; then + test -n "$out" -a -n "$url" -a -n "$rev" || usage + mkdir -p "$out" + clone_user_rev "$out" "$url" "$rev" +else + if test -z "$hashType"; then + hashType=sha256 + fi + + # If the hash was given, a file with that hash may already be in the + # store. + if test -n "$expHash"; then + finalPath=$(nix-store --print-fixed-path --recursive "$hashType" "$expHash" "$(url_to_name "$url" "$rev")") + if ! nix-store --check-validity "$finalPath" 2> /dev/null; then + finalPath= + fi + hash=$expHash + fi + + # If we don't know the hash or a path with that hash doesn't exist, + # download the file and add it to the store. + if test -z "$finalPath"; then + + tmpPath="$(mktemp -d "${TMPDIR:-/tmp}/git-checkout-tmp-XXXXXXXX")" + # shellcheck disable=SC2064 + trap "rm -rf \"$tmpPath\"" EXIT + + tmpFile="$tmpPath/$(url_to_name "$url" "$rev")" + mkdir -p "$tmpFile" + + # Perform the checkout. + clone_user_rev "$tmpFile" "$url" "$rev" + + # Compute the hash. + hash=$(nix-hash --type $hashType --base32 "$tmpFile") + + # Add the downloaded file to the Nix store. + finalPath=$(nix-store --add-fixed --recursive "$hashType" "$tmpFile") + + if test -n "$expHash" -a "$expHash" != "$hash"; then + echo "hash mismatch for URL \`$url'. Got \`$hash'; expected \`$expHash'." >&2 + exit 1 + fi + fi + + print_results "$hash" + + if test -n "$PRINT_PATH"; then + echo "$finalPath" + fi fi