| #!/usr/bin/env bash |
| set -eo pipefail |
| |
| # hello-world latest ef872312fe1b 3 months ago 910 B |
| # hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B |
| |
| # debian latest f6fab3b798be 10 weeks ago 85.1 MB |
| # debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB |
| |
| # check if essential commands are in our PATH |
| for cmd in curl jq go; do |
| if ! command -v $cmd &> /dev/null; then |
| echo >&2 "error: \"$cmd\" not found!" |
| exit 1 |
| fi |
| done |
| |
| usage() { |
| echo "usage: $0 dir image[:tag][@digest] ..." |
| echo " $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7" |
| [ -z "$1" ] || exit "$1" |
| } |
| |
| dir="$1" # dir for building tar in |
| shift || usage 1 >&2 |
| |
| [ $# -gt 0 -a "$dir" ] || usage 2 >&2 |
| mkdir -p "$dir" |
| |
| # hacky workarounds for Bash 3 support (no associative arrays) |
| images=() |
| rm -f "$dir"/tags-*.tmp |
| manifestJsonEntries=() |
| doNotGenerateManifestJson= |
| # repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."' |
| |
| # bash v4 on Windows CI requires CRLF separator |
| newlineIFS=$'\n' |
| if [ "$(go env GOHOSTOS)" = 'windows' ]; then |
| major=$(echo ${BASH_VERSION%%[^0.9]} | cut -d. -f1) |
| if [ "$major" -ge 4 ]; then |
| newlineIFS=$'\r\n' |
| fi |
| fi |
| |
| registryBase='https://registry-1.docker.io' |
| authBase='https://auth.docker.io' |
| authService='registry.docker.io' |
| |
| # https://github.com/moby/moby/issues/33700 |
| fetch_blob() { |
| local token="$1"; shift |
| local image="$1"; shift |
| local digest="$1"; shift |
| local targetFile="$1"; shift |
| local curlArgs=( "$@" ) |
| |
| local curlHeaders="$( |
| curl -S "${curlArgs[@]}" \ |
| -H "Authorization: Bearer $token" \ |
| "$registryBase/v2/$image/blobs/$digest" \ |
| -o "$targetFile" \ |
| -D- |
| )" |
| curlHeaders="$(echo "$curlHeaders" | tr -d '\r')" |
| if grep -qE "^HTTP/[0-9].[0-9] 3" <<<"$curlHeaders"; then |
| rm -f "$targetFile" |
| |
| local blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')" |
| if [ -z "$blobRedirect" ]; then |
| echo >&2 "error: failed fetching '$image' blob '$digest'" |
| echo "$curlHeaders" | head -1 >&2 |
| return 1 |
| fi |
| |
| curl -fSL "${curlArgs[@]}" \ |
| "$blobRedirect" \ |
| -o "$targetFile" |
| fi |
| } |
| |
| # handle 'application/vnd.docker.distribution.manifest.v2+json' manifest |
| handle_single_manifest_v2() { |
| local manifestJson="$1"; shift |
| |
| local configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')" |
| local imageId="${configDigest#*:}" # strip off "sha256:" |
| |
| local configFile="$imageId.json" |
| fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s |
| |
| local layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')" |
| local IFS="$newlineIFS" |
| local layers=( $layersFs ) |
| unset IFS |
| |
| echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..." |
| local layerId= |
| local layerFiles=() |
| for i in "${!layers[@]}"; do |
| local layerMeta="${layers[$i]}" |
| |
| local layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')" |
| local layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')" |
| |
| # save the previous layer's ID |
| local parentId="$layerId" |
| # create a new fake layer ID based on this layer's digest and the previous layer's fake ID |
| layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)" |
| # this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value) |
| |
| mkdir -p "$dir/$layerId" |
| echo '1.0' > "$dir/$layerId/VERSION" |
| |
| if [ ! -s "$dir/$layerId/json" ]; then |
| local parentJson="$(printf ', parent: "%s"' "$parentId")" |
| local addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")" |
| # this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers |
| jq "$addJson + ." > "$dir/$layerId/json" <<-'EOJSON' |
| { |
| "created": "0001-01-01T00:00:00Z", |
| "container_config": { |
| "Hostname": "", |
| "Domainname": "", |
| "User": "", |
| "AttachStdin": false, |
| "AttachStdout": false, |
| "AttachStderr": false, |
| "Tty": false, |
| "OpenStdin": false, |
| "StdinOnce": false, |
| "Env": null, |
| "Cmd": null, |
| "Image": "", |
| "Volumes": null, |
| "WorkingDir": "", |
| "Entrypoint": null, |
| "OnBuild": null, |
| "Labels": null |
| } |
| } |
| EOJSON |
| fi |
| |
| case "$layerMediaType" in |
| application/vnd.docker.image.rootfs.diff.tar.gzip) |
| local layerTar="$layerId/layer.tar" |
| layerFiles=( "${layerFiles[@]}" "$layerTar" ) |
| # TODO figure out why "-C -" doesn't work here |
| # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume." |
| # "HTTP/1.1 416 Requested Range Not Satisfiable" |
| if [ -f "$dir/$layerTar" ]; then |
| # TODO hackpatch for no -C support :'( |
| echo "skipping existing ${layerId:0:12}" |
| continue |
| fi |
| local token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')" |
| fetch_blob "$token" "$image" "$layerDigest" "$dir/$layerTar" --progress |
| ;; |
| |
| *) |
| echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'" |
| exit 1 |
| ;; |
| esac |
| done |
| |
| # change "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons) |
| imageId="$layerId" |
| |
| # munge the top layer image manifest to have the appropriate image configuration for older daemons |
| local imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")" |
| jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json" |
| |
| local manifestJsonEntry="$( |
| echo '{}' | jq --raw-output '. + { |
| Config: "'"$configFile"'", |
| RepoTags: ["'"${image#library\/}:$tag"'"], |
| Layers: '"$(echo '[]' | jq --raw-output ".$(for layerFile in "${layerFiles[@]}"; do echo " + [ \"$layerFile\" ]"; done)")"' |
| }' |
| )" |
| manifestJsonEntries=( "${manifestJsonEntries[@]}" "$manifestJsonEntry" ) |
| } |
| |
| while [ $# -gt 0 ]; do |
| imageTag="$1" |
| shift |
| image="${imageTag%%[:@]*}" |
| imageTag="${imageTag#*:}" |
| digest="${imageTag##*@}" |
| tag="${imageTag%%@*}" |
| |
| # add prefix library if passed official image |
| if [[ "$image" != *"/"* ]]; then |
| image="library/$image" |
| fi |
| |
| imageFile="${image//\//_}" # "/" can't be in filenames :) |
| |
| token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')" |
| |
| manifestJson="$( |
| curl -fsSL \ |
| -H "Authorization: Bearer $token" \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \ |
| "$registryBase/v2/$image/manifests/$digest" |
| )" |
| if [ "${manifestJson:0:1}" != '{' ]; then |
| echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:" |
| echo >&2 " $manifestJson" |
| exit 1 |
| fi |
| |
| imageIdentifier="$image:$tag@$digest" |
| |
| schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')" |
| case "$schemaVersion" in |
| 2) |
| mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')" |
| |
| case "$mediaType" in |
| application/vnd.docker.distribution.manifest.v2+json) |
| handle_single_manifest_v2 "$manifestJson" |
| ;; |
| application/vnd.docker.distribution.manifest.list.v2+json) |
| layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')" |
| IFS="$newlineIFS" |
| layers=( $layersFs ) |
| unset IFS |
| |
| found="" |
| # parse first level multi-arch manifest |
| for i in "${!layers[@]}"; do |
| layerMeta="${layers[$i]}" |
| maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')" |
| if [ "$maniArch" = "$(go env GOARCH)" ]; then |
| digest="$(echo "$layerMeta" | jq --raw-output '.digest')" |
| # get second level single manifest |
| submanifestJson="$( |
| curl -fsSL \ |
| -H "Authorization: Bearer $token" \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \ |
| -H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \ |
| "$registryBase/v2/$image/manifests/$digest" |
| )" |
| handle_single_manifest_v2 "$submanifestJson" |
| found="found" |
| break |
| fi |
| done |
| if [ -z "$found" ]; then |
| echo >&2 "error: manifest for $maniArch is not found" |
| exit 1 |
| fi |
| ;; |
| *) |
| echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'" |
| exit 1 |
| ;; |
| esac |
| ;; |
| |
| 1) |
| if [ -z "$doNotGenerateManifestJson" ]; then |
| echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'" |
| echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)" |
| echo >&2 |
| doNotGenerateManifestJson=1 |
| fi |
| |
| layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')" |
| IFS="$newlineIFS" |
| layers=( $layersFs ) |
| unset IFS |
| |
| history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')" |
| imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')" |
| |
| echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..." |
| for i in "${!layers[@]}"; do |
| imageJson="$(echo "$history" | jq --raw-output ".[${i}]")" |
| layerId="$(echo "$imageJson" | jq --raw-output '.id')" |
| imageLayer="${layers[$i]}" |
| |
| mkdir -p "$dir/$layerId" |
| echo '1.0' > "$dir/$layerId/VERSION" |
| |
| echo "$imageJson" > "$dir/$layerId/json" |
| |
| # TODO figure out why "-C -" doesn't work here |
| # "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume." |
| # "HTTP/1.1 416 Requested Range Not Satisfiable" |
| if [ -f "$dir/$layerId/layer.tar" ]; then |
| # TODO hackpatch for no -C support :'( |
| echo "skipping existing ${layerId:0:12}" |
| continue |
| fi |
| token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')" |
| fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress |
| done |
| ;; |
| |
| *) |
| echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'" |
| exit 1 |
| ;; |
| esac |
| |
| echo |
| |
| if [ -s "$dir/tags-$imageFile.tmp" ]; then |
| echo -n ', ' >> "$dir/tags-$imageFile.tmp" |
| else |
| images=( "${images[@]}" "$image" ) |
| fi |
| echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp" |
| done |
| |
| echo -n '{' > "$dir/repositories" |
| firstImage=1 |
| for image in "${images[@]}"; do |
| imageFile="${image//\//_}" # "/" can't be in filenames :) |
| image="${image#library\/}" |
| |
| [ "$firstImage" ] || echo -n ',' >> "$dir/repositories" |
| firstImage= |
| echo -n $'\n\t' >> "$dir/repositories" |
| echo -n '"'"$image"'": { '"$(cat "$dir/tags-$imageFile.tmp")"' }' >> "$dir/repositories" |
| done |
| echo -n $'\n}\n' >> "$dir/repositories" |
| |
| rm -f "$dir"/tags-*.tmp |
| |
| if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then |
| echo '[]' | jq --raw-output ".$(for entry in "${manifestJsonEntries[@]}"; do echo " + [ $entry ]"; done)" > "$dir/manifest.json" |
| else |
| rm -f "$dir/manifest.json" |
| fi |
| |
| echo "Download of images into '$dir' complete." |
| echo "Use something like the following to load the result into a Docker daemon:" |
| echo " tar -cC '$dir' . | docker load" |