ContentLinkSynchronization.sh 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #!/usr/bin/env bash
  2. #==========================================================================
  3. #
  4. # Copyright NumFOCUS
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # https://www.apache.org/licenses/LICENSE-2.0.txt
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. #==========================================================================*/
  # Print the usage line and the release-time archival procedure to stdout.
  #
  # Arguments: none
  # Outputs:   the help text on stdout
  #
  # The heredoc delimiter is quoted so the text is emitted literally; nothing
  # in it is subject to parameter or command expansion.
  help_details() {
    cat <<'helpcontent'
Usage: ContentLinkSynchronization.sh [--create] [--root-cid <ITKData root cid>] <ITK source tree path>
This script, given an ExternalData object store, checks that all ExternalData
.cid content links in the ITK source are present in the ITKData/Objects/ directory,
verifies that hashes correspond to the same file, and creates the corresponding
file entry in the ITK/Data repository.
If content link verification fails, the script notifies the caller and exits.
The error should be resolved manually before re-execution.
Once executed, a datalad commit can be created from the result.
This script should be executed prior to releases. The steps are:
1. Check out the version of ITK whose data will be archived.
2. Run this script with the --create flag. This will copy new objects into Objects/CID/.
3. Upload the tree with: w3 put . --no-wrap -n ITKData-pre-verify -H
4. Run this script with the --root-cid flag given the root-cid from the previous step. This will verify and copy data files into their location in the ITK source tree.
5. Commit the result with datalad save -m "ENH: Updates for ITK-v<itk-release-version>"
6. Upload the repository update to web3.storage: w3 put . --no-wrap -n ITKData-v<itk-release-version> -H
7. Update the README.md with the new root CID.
8. Save the README.md update with datalad save -m "DOC: Update root CID for ITK-v<itk-release-version>"
9. Pin the resulting root CID across pinning resources.
helpcontent
  }
  # Print the arguments as a single message line on stderr, then terminate the
  # current shell with exit status 1.
  #
  # Arguments: $@ - message words (may be empty, which prints a blank line)
  # Outputs:   the message on stderr
  #
  # printf is used instead of echo so a message that begins with -n, -e or -E
  # is printed verbatim rather than being consumed as an echo option.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }
  # Command-line state and its defaults.
  itk_source_dir=""   # path to the ITK source tree (the only positional argument)
  root_cid=""         # value given to -r/--root-cid
  create=false        # set by -c/--create
  help=false          # set by -h/--help

  # Parse the command line. The first non-option word is taken as the ITK
  # source tree; any further non-option word is rejected.
  while [[ $# -gt 0 ]]; do
    opt="$1"
    shift
    case "$opt" in
      -h|--help)
        help=true
        ;;
      -c|--create)
        create=true
        ;;
      -r|--root-cid)
        # Without this check a trailing --root-cid would silently leave
        # root_cid empty.
        if [[ $# -eq 0 ]]; then
          die "Option $opt requires an argument"
        fi
        root_cid="$1"
        shift
        ;;
      *)
        if [[ -z "${itk_source_dir}" ]]; then
          itk_source_dir=$opt
          if [[ ! -e "${itk_source_dir}" ]]; then
            die "$itk_source_dir does not exist!"
          fi
          # The script later changes directory before using this path, so a
          # relative path must be made absolute while it still resolves.
          if [[ -d "${itk_source_dir}" ]]; then
            itk_source_dir=$(cd "${itk_source_dir}" > /dev/null && pwd) \
              || die "Could not resolve $opt to an absolute path!"
          fi
        else
          echo >&2 "Invalid option: $opt"
          exit 1
        fi
        ;;
    esac
  done
  # An explicit help request is a success; a missing source tree is a usage
  # error, so the help goes to stderr and the exit status is non-zero.
  if $help; then
    help_details
    exit 0
  fi
  if [[ -z "${itk_source_dir}" ]]; then
    help_details >&2
    exit 1
  fi

  # The ipfs command-line tool is used to resolve and fetch content.
  if ! command -v ipfs > /dev/null 2>&1; then
    die "Please install the ipfs executable."
  fi

  # Work from the top level of the git repository containing the current
  # directory; the object store lives in its Objects/ subdirectory.
  # An empty result would otherwise make object_store "/Objects".
  top_level_dir=$(git rev-parse --show-toplevel) \
    || die "This script must be run from within a git repository."
  if [[ -z "${top_level_dir}" ]]; then
    die "This script must be run from within a git repository."
  fi
  cd "$top_level_dir" || die "Could not change directory to $top_level_dir!"
  mkdir -p Objects/CID || die "Could not create $top_level_dir/Objects/CID!"
  object_store="$top_level_dir/Objects"
  #######################################
  # Verify every "*.<algo>" content link under the ITK source tree against the
  # object store and copy each verified object into the repository tree.
  #
  # For each content link the object must exist at
  # ${object_store}/<ALGO>/<hash>, and the path Objects/<ALGO>/<hash> under
  # the root CID must resolve through ipfs. For algo "cid" the resolved value
  # must equal the hash stored in the link. The object is then copied to
  # ${top_level_dir}/<link path without the .<algo> suffix> unless that file
  # already exists.
  #
  # Globals:   itk_source_dir, root_cid, object_store, top_level_dir (read)
  # Arguments: $1 - content link extension / algorithm name, e.g. "cid"
  # Outputs:   progress messages on stdout, errors on stderr (through die)
  # Returns:   does not return on failure; die terminates the script.
  # Side effects: changes the working directory to itk_source_dir.
  #######################################
  verify_cids() {
    local algo=$1
    local algo_upper content_link algo_hash data_path
    local object_path cid_value output_path

    cd "$itk_source_dir" || die "Could not change directory to $itk_source_dir!"
    if [[ -z "${root_cid}" ]]; then
      die "--root-cid is required"
    fi
    algo_upper=$(awk '{print toupper($0)}' <<< "$algo")

    # The loop reads from a process substitution rather than a pipeline so it
    # runs in this shell and die terminates the script directly.
    while IFS= read -r -d '' content_link; do
      echo "Content link ${content_link} ..."
      if [[ -z "${content_link}" ]]; then
        die "Empty content link!"
      fi
      algo_hash=$(tr -d '[:space:]' < "${content_link}")
      # An empty hash would make object_path name the store directory itself.
      if [[ -z "${algo_hash}" ]]; then
        die "Empty content link!"
      fi
      # find only yields names ending in ".<algo>", so stripping that suffix
      # gives the path of the data file the link stands for.
      data_path=${content_link%".${algo}"}
      object_path="${object_store}/${algo_upper}/${algo_hash}"

      echo "Verifying ${algo_hash} ..."
      if [[ ! -e "${object_path}" ]]; then
        die "Could not find data object in store for $content_link!"
      fi

      # die must run outside the command substitution: inside it, exit would
      # only end the substitution's subshell and the loop would carry on with
      # an empty cid_value.
      cid_value=$(ipfs dag resolve "/ipfs/${root_cid}/Objects/${algo_upper}/${algo_hash}") \
        || die "Could not resolve CID!"
      if [[ "$algo" == "cid" ]]; then
        if [[ "${cid_value}" != "${algo_hash}" ]]; then
          die "CID value for ${object_store}/${algo_upper}/${algo_hash} does not equal hash in ${content_link}!"
        fi
      else
        cp "$object_path" "${object_store}/CID/${cid_value}" \
          || die "Could not copy ${object_path} into ${object_store}/CID!"
        # NOTE(review): removes the .sha512 link whatever $algo is -- confirm
        # that this is intended.
        rm -f "${itk_source_dir}/${data_path}.sha512"
      fi

      output_path="${top_level_dir}/${data_path}"
      if [[ ! -e "$output_path" ]]; then
        mkdir -p "$(dirname "$output_path")" \
          || die "Could not create directory for ${output_path}!"
        cp "$object_path" "$output_path" \
          || die "Could not copy ${object_path} to ${output_path}!"
      fi
    done < <(find . -name "*.${algo}" -print0)
  }
  #######################################
  # Populate the object store for every "*.<algo>" content link under the ITK
  # source tree.
  #
  # For each content link, the first matching case applies:
  #   - object already in ${object_store}/<ALGO>/<hash>: for a non-"cid" algo,
  #     resolve its CID under the root CID, write it to "<data path>.cid" in
  #     the source tree and remove "<data path>.md5"; for "cid" do nothing;
  #   - object present under ${ExternalData_OBJECT_STORES}/<ALGO>/<hash>:
  #     copy it into the object store;
  #   - algo is "cid": fetch /ipfs/<hash> into the object store with ipfs get;
  #   - otherwise: report the missing object and continue.
  #
  # Globals:   itk_source_dir, root_cid, object_store (read);
  #            ExternalData_OBJECT_STORES (read, may be unset)
  # Arguments: $1 - content link extension / algorithm name, e.g. "cid"
  # Outputs:   progress messages on stdout, errors on stderr (through die)
  # Returns:   does not return on failure; die terminates the script.
  # Side effects: changes the working directory to itk_source_dir.
  #######################################
  create_cids() {
    local algo=$1
    local algo_upper content_link algo_hash data_path
    local object_path external_object cid_value

    cd "$itk_source_dir" || die "Could not change directory to $itk_source_dir!"
    algo_upper=$(awk '{print toupper($0)}' <<< "$algo")

    # The loop reads from a process substitution rather than a pipeline so it
    # runs in this shell and die terminates the script directly.
    while IFS= read -r -d '' content_link; do
      echo "Content link ${content_link} ..."
      if [[ -z "${content_link}" ]]; then
        die "Empty content link!"
      fi
      algo_hash=$(tr -d '[:space:]' < "${content_link}")
      # An empty hash would make object_path name the store directory itself.
      if [[ -z "${algo_hash}" ]]; then
        die "Empty content link!"
      fi
      data_path=${content_link%".${algo}"}
      object_path="${object_store}/${algo_upper}/${algo_hash}"
      external_object="${ExternalData_OBJECT_STORES:-}/${algo_upper}/${algo_hash}"

      echo "Creating ${algo_hash} ${content_link}..."
      if [[ -e "${object_path}" ]]; then
        if [[ "$algo" != "cid" ]]; then
          # Create
          if [[ -z "${root_cid}" ]]; then
            die "--root-cid is required"
          fi
          # die must run outside the command substitution; inside it, exit
          # would only end the subshell and an empty .cid link would be written.
          cid_value=$(ipfs dag resolve "/ipfs/${root_cid}/Objects/${algo_upper}/${algo_hash}") \
            || die "Could not resolve CID"
          printf '%s\n' "$cid_value" > "${itk_source_dir}/${data_path}.cid"
          # NOTE(review): removes the .md5 link whatever $algo is -- confirm
          # that this is intended.
          rm -f "${itk_source_dir}/${data_path}.md5"
        fi
      elif [[ -n "${ExternalData_OBJECT_STORES:-}" && -e "${external_object}" ]]; then
        # Only consult the external store when the variable is set; otherwise
        # the test would probe /<ALGO>/<hash> at the filesystem root.
        mkdir -p "${object_store}/${algo_upper}" \
          || die "Could not create ${object_store}/${algo_upper}!"
        cp "${external_object}" "${object_path}" \
          || die "Could not copy ${external_object} into the object store!"
      elif [[ "$algo" == "cid" ]]; then
        mkdir -p "${object_store}/${algo_upper}" \
          || die "Could not create ${object_store}/${algo_upper}!"
        ipfs get "/ipfs/${algo_hash}" --output="${object_path}" \
          || die "Could not fetch ${algo_hash} for ${content_link}!"
      else
        # Expected until everything is migrated to CID's
        echo "Could not find data object in store for $content_link!"
      fi
    done < <(find . -name "*.${algo}" -print0)
  }
  # Entry point: with --create, populate the object store from the .cid
  # content links; otherwise verify them and report success. Either way,
  # finish by reminding the caller to commit the result.
  case "$create" in
    true)
      create_cids cid
      ;;
    *)
      verify_cids cid
      echo ""
      echo "Verification completed successfully."
      ;;
  esac
  echo ""
  echo "Commit new content as necessary."