aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author merakor <cem@ckyln.com> 2021-02-04 09:42:49 +0000
committer merakor <cem@ckyln.com> 2021-02-04 09:42:49 +0000
commit 37d93c5413b1025c8490b57edf2dea821320d2ca (patch)
tree e48f5214c88cb2c032b99cbbe0fa492b6d7e5a56
parent 492d8cca79b1cb392aa585bce2e269f40dba070a (diff)
download cpt-37d93c5413b1025c8490b57edf2dea821320d2ca.tar.gz
pkg_extract(): Add support for pax and add new methods
Even though we had a portable extraction method for tarballs, this slowed down the extraction immensely. I have added 2 new methods for tarball extraction so that we have faster extraction speeds based on the system tar implementation. Here are the options in order of preference:

- pax: Although an uncommon program, pax is defined by POSIX, and supports regular expressions, meaning that we can 'strip' components. This is slightly faster than bsdtar and GNU tar.
- bsdtar/gnutar: We simply use '--strip-components 1' for these tar implementations.
- tar: This is the portable method we had been using for a while, it's good, gets the job done, but it is slower.

Here is also a little benchmarking I did by extracting the Linux tarball 10 times per method.

    tarball=$CPT_CACHE/sources/linux-headers/linux-5.10.12.tar.xz
    for extract in pax bsdtar tar; do
        time sh -c ". cpt-lib
            extract=$extract
            for i in \$(seq 10); do
                mkdir -p hdr; cd hdr; tar_extract \"$tarball\"; cd ..; rm -rf hdr
            done"
    done

This yields the following output, methods are as ordered above:

    sh -c 75.21s user 11.11s system 115% cpu 1:14.90 total
    sh -c 78.68s user 14.64s system 124% cpu 1:15.15 total
    sh -c 99.88s user 69.99s system 119% cpu 2:21.70 total

FossilOrigin-Name: 45e3a9a03e7eff9b30b0e911d68d44068e8681ebf4501f271c8a686118f29243
-rw-r--r-- src/cpt-lib.in 126
1 files changed, 71 insertions, 55 deletions
diff --git a/src/cpt-lib.in b/src/cpt-lib.in
index 5690ebe..8c7989e 100644
--- a/src/cpt-lib.in
+++ b/src/cpt-lib.in
@@ -479,6 +479,69 @@ sh256() {
while read -r hash _; do printf '%s %s\n' "$hash" "$1"; done
}
+tar_extract() {
+ # Tarball extraction function that prefers pax(1) over tar(1). The reason we
+ # are preferring pax is that we can strip components without relying on
+ # ugly hacks such as the ones we are doing for 'tar'. Using 'tar' means that
+ # we either have to sacrifice speed or portability, and we are choosing to
+ # sacrifice speed. Fortunately, we don't have to make such a choice when
+ # using pax.
+ case "${extract##*/}" in
+ pax) decompress "$1" | pax -r -s '/[^\/]*\///' ;;
+ gtar|bsdtar) decompress "$1" | "$tar" xf - --strip-components 1 ;;
+ tar) decompress "$1" > .ktar
+
+ "$tar" xf .ktar || return
+
+ # We now list the contents of the tarball so we can do our
+ # version of 'strip-components'.
+ "$tar" tf .ktar |
+ while read -r file; do printf '%s\n' "${file%%/*}"; done |
+
+ # Do not repeat files.
+ uniq |
+
+ # For every directory in the base we move each file
+ # inside it to the upper directory.
+ while read -r dir ; do
+
+ # Skip if we are not dealing with a directory here.
+ # This way we don't remove files on the upper directory
+ # if a tar archive doesn't need directory stripping.
+ [ -d "${dir#.}" ] || continue
+
+ # Change into the directory in a subshell so we don't
+ # need to cd back to the upper directory.
+ (
+ cd "$dir"
+
+ # We use find because we want to move hidden files
+ # as well.
+ #
+ # Skip the file if it has the same name as the directory.
+ # We will deal with it later.
+ #
+ # Word splitting is intentional here.
+ # shellcheck disable=2046
+ find . \( ! -name . -prune \) ! -name "$dir" \
+ -exec mv -f {} .. \;
+
+ # If a file/directory with the same name as the directory
+ # exists, append a '.cptbak' to it and move it to the
+ # upper directory.
+ ! [ -e "$dir" ] || mv "$dir" "../${dir}.cptbak"
+ )
+ rmdir "$dir"
+
+ # If a backup file exists, move it into the original location.
+ ! [ -e "${dir}.cptbak" ] || mv "${dir}.cptbak" "$dir"
+ done
+
+ # Clean up the temporary tarball.
+ rm -f .ktar
+ esac
+}
+
pkg_owner() {
set +f
@@ -697,63 +760,10 @@ pkg_extract() {
# extraction. Other filetypes are simply copied to '$mak_dir'
# which allows for manual extraction.
*://*.tar|*://*.tar.??|*://*.tar.???|*://*.tar.????|*://*.tgz|*://*.txz)
-
- decompress "$src_dir/$1/${src##*/}" > .ktar
-
- "$tar" xf .ktar || die "$1" "Couldn't extract ${src##*/}"
-
- # We now list the contents of the tarball so we can do our
- # version of 'strip-components'.
- "$tar" tf .ktar |
- while read -r file; do printf '%s\n' "${file%%/*}"; done |
-
- # Do not repeat files.
- uniq |
-
- # For every directory in the base we move each file
- # inside it to the upper directory.
- while read -r dir ; do
-
- # Skip if we are not dealing with a directory here.
- # This way we don't remove files on the upper directory
- # if a tar archive doesn't need directory stripping.
- [ -d "${dir#.}" ] || continue
-
- # Change into the directory in a subshell so we don't
- # need to cd back to the upper directory.
- (
- cd "$dir"
-
- # We use find because we want to move hidden files
- # as well.
- #
- # Skip the file if it has the same name as the directory.
- # We will deal with it later.
- #
- # Word splitting is intentional here.
- # shellcheck disable=2046
- find . \( ! -name . -prune \) ! -name "$dir" \
- -exec mv -f {} .. \;
-
- # If a file/directory with the same name as the directory
- # exists, append a '.cptbak' to it and move it to the
- # upper directory.
- ! [ -e "$dir" ] || mv "$dir" "../${dir}.cptbak"
- )
- rmdir "$dir"
-
- # If a backup file exists, move it into the original location.
- ! [ -e "${dir}.cptbak" ] || mv "${dir}.cptbak" "$dir"
- done
-
- # Clean up the temporary tarball.
- rm -f .ktar
- ;;
+ tar_extract "$src_dir/$1/${src##*/}" ;;
*://*.cpio|*://*.cpio.??|*://*.cpio.???|*://*.cpio.????)
- decompress "$src_dir/$1/${src##*/}" | cpio -i
-
- ;;
+ decompress "$src_dir/$1/${src##*/}" | pax -r ;;
*://*.zip)
unzip "$src_dir/$1/${src##*/}" ||
@@ -1961,6 +1971,12 @@ create_cache() {
# you value performance.
tar=$(command -v bsdtar || command -v gtar) || tar=tar
+ # Prefer libarchive tar, GNU tar, or the POSIX defined pax for tarball
+ # extraction, as they can strip components, which is much much faster than
+ # our portability function. Our first preference is pax, because it is
+ # actually slightly faster than bsdtar and GNU tar.
+ extract=$(command -v pax || command -v "$tar")
+
# Figure out which 'sudo' command to use based on the user's choice or
# what is available on the system.
su=${CPT_SU:-$(command -v sls ||