extract.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. # Extract files from archives.
  2. from os import O_CREAT, O_WRONLY, fdopen, mkdir, open as osopen, utime
  3. try:
  4. from os import O_BINARY
  5. except ImportError:
  6. # Platforms that do not define O_BINARY do not need it either.
  7. O_BINARY = 0
  8. try:
  9. from os import symlink
  10. except ImportError:
  11. def symlink(source, link_name):
  12. raise RuntimeError(
  13. 'OS does not support symlink creation: %s -> %s'
  14. % (link_name, source)
  15. )
  16. from os.path import abspath, isdir, join as joinpath, sep, split as splitpath
  17. from stat import S_IRWXU, S_IRWXG, S_IRWXO, S_IXUSR, S_IXGRP, S_IXOTH
  18. import sys
  19. import tarfile
  20. from detectsys import detectOS
# Value returned by detectOS(); extract() checks whether it starts with
# 'mingw' to decide whether directory time stamps are set.
hostOS = detectOS()
# Number of bytes requested per read() call when extract() copies file data
# out of the archive.
# Note: Larger buffers might make extraction slower.
bufSize = 16384
  24. def extract(archivePath, destDir, rename = None):
  25. '''Extract the given archive to the given directory.
  26. If a rename function is given, it is called with the output path relative
  27. to the destination directory; the value returned by the rename function is
  28. used as the actual relative destination file path.
  29. This function sets file ownership and permissions like is done in newly
  30. created files and ignores the ownership and permissions from the archive,
  31. since we are not restoring a backup.
  32. '''
  33. absDestDir = abspath(destDir) + sep
  34. if not isdir(absDestDir):
  35. raise ValueError(
  36. 'Destination directory "%s" does not exist' % absDestDir
  37. )
  38. tar = tarfile.open(archivePath)
  39. # Note: According to the Python 2.6 docs, errorlevel can be passed as a
  40. # keyword argument to the open() call, but on Python 2.5 this does
  41. # not work.
  42. tar.errorlevel = 2
  43. try:
  44. for member in tar.getmembers():
  45. absMemberPath = abspath(joinpath(absDestDir, member.name))
  46. if member.isdir():
  47. absMemberPath += sep
  48. if not absMemberPath.startswith(absDestDir):
  49. raise ValueError(
  50. 'Refusing to extract tar entry "%s" '
  51. 'outside destination directory'
  52. % member.name
  53. )
  54. if rename:
  55. absMemberPath = absDestDir + rename(
  56. absMemberPath[len(absDestDir) : ]
  57. )
  58. if member.isfile():
  59. mode = S_IRWXU | S_IRWXG | S_IRWXO
  60. if not (member.mode & S_IXUSR):
  61. mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH)
  62. out = fdopen(
  63. osopen(absMemberPath, O_CREAT | O_WRONLY | O_BINARY, mode),
  64. 'wb'
  65. )
  66. try:
  67. inp = tar.extractfile(member)
  68. bytesLeft = member.size
  69. while bytesLeft > 0:
  70. buf = inp.read(bufSize)
  71. out.write(buf)
  72. bytesLeft -= len(buf)
  73. buf = None
  74. finally:
  75. out.close()
  76. elif member.isdir():
  77. if not isdir(absMemberPath):
  78. mkdir(absMemberPath)
  79. elif member.issym():
  80. symlink(member.linkname, absMemberPath)
  81. else:
  82. raise ValueError(
  83. 'Cannot extract tar entry "%s": '
  84. 'not a regular file, symlink or directory'
  85. % member.name
  86. )
  87. # Set file/directory modification time to match the archive.
  88. # For example autotools track dependencies between archived files
  89. # and will attempt to regenerate them if the time stamps indicate
  90. # one is older than the other.
  91. # Note: Apparently Python 2.5's utime() cannot set timestamps on
  92. # directories in Windows.
  93. if member.isfile() or (
  94. member.isdir() and not hostOS.startswith('mingw')
  95. ):
  96. utime(absMemberPath, (member.mtime, member.mtime))
  97. finally:
  98. tar.close()
  99. class TopLevelDirRenamer(object):
  100. def __init__(self, newName):
  101. self.newName = newName
  102. def __call__(self, oldPath):
  103. head, tail = splitpath(oldPath)
  104. headParts = head.split(sep)
  105. if not headParts:
  106. raise ValueError(
  107. 'Directory part is empty for entry "%s"' % oldPath
  108. )
  109. headParts[0] = self.newName
  110. return sep.join(headParts + [ tail ])
  111. if __name__ == '__main__':
  112. if 3 <= len(sys.argv) <= 4:
  113. if len(sys.argv) == 4:
  114. renameTopLevelDir = TopLevelDirRenamer(sys.argv[3])
  115. else:
  116. renameTopLevelDir = None
  117. extract(sys.argv[1], sys.argv[2], renameTopLevelDir)
  118. else:
  119. print >> sys.stderr, \
  120. 'Usage: python extract.py archive destination [new-top-level-dir]'
  121. sys.exit(2)