bpo34056-always-return-bytes-from-_HackedGetData.get_data.patch 3.3 KB

  1. From 7bd6f0e5500f778e940374237b94651f60ae1990 Mon Sep 17 00:00:00 2001
  2. From: "Miss Islington (bot)"
  3. <31488909+miss-islington@users.noreply.github.com>
  4. Date: Fri, 6 Jul 2018 21:00:45 -0700
  5. Subject: [PATCH] closes bpo-34056: Always return bytes from
  6. _HackedGetData.get_data(). (GH-8130)
  7. * Always return bytes from _HackedGetData.get_data().
  8. Ensure the imp.load_source shim always returns bytes by reopening the file in
  9. binary mode if needed. Hash-based pycs have to receive the source code in bytes.
  10. It's tempting to change imp.get_suffixes() to always return 'rb' as a mode, but
  11. that breaks some stdlib tests and likely 3rdparty code, too.
  12. (cherry picked from commit b0274f2cddd36b49fe5080efbe160277ef546471)
  13. Co-authored-by: Benjamin Peterson <benjamin@python.org>
  14. ---
  15. Lib/imp.py | 13 ++++++-------
  16. Lib/test/test_imp.py | 15 +++++++++++++++
  17. 2 files changed, 21 insertions(+), 7 deletions(-)
  18. diff --git a/Lib/imp.py b/Lib/imp.py
  19. index 866464b245b2..31f8c766381a 100644
  20. --- a/Lib/imp.py
  21. +++ b/Lib/imp.py
  22. @@ -142,17 +142,16 @@ def __init__(self, fullname, path, file=None):
  23. def get_data(self, path):
  24. """Gross hack to contort loader to deal w/ load_*()'s bad API."""
  25. if self.file and path == self.path:
  26. + # The contract of get_data() requires us to return bytes. Reopen the
  27. + # file in binary mode if needed.
  28. if not self.file.closed:
  29. file = self.file
  30. - else:
  31. - self.file = file = open(self.path, 'r')
  32. + if 'b' not in file.mode:
  33. + file.close()
  34. + if self.file.closed:
  35. + self.file = file = open(self.path, 'rb')
  36. with file:
  37. - # Technically should be returning bytes, but
  38. - # SourceLoader.get_code() just passed what is returned to
  39. - # compile() which can handle str. And converting to bytes would
  40. - # require figuring out the encoding to decode to and
  41. - # tokenize.detect_encoding() only accepts bytes.
  42. return file.read()
  43. else:
  44. return super().get_data(path)
  45. diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py
  46. index a115e60d4e4f..bb0144b12d41 100644
  47. --- a/Lib/test/test_imp.py
  48. +++ b/Lib/test/test_imp.py
  49. @@ -2,6 +2,7 @@
  50. import importlib.util
  51. import os
  52. import os.path
  53. +import py_compile
  54. import sys
  55. from test import support
  56. from test.support import script_helper
  57. @@ -350,6 +351,20 @@ def test_pyc_invalidation_mode_from_cmdline(self):
  58. res = script_helper.assert_python_ok(*args)
  59. self.assertEqual(res.out.strip().decode('utf-8'), expected)
  60. + def test_find_and_load_checked_pyc(self):
  61. + # issue 34056
  62. + with support.temp_cwd():
  63. + with open('mymod.py', 'wb') as fp:
  64. + fp.write(b'x = 42\n')
  65. + py_compile.compile(
  66. + 'mymod.py',
  67. + doraise=True,
  68. + invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
  69. + )
  70. + file, path, description = imp.find_module('mymod', path=['.'])
  71. + mod = imp.load_module('mymod', file, path, description)
  72. + self.assertEqual(mod.x, 42)
  73. +
  74. class ReloadTests(unittest.TestCase):