blob: bf99348266b70cfab304027c120d3685546e2257 [file] [log] [blame]
"""Defines any IO utilities used by isort"""
import locale
import re
from pathlib import Path
from typing import NamedTuple, Tuple
from .exceptions import UnableToDetermineEncoding
# Matches an encoding declaration ("coding:" or "coding=") that appears inside a
# "#" comment at the start of a bytes line, optionally preceded by spaces, tabs
# or form feeds. Group 1 captures the declared encoding name.
_ENCODING_PATTERN = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")
class File(NamedTuple):
    """A source file: its text, its resolved path, and the encoding of the text."""

    contents: str
    path: Path
    encoding: str

    @staticmethod
    def read(filename: str) -> "File":
        """Load *filename* from disk and wrap its text, resolved path and encoding."""
        resolved = Path(filename).resolve()
        text, detected_encoding = _read_file_contents(resolved)
        return File(text, resolved, detected_encoding)

    @staticmethod
    def from_contents(contents: str, filename: str) -> "File":
        """Wrap already-loaded *contents*; the encoding is detected from the text itself."""
        # Resolve the path first, then detect the encoding (same order as before).
        resolved = Path(filename).resolve()
        detected_encoding = _determine_content_encoding(contents)
        return File(contents, resolved, detected_encoding)

    @property
    def extension(self):
        """The path's final suffix with its leading dot(s) stripped, e.g. ``"py"``."""
        suffix = self.path.suffix
        return suffix.lstrip(".")
def _determine_stream_encoding(stream, default: str = "utf-8") -> str:
    """Return the encoding declared in the first two lines of *stream*.

    *stream* is any iterable of bytes lines. Only the first two lines are checked
    against ``_ENCODING_PATTERN``; if neither declares an encoding, *default* is
    returned.
    """
    for index, raw_line in enumerate(stream):
        # Only the first two lines may carry the declaration.
        if index >= 2:
            break
        # The pattern is anchored with "^", so a match can only start at position 0.
        declaration = _ENCODING_PATTERN.match(raw_line)
        if declaration is not None:
            return declaration.group(1).decode("ascii")

    return default
def _determine_content_encoding(content: str, default: str = "utf-8"):
    """Return the encoding declared in *content*, or *default* if none is declared.

    The text is encoded with *default* and split on newlines so it can be scanned
    as bytes lines by ``_determine_stream_encoding``.
    """
    encoded = content.encode(default)
    byte_lines = encoded.split(b"\n")
    return _determine_stream_encoding(byte_lines, default=default)
def _determine_file_encoding(file_path: Path, default: str = "utf-8") -> str:
    """Return the encoding declared in the file at *file_path*, or *default*.

    The file is opened in binary mode so its lines can be scanned as raw bytes.
    """
    # see https://www.python.org/dev/peps/pep-0263/
    with file_path.open("rb") as raw_stream:
        detected = _determine_stream_encoding(raw_stream, default=default)
    return detected
def _read_file_contents(file_path: Path) -> Tuple[str, str]:
    """Read *file_path* as text and report which encoding decoded it.

    The encoding reported by ``_determine_file_encoding`` is tried first, then the
    locale's preferred encoding. The file is opened with ``newline=""`` so line
    endings are returned untranslated.

    Returns:
        A ``(contents, encoding)`` tuple, where ``encoding`` is the candidate that
        successfully decoded the file.

    Raises:
        UnableToDetermineEncoding: if neither candidate encoding can be used to
            decode the file.
    """
    encoding = _determine_file_encoding(file_path)
    # Fallback: the default encoding for open(mode='r') on the system
    fallback_encoding = locale.getpreferredencoding(False)

    for candidate in (encoding, fallback_encoding):
        try:
            with file_path.open(encoding=candidate, newline="") as file_to_import_sort:
                return file_to_import_sort.read(), candidate
        except (UnicodeDecodeError, LookupError):
            # UnicodeDecodeError: the bytes are not valid in this encoding.
            # LookupError: the declared codec name is unknown to Python (e.g. a
            # misspelled coding cookie); treat it like a failed decode so the
            # fallback is still attempted instead of crashing.
            continue

    raise UnableToDetermineEncoding(file_path, encoding, fallback_encoding)