Skip to content

Commit bf81954

Browse files
authored
Fix formatting files with unicode (#242)
* setup a failing test
* get fixture to actually work with help from copilot
* make failing tests
* make it pass
* simplify fixture setup
* format doesn't fix method names, so make output be able to pass
* clean up changes some
* format
* undo adding pytest-mock, not needed yet
* Apply suggestions from code review
1 parent 2d81090 commit bf81954

File tree

14 files changed

+235
-8
lines changed

14 files changed

+235
-8
lines changed

ni_python_styleguide/_fix.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import pathlib
3+
import shutil
34
import typing
45
from collections import defaultdict
56
from typing import Iterable
@@ -21,7 +22,7 @@
2122

2223

2324
def _sort_imports(file: pathlib.Path, app_import_names):
24-
raw = file.read_text()
25+
raw = file.read_text(encoding=_utils.DEFAULT_ENCODING)
2526
isort_config = isort.Config(
2627
settings_file=str(_config_constants.ISORT_CONFIG_FILE),
2728
known_first_party=filter(None, app_import_names.split(",")),
@@ -30,7 +31,7 @@ def _sort_imports(file: pathlib.Path, app_import_names):
3031
raw,
3132
config=isort_config,
3233
)
33-
file.write_text(output)
34+
file.write_text(output, encoding=_utils.DEFAULT_ENCODING)
3435

3536

3637
def _format_imports(file: pathlib.Path, app_import_names: Iterable[str]) -> None:
@@ -117,13 +118,13 @@ def fix(
117118
)
118119
else:
119120
with temp_file.multi_access_tempfile(suffix="__" + bad_file.name) as working_file:
120-
working_file.write_text(bad_file.read_text())
121+
shutil.copyfile(bad_file, working_file)
121122
_format.format(working_file, "-q")
122123
_format_imports(file=working_file, app_import_names=app_import_names)
123124

124125
diff_lines = better_diff.unified_plus.format_diff(
125-
bad_file.read_text(),
126-
working_file.read_text(),
126+
bad_file.read_text(encoding=_utils.DEFAULT_ENCODING),
127+
working_file.read_text(encoding=_utils.DEFAULT_ENCODING),
127128
fromfile=f"{_posix_relative_if_under(bad_file, pathlib.Path.cwd())}",
128129
tofile=f"{_posix_relative_if_under(bad_file, pathlib.Path.cwd())}_formatted",
129130
)

ni_python_styleguide/_utils/code_analysis.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ def find_import_region(file: pathlib.Path) -> Tuple[int, int]:
1111
file: pathlib.Path path to file to evaluate
1212
Return: Tuple[int, int] the start and ending lines (0 based) of the module level imports
1313
"""
14-
file_contents = file.read_text()
14+
file_contents = file.read_text(
15+
encoding="utf-8"
16+
) # can't use DEFAULT_ENCODING here due to possible circular imports
1517
tree = ast.parse(file_contents)
1618
end = start = 0
1719
for node in tree.body: # only walk top level items

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ pytest = ">=6.0.1"
5757
pytest_click = ">=1.0.2"
5858
pytest-snapshot = ">=0.6.3"
5959

60-
6160
[tool.poetry.scripts]
6261
ni-python-styleguide = 'ni_python_styleguide._cli:main'
6362
nps = 'ni_python_styleguide._cli:main'

tests/conftest.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Useful plugins/fixtures which can (and should) be used in any test."""
22

33
import os
4+
import pathlib
45

56
import click.testing
67
import pytest
@@ -59,3 +60,26 @@ def chdir():
5960
cwd = os.getcwd()
6061
yield os.chdir
6162
os.chdir(cwd)
63+
64+
65+
@pytest.fixture(autouse=True)
66+
def force_ascii_encoding(monkeypatch):
67+
"""Force ASCII encoding as default for all file operations to catch missing encoding args."""
68+
# Patch pathlib.Path.read_text and write_text to use ASCII when encoding not specified
69+
original_read_text = pathlib.Path.read_text
70+
original_write_text = pathlib.Path.write_text
71+
72+
def ascii_read_text(self, encoding=None, errors=None):
73+
if encoding is None:
74+
encoding = "ascii"
75+
return original_read_text(self, encoding=encoding, errors=errors)
76+
77+
def ascii_write_text(self, data, encoding=None, errors=None, **kwargs):
78+
if encoding is None:
79+
encoding = "ascii"
80+
return original_write_text(self, data, encoding=encoding, errors=errors, **kwargs)
81+
82+
monkeypatch.setattr(pathlib.Path, "read_text", ascii_read_text)
83+
monkeypatch.setattr(pathlib.Path, "write_text", ascii_write_text)
84+
85+
yield None

tests/test_cli/acknowledge_existing_errors_test_cases__snapshots/unicode_in_files/input.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,13 @@ def problem_chars(self):
1717
def method_withBadName_andParams(my_normal_param, myBadlyNamedParam, my_other_Bad_param):
1818
"""Provide example where black will want to split out result."""
1919
return 5 + 7
20+
21+
data = ("device_name, supported_encodings",
22+
[
23+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
24+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
25+
("デバイス", ["932", "shift-jis", "utf-8"]),
26+
("장치", ["utf-8", "euc-kr"]),
27+
("设备", ["utf-8", "gbk"]),
28+
],
29+
)

tests/test_cli/acknowledge_existing_errors_test_cases__snapshots/unicode_in_files/output.py.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,13 @@ class Foo:
1717
def method_withBadName_andParams(my_normal_param, myBadlyNamedParam, my_other_Bad_param): # noqa: N803, N802 - argument name 'myBadlyNamedParam' should be lowercase (auto-generated noqa), function name 'method_withBadName_andParams' should be lowercase (auto-generated noqa)
1818
"""Provide example where black will want to split out result."""
1919
return 5 + 7
20+
21+
data = ("device_name, supported_encodings",
22+
[
23+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
24+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
25+
("デバイス", ["932", "shift-jis", "utf-8"]),
26+
("장치", ["utf-8", "euc-kr"]),
27+
("设备", ["utf-8", "gbk"]),
28+
],
29+
)

tests/test_cli/acknowledge_existing_errors_test_cases__snapshots/unicode_in_files/output__aggressive.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,15 @@ def method_withBadName_andParams( # noqa: N802 - function name 'method_withBadN
2121
):
2222
"""Provide example where black will want to split out result."""
2323
return 5 + 7
24+
25+
26+
data = (
27+
"device_name, supported_encodings",
28+
[
29+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
30+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
31+
("デバイス", ["932", "shift-jis", "utf-8"]),
32+
("장치", ["utf-8", "euc-kr"]),
33+
("设备", ["utf-8", "gbk"]),
34+
],
35+
)
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
"""Unicode in file should not cause error (e.g., ©)."""
2+
3+
4+
class Foo:
5+
"""Example class with unicode consts."""
6+
7+
def __init__(self) -> None:
8+
"""Instantiate Foo class."""
9+
self._problem_chars = "π”"
10+
11+
@property
12+
def problem_chars(self):
13+
"""Return stored string with a unicode char."""
14+
return self._problem_chars
15+
16+
17+
def method_withBadName_andParams(my_normal_param, myBadlyNamedParam, my_other_Bad_param):
18+
"""Provide example where black will want to split out result."""
19+
return 5 + 7
20+
21+
data = ("device_name, supported_encodings",
22+
[
23+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
24+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
25+
("デバイス", ["932", "shift-jis", "utf-8"]),
26+
("장치", ["utf-8", "euc-kr"]),
27+
("设备", ["utf-8", "gbk"]),
28+
],
29+
)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Unicode in file should not cause error (e.g., ©)."""
2+
3+
4+
class Foo:
5+
"""Example class with unicode consts."""
6+
7+
def __init__(self) -> None:
8+
"""Instantiate Foo class."""
9+
self._problem_chars = "π”"
10+
11+
@property
12+
def problem_chars(self):
13+
"""Return stored string with a unicode char."""
14+
return self._problem_chars
15+
16+
17+
def method_withBadName_andParams(my_normal_param, myBadlyNamedParam, my_other_Bad_param):
18+
"""Provide example where black will want to split out result."""
19+
return 5 + 7
20+
21+
22+
data = (
23+
"device_name, supported_encodings",
24+
[
25+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
26+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
27+
("デバイス", ["932", "shift-jis", "utf-8"]),
28+
("장치", ["utf-8", "euc-kr"]),
29+
("设备", ["utf-8", "gbk"]),
30+
],
31+
)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""Unicode in file should not cause error (e.g., ©)."""
2+
3+
4+
class Foo:
5+
"""Example class with unicode consts."""
6+
7+
def __init__(self) -> None:
8+
"""Instantiate Foo class."""
9+
self._problem_chars = "π”"
10+
11+
@property
12+
def problem_chars(self):
13+
"""Return stored string with a unicode char."""
14+
return self._problem_chars
15+
16+
17+
def method_withBadName_andParams( # noqa: N802 - function name 'method_withBadName_andParams' should be lowercase (auto-generated noqa)
18+
my_normal_param,
19+
myBadlyNamedParam, # noqa: N803 - argument name 'myBadlyNamedParam' should be lowercase (auto-generated noqa)
20+
my_other_Bad_param, # noqa: N803 - argument name 'my_other_Bad_param' should be lowercase (auto-generated noqa)
21+
):
22+
"""Provide example where black will want to split out result."""
23+
return 5 + 7
24+
25+
26+
data = (
27+
"device_name, supported_encodings",
28+
[
29+
("Gerät", ["1252", "iso-8859-1", "utf-8"]),
30+
("l' appareil", ["1252", "iso-8859-1", "utf-8"]),
31+
("デバイス", ["932", "shift-jis", "utf-8"]),
32+
("장치", ["utf-8", "euc-kr"]),
33+
("设备", ["utf-8", "gbk"]),
34+
],
35+
)

0 commit comments

Comments
 (0)