Skip to content

Commit

Permalink
util: Explicitly decode/encode in utf-8.
Browse files Browse the repository at this point in the history
The default encoding for python 2 is ascii, which can't handle some
characters such as the accented letters in some people's names.
This change explicitly selects the utf-8 encoding which pacifies python
and is mostly equivalent except in these rare cases.

In python 3, the default encoding is utf-8 to begin with, and it's no
longer possible to change it. In this case, explicitly selecting the
encoding is redundant but harmless.

When we support only python 3, then this change can be reverted.

Thanks to Lakin Smith for proposing a related solution and pointing out
some information that led to this one.

Change-Id: I99bd59063c77edd712954ffe90d7de320ade49ea
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/33575
Maintainer: Gabe Black <[email protected]>
Tested-by: kokoro <[email protected]>
Reviewed-by: Lakin Smith <[email protected]>
Reviewed-by: Jason Lowe-Power <[email protected]>
  • Loading branch information
gabemblack committed Aug 27, 2020
1 parent 94000ae commit 52f392b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion util/git-pre-commit.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
# Show the appropriate object and dump it to a file
status = git.file_from_index(fname)
f = TemporaryFile()
f.write(status.encode())
f.write(status.encode('utf-8'))

verifiers = [ v(ui, opts, base=repo_base) for v in all_verifiers ]
for v in verifiers:
Expand Down
6 changes: 3 additions & 3 deletions util/style/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def status(self, filter=None, files=[], cached=False):
if filter:
cmd += [ "--diff-filter=%s" % filter ]
cmd += [ self.head_revision(), "--" ] + files
status = subprocess.check_output(cmd).decode().rstrip("\n")
status = subprocess.check_output(cmd).decode('utf-8').rstrip("\n")

if status:
return [ f.split("\t") for f in status.split("\n") ]
Expand All @@ -195,12 +195,12 @@ def status(self, filter=None, files=[], cached=False):

def file_from_index(self, name):
return subprocess.check_output(
[ self.git, "show", ":%s" % (name, ) ]).decode()
[ self.git, "show", ":%s" % (name, ) ]).decode('utf-8')

def file_from_head(self, name):
return subprocess.check_output(
[ self.git, "show", "%s:%s" % (self.head_revision(), name) ]) \
.decode()
.decode('utf-8')

def detect_repo(path="."):
"""Auto-detect the revision control system used for a source code
Expand Down
4 changes: 2 additions & 2 deletions util/style/verifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def check(self, filename, regions=all_regions, fobj=None, silent=False):
for num,line in enumerate(fobj):
if num not in regions:
continue
s_line = line.decode().rstrip('\n')
s_line = line.decode('utf-8').rstrip('\n')
if not self.check_line(s_line, language=lang):
if not silent:
self.ui.write("invalid %s in %s:%d\n" % \
Expand Down Expand Up @@ -351,7 +351,7 @@ def check(self, filename, regions=all_regions, fobj=None, silent=False):
close = True
norm_fname = self.normalize_filename(filename)

old = [ l.decode().rstrip('\n') for l in fobj ]
old = [ l.decode('utf-8').rstrip('\n') for l in fobj ]
if close:
fobj.close()

Expand Down

0 comments on commit 52f392b

Please sign in to comment.