Skip to content

Commit 19c762d

Browse files
authored
Add encoding CLI arg, retry conversion with UTF-8 on decode failure (#114)
* Add encoding CLI arg, retry conversion with UTF-8 on decode failure, improve typing for convert function, improve filename detection for string arg to convert
* Added bezier test, added output check, added missing specs
* PR comments
* Simplify regex
* write output using encoding flag, harden regex
1 parent f34a49d commit 19c762d

File tree

17 files changed

+7257
-32
lines changed

17 files changed

+7257
-32
lines changed

.github/workflows/lint.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,16 @@ jobs:
2525
- run: pip install -r requirements.txt
2626
- run: pip install black ruff pytest pytest-cov
2727
- run: ruff check .
28-
- name: Run mypy if on 3.12, pytype otherwise
28+
- name: Run pytype if on 3.11, pyrefly if on 3.14, mypy otherwise
2929
run: |
3030
if [[ '${{ steps.cpython3.outputs.python-version }}' == 3.11* ]]; then
3131
pip install pytype
3232
pytype -j auto graphviz2drawio
3333
elif [[ '${{ steps.cpython3.outputs.python-version }}' == 3.14* ]]; then
3434
echo "Using pyrefly for Python 3.14"
3535
pip install pyrefly
36-
pyrefly check
36+
pyrefly check graphviz2drawio
3737
else
38-
echo "Using mypy for Python 3.12"
3938
pip install mypy
4039
mypy graphviz2drawio --ignore-missing-imports
4140
fi

.idea/ryecharm-overrides.xml

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/ryecharm.xml

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

graphviz2drawio/__main__.py

100644 → 100755
Lines changed: 54 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
import pathlib
1+
#!/usr/bin/env python3
2+
23
import sys
34
from argparse import Namespace
45
from io import TextIOWrapper
6+
from pathlib import Path
57
from sys import stderr
68

79
from .graphviz2drawio import convert
@@ -12,33 +14,63 @@
1214
RED_TEXT = "\033[91m"
1315
BOLD = "\033[1m"
1416

17+
UTF8 = "utf-8"
18+
1519

1620
def _gv_filename_to_xml(filename: str) -> str:
1721
return ".".join(filename.split(".")[:-1]) + ".xml"
1822

1923

20-
def _convert_file(to_convert: TextIOWrapper, program: str, outfile: str | None) -> None:
24+
def _write_stderr_message(to_convert: str) -> None:
25+
stderr.write(f"{RED_TEXT}{BOLD}Error converting {to_convert}\n")
26+
stderr.write("Please open a report at\n")
27+
stderr.write("https://github.com/hbmartin/graphviz2drawio/issues\n")
28+
stderr.write("and include your diagram and the following information:\n\n")
29+
stderr.write(DEFAULT_TEXT)
30+
stderr.write(f"Python: {sys.version}, g2d: {__version__}\n")
31+
32+
33+
def _convert_file(
34+
to_convert: Path | TextIOWrapper,
35+
program: str,
36+
encoding: str,
37+
outfile: str | None,
38+
) -> None:
39+
output: str | None = None
2140
try:
22-
output = convert(to_convert.read(), program)
23-
except BaseException:
24-
stderr.write(f"{RED_TEXT}{BOLD}Error converting {to_convert}\n")
25-
stderr.write("Please open a report at\n")
26-
stderr.write("https://github.com/hbmartin/graphviz2drawio/issues\n")
27-
stderr.write("and include your diagram and the following error:\n\n")
28-
stderr.write(DEFAULT_TEXT)
29-
stderr.write(f"Python: {sys.version}, g2d: {__version__}\n")
41+
if isinstance(to_convert, TextIOWrapper):
42+
output = convert(to_convert, program)
43+
elif isinstance(to_convert, Path):
44+
with to_convert.open(encoding=encoding) as contents:
45+
output = convert(contents.read(), program)
46+
except UnicodeDecodeError:
47+
if encoding.lower() != UTF8 and isinstance(to_convert, Path):
48+
# Attempt to automatically recover for file. Chinese Windows systems in
49+
# particular often use other encodings e.g. gbk, cp950, cp1252, etc. but
50+
# the actual dot files are still UTF-8 encoded
51+
# https://github.com/hbmartin/graphviz2drawio/issues/105
52+
return _convert_file(to_convert, program, UTF8, outfile)
53+
54+
_write_stderr_message(str(to_convert))
3055
raise
31-
finally:
32-
to_convert.close()
56+
57+
except Exception:
58+
_write_stderr_message(str(to_convert))
59+
raise
60+
61+
if output is None:
62+
_write_stderr_message(str(to_convert))
63+
return None
3364

3465
if outfile is None:
3566
print(output)
36-
return
67+
return None
3768

38-
out_path = pathlib.Path(outfile)
69+
out_path = Path(outfile)
3970
out_path.parent.mkdir(parents=True, exist_ok=True)
40-
out_path.write_text(output)
71+
out_path.write_text(output, encoding=encoding)
4172
stderr.write("Converted file: " + outfile + "\n")
73+
return None
4274

4375

4476
def main() -> None:
@@ -50,7 +82,7 @@ def main() -> None:
5082
_validate_args(args)
5183

5284
if args.stdout and args.outfile is not None:
53-
sys.stdout.write(f"Writing to {args.outfile} (ignoring stdout)\n")
85+
sys.stderr.write(f"Writing to {args.outfile} (ignoring stdout)\n")
5486

5587
if len(args.to_convert) == 1:
5688
in_files = args.to_convert
@@ -60,7 +92,12 @@ def main() -> None:
6092
out_files = [_gv_filename_to_xml(in_file.name) for in_file in args.to_convert]
6193

6294
for in_file, out_file in zip(in_files, out_files, strict=True):
63-
_convert_file(in_file, args.program, out_file)
95+
_convert_file(
96+
to_convert=in_file,
97+
program=args.program,
98+
encoding=args.encoding,
99+
outfile=out_file,
100+
)
64101

65102

66103
def _determine_single_output(args: Namespace) -> list[str | None]:

graphviz2drawio/graphviz2drawio.py

100755 → 100644
Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
#!/usr/bin/env python3
2-
3-
from typing import IO
1+
import re
2+
from io import TextIOBase
3+
from pathlib import Path
4+
from typing import TextIO
45

56
from pygraphviz import AGraph
67

@@ -9,18 +10,19 @@
910
from .mx.MxGraph import MxGraph
1011

1112

12-
def convert(graph_to_convert: AGraph | str | IO, layout_prog: str = "dot") -> str:
13-
if isinstance(graph_to_convert, AGraph):
14-
graph = graph_to_convert
15-
else:
16-
graph = AGraph(graph_to_convert)
13+
def convert(
14+
graph_to_convert: AGraph | str | TextIOBase | Path | TextIO,
15+
layout_prog: str = "dot",
16+
) -> str:
17+
graph = _load_pygraphviz_graph(graph_to_convert)
1718

1819
graph_edges: dict[str, dict] = {
1920
f"{e[0]}->{e[1]}-"
2021
# pyrefly: ignore # missing-attribute
2122
+ (e.attr.get("xlabel") or e.attr.get("label") or ""): e.attr.to_dict()
2223
for e in graph.edges_iter()
2324
}
25+
2426
# pyrefly: ignore # missing-attribute
2527
graph_nodes: dict[str, dict] = {n: n.attr.to_dict() for n in graph.nodes_iter()}
2628

@@ -45,3 +47,26 @@ def convert(graph_to_convert: AGraph | str | IO, layout_prog: str = "dot") -> st
4547
# Put clusters first, so that nodes are drawn in front
4648
mx_graph = MxGraph(clusters, nodes, edges)
4749
return mx_graph.value()
50+
51+
52+
def _load_pygraphviz_graph(
53+
graph_to_convert: AGraph | str | TextIOBase | Path | TextIO,
54+
) -> AGraph:
55+
if isinstance(graph_to_convert, AGraph):
56+
return graph_to_convert
57+
if isinstance(graph_to_convert, str):
58+
# This fixes a pygraphviz bug where a string beginning with a comment
59+
# is mistakenly identified as a filename.
60+
# https://github.com/pygraphviz/pygraphviz/issues/536
61+
pattern = re.compile(
62+
pattern=r"^(?=(\s*))\1(strict)?(?=(\s*))\3(graph|digraph)[^{]*{",
63+
flags=re.MULTILINE,
64+
)
65+
if pattern.search(graph_to_convert):
66+
return AGraph(string=graph_to_convert)
67+
return AGraph(filename=graph_to_convert)
68+
# pyrefly: ignore # missing-attribute
69+
if hasattr(graph_to_convert, "read") and callable(graph_to_convert.read):
70+
return AGraph(string=graph_to_convert.read())
71+
# Use builtin type detection which includes: hasattr(thing, "open")
72+
return AGraph(graph_to_convert)

graphviz2drawio/models/Arguments.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
1-
from argparse import ArgumentParser, FileType
1+
import sys
2+
from argparse import ArgumentParser
3+
from locale import getpreferredencoding
4+
from pathlib import Path
25
from sys import stdin
6+
from typing import TextIO
7+
8+
9+
class NonOpeningFileType:
10+
def __call__(self, string: str) -> TextIO | Path:
11+
# the special argument "-" means sys.std{in,out}
12+
return sys.stdin if string == "-" else Path(string)
313

414

515
class Arguments(ArgumentParser):
@@ -13,7 +23,7 @@ def __init__(self, version: str) -> None:
1323
metavar="file(s).dot",
1424
help="Path of the graphviz file(s) to convert (or stdin).",
1525
nargs="*",
16-
type=FileType("r"),
26+
type=NonOpeningFileType(),
1727
default=[stdin],
1828
)
1929
self.add_argument(
@@ -34,9 +44,16 @@ def __init__(self, version: str) -> None:
3444
self.add_argument(
3545
"-p",
3646
"--program",
37-
help="layout program (defaults to dot)",
47+
help="Layout program (defaults to dot)",
3848
default="dot",
3949
)
50+
self.add_argument(
51+
"--encoding",
52+
"-e",
53+
type=str,
54+
default=getpreferredencoding(do_setlocale=False).lower(),
55+
help="Encoding to use when opening files (default: %(default)s)",
56+
)
4057
self.add_argument(
4158
"--version",
4259
action="version",

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ line-length = 88
1010
indent-width = 4
1111

1212
lint.select = ["ALL"]
13-
lint.ignore = ["D100", "D101", "D102", "D103", "D104", "D105", "D107", "D203", "D213", "ERA001", "ICN001", "PLR0913", "S314", "SIM102", "TD002", "TID252", "N999"]
13+
lint.ignore = ["D100", "D101", "D102", "D103", "D104", "D105", "D107", "D203", "D213", "ERA001", "ICN001", "PLR0913", "PT009", "S314", "SIM102", "TD002", "TID252", "N999"]
1414
target-version = "py310"
1515

1616
[tool.ruff.format]

0 commit comments

Comments
 (0)