Python is great. It's so easy to write, and there are so many useful libraries out there. But then again, let's be honest. It's not the most performant language out there. And that's exactly the problem sometimes. We just hit the wall. Rewriting parts of our code in another language might speed things up significantly - especially when the code is compute-heavy. In such cases, Rust is a great choice due to its performance, memory safety, and strong support for Python bindings. For example, Pydantic is a Python package that's actually written in Rust and only uses Python bindings, so we can use it directly from Python.
This article is part 1 of the series "AI-Powered Rewrite from Python to Rust".
You can find the repository with all examples here
Is logic well tested?
First things first. Rewrites are always a tricky thing. Especially if we want to implement the same behavior in another programming language. In such cases, we know we want to preserve both the behavior and the interface. It's just the performance that's bugging us. Since we don't want to alter the behavior, the first step is to ensure the existing logic is well tested.
For example, let's say that we have a log parser for logs like this one:
2024-01-15T10:23:45.123Z [INFO] service=auth request_id=abc-123 user_id=42 action=login duration_ms=150 status=success
[ERROR] 2024-01-15T10:23:45.456Z service=payment request_id=def-456 user_id=17 action=charge amount=99.99 error="timeout after 5000ms"
2024-01-15 10:23:46 WARN service=auth request_id=ghi-789 user_id=42 action=token_refresh retry_count=3
2024-01-15T10:23:47.001Z [DEBUG] service=gateway msg="Health check passed, all systems operational"
ERROR 2024-01-15T10:23:48.000Z service=payment request_id=jkl-012 user_id=99 action=refund amount=50.00 error="connection reset by peer" retry_count=2
2024-01-15T10:23:49.500Z [INFO] service=auth request_id=mno-345 user_id= action=anonymous_browse duration_ms=5
-- system restart at 2024-01-15T10:24:00Z --
2024/01/15 10:24:01 [INFO] service=gateway request_id=pqr-678 action=startup msg="Service initialized successfully"
2024-01-15T10:24:02.100Z [ERROR] service=auth request_id=stu-901 user_id=42 action=login duration_ms=3200 status=failure error="LDAP timeout" details={host="ldap-1.internal",port=636,ssl=true}
It's written in Python:
import re
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
@dataclass
class LogEntry:
    """A single parsed log line."""

    # Parsed timestamp, or None when the line carried no recognizable one.
    timestamp: Optional[datetime]
    # Level token such as "INFO" or "ERROR" (brackets stripped), or None.
    level: Optional[str]
    # key=value pairs extracted from the line; values may be str, int,
    # float, bool, None, or a nested dict (for {k=v,...} groups).
    fields: dict = field(default_factory=dict)
    # The original, unmodified input line.
    raw: str = ""
class LogParser:
    """Parses loosely structured log lines into LogEntry objects.

    A line is accepted when it contains a recognizable timestamp and/or a
    log level; whatever remains of the line is then scanned for key=value
    fields, including double-quoted strings and {k=v,...} nested groups.
    Blank lines and "-- ... --" separators are treated as noise.
    """

    # (regex, strptime format) pairs, tried in order; the first regex that
    # matches anywhere in the line wins. The fractional-seconds ISO form is
    # listed before the plain ISO form so the longer match is preferred.
    TIMESTAMP_PATTERNS = [
        (r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
        (r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z", "%Y-%m-%dT%H:%M:%SZ"),
        (r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", "%Y-%m-%d %H:%M:%S"),
        (r"\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}", "%Y/%m/%d %H:%M:%S"),
    ]
    # Matches a level token with or without surrounding brackets.
    # NOTE(review): no \b anchors, so a level embedded in a longer word
    # (e.g. "TRACE_route") would also match — confirm this is intended.
    LEVEL_PATTERN = re.compile(r"\[?(INFO|ERROR|WARN|DEBUG|TRACE|FATAL)\]?")

    def load(self, path: str) -> list[LogEntry]:
        """Parse the file at *path*, returning one LogEntry per parseable line.

        Noise lines and lines with neither a timestamp nor a level are
        silently skipped. Propagates OSError (e.g. FileNotFoundError) if the
        file cannot be opened.
        """
        entries = []
        with open(path) as f:
            for line in f:
                entry = self._parse_line(line.rstrip("\n"))
                if entry is not None:
                    entries.append(entry)
        return entries

    def _parse_timestamp(self, line: str) -> tuple[Optional[datetime], str]:
        """Extract the first matching timestamp from *line*.

        Returns (timestamp, line with the matched text spliced out), or
        (None, line) when no pattern matches.
        """
        for pattern, fmt in self.TIMESTAMP_PATTERNS:
            match = re.search(pattern, line)
            if match:
                ts = datetime.strptime(match.group(), fmt)
                # Remove the matched text so later stages don't re-scan it.
                remaining = line[:match.start()] + line[match.end():]
                return ts, remaining
        return None, line

    def _parse_level(self, line: str) -> tuple[Optional[str], str]:
        """Extract the log level (brackets stripped) and remove it from the line."""
        match = self.LEVEL_PATTERN.search(line)
        if match:
            level = match.group().strip("[]")
            remaining = line[:match.start()] + line[match.end():]
            return level, remaining
        return None, line

    def _parse_nested(self, value: str) -> dict:
        """Parse a "{k=v,...}" group into a dict.

        Values are coerced to bool/int/float when possible; commas inside
        double-quoted values do not split entries.
        """
        result = {}
        inner = value.strip("{}")
        # First pass: split on commas, but only those outside quotes.
        parts = []
        current = ""
        in_quotes = False
        for char in inner:
            if char == '"' and (not current or current[-1] != "\\"):
                # An unescaped quote toggles quote state; the quote itself is
                # kept here and stripped during coercion below.
                in_quotes = not in_quotes
                current += char
            elif char == "," and not in_quotes:
                parts.append(current.strip())
                current = ""
            else:
                current += char
        if current.strip():
            parts.append(current.strip())
        # Second pass: split each part on the first "=" and coerce the value.
        for part in parts:
            if "=" in part:
                k, v = part.split("=", 1)
                v = v.strip('"')
                if v == "true":
                    result[k] = True
                elif v == "false":
                    result[k] = False
                else:
                    try:
                        result[k] = int(v)
                    except ValueError:
                        try:
                            result[k] = float(v)
                        except ValueError:
                            result[k] = v
        return result

    def _parse_fields(self, line: str) -> dict:
        """Scan *line* for key=value pairs.

        Handles three value shapes: "{...}" nested groups (delegated to
        _parse_nested), double-quoted strings (backslash-escaped characters
        skipped, surrounding quotes removed), and bare tokens (coerced to
        bool/int/float, or None for an empty value). Free text before the
        first real pair (a "key" containing a space) is skipped word by word.
        """
        fields = {}
        i = 0
        line = line.strip()
        while i < len(line):
            # Skip inter-field whitespace.
            if line[i] in (" ", "\t"):
                i += 1
                continue
            eq_pos = line.find("=", i)
            if eq_pos == -1:
                # No further "=" anywhere: the rest holds no pairs.
                break
            key = line[i:eq_pos].strip()
            if " " in key:
                # The text up to "=" spans a space, so it is not a single
                # key — advance past the next word and retry.
                i = line.find(" ", i) + 1
                if i == 0:
                    break
                continue
            i = eq_pos + 1
            if i < len(line) and line[i] == "{":
                # Nested group: consume until the braces balance.
                brace_count = 1
                start = i
                i += 1
                while i < len(line) and brace_count > 0:
                    if line[i] == "{":
                        brace_count += 1
                    elif line[i] == "}":
                        brace_count -= 1
                    i += 1
                fields[key] = self._parse_nested(line[start:i])
            elif i < len(line) and line[i] == '"':
                # Quoted string: consume up to the closing unescaped quote.
                i += 1
                start = i
                while i < len(line) and line[i] != '"':
                    if line[i] == "\\" and i + 1 < len(line):
                        i += 2  # skip the escaped character
                    else:
                        i += 1
                fields[key] = line[start:i]
                if i < len(line):
                    i += 1  # step over the closing quote
            else:
                # Bare token: runs to the next whitespace.
                start = i
                while i < len(line) and line[i] not in (" ", "\t"):
                    i += 1
                value = line[start:i]
                if not value:
                    fields[key] = None  # "key=" with no value at all
                elif value == "true":
                    fields[key] = True
                elif value == "false":
                    fields[key] = False
                else:
                    try:
                        fields[key] = int(value)
                    except ValueError:
                        try:
                            fields[key] = float(value)
                        except ValueError:
                            fields[key] = value
        return fields

    def _is_noise_line(self, line: str) -> bool:
        """Return True for blank lines and "-- ... --" separator lines."""
        stripped = line.strip()
        if not stripped:
            return True
        if stripped.startswith("--") and stripped.endswith("--"):
            return True
        return False

    def _parse_line(self, line: str) -> Optional[LogEntry]:
        """Parse one raw line; return None for noise or unstructured lines."""
        if self._is_noise_line(line):
            return None
        timestamp, remaining = self._parse_timestamp(line)
        level, remaining = self._parse_level(remaining)
        if timestamp is None and level is None:
            # Neither structure marker found: treat the line as unparseable.
            return None
        fields = self._parse_fields(remaining)
        return LogEntry(
            timestamp=timestamp,
            level=level,
            fields=fields,
            raw=line,
        )
It even has a test:
import os
import tempfile
from log_parser import LogParser
def test_parse_info_log():
    """A single INFO line is parsed with its level and typed fields intact."""
    log_line = '2024-01-15T10:23:45.123Z [INFO] service=auth request_id=abc-123 user_id=42 action=login duration_ms=150 status=success'
    with tempfile.NamedTemporaryFile(mode='w', suffix='.log', delete=False) as handle:
        handle.write(log_line + '\n')
        handle.flush()
        tmp_path = handle.name
    try:
        result = LogParser().load(tmp_path)
        assert len(result) == 1
        parsed = result[0]
        assert parsed.level == "INFO"
        assert parsed.fields["service"] == "auth"
        assert parsed.fields["user_id"] == 42
        assert parsed.fields["duration_ms"] == 150
    finally:
        os.unlink(tmp_path)
It's working as expected, and it even has one test. However, if we take a closer look, we can quickly see that only a small portion of the logic is tested: - only one timestamp format - only one log level - only a single-line file - only some fields from the log line
Looking at the implementation, we can also see features such as nested fields, different timestamp formats, and garbage lines. Therefore, the first step is to add the missing tests.
Logic is well tested
We can start by prompting our AI agent with the following to prepare a plan:
I'd like you to improve the tests for the log parser. The current test is minimal — please expand coverage to thoroughly test all parser behavior:
- Parametrize over all timestamp formats and log levels
- Cover nested field parsing, noise line filtering, all field types (string, int, float, bool, null, quoted with escapes), mixed types in one line, and multiline files
- Test edge cases: missing timestamp, missing fields, nonexistent file, comma in quoted nested values, garbage text before valid fields
- Use a create_log_file factory fixture that returns a file path, and a parser fixture for the LogParser instance — keep .load() calls visible in each test
Can you prepare a plan?
Once we have a plan, we should check whether there's anything missing. Potentially, we can reiterate to include missing things or change the approach. Once we're happy with the plan, it should look something like this:
---
Context
The log parser has a single test. We need comprehensive test coverage before porting to Rust, so we can use the same test cases as a spec.
Plan
1. Set up fixtures
File: test_log_parser.py
- create_log_file — factory fixture that writes lines to a temp file and returns the path. Cleans up in teardown.
- parser — returns a LogParser() instance.
2. Timestamp formats (parametrize)
Cover all 4 patterns from TIMESTAMP_PATTERNS:
- 2024-01-15T10:23:45.123Z
- 2024-01-15T10:23:45Z
- 2024-01-15 10:23:45
- 2024/01/15 10:23:45
3. Log levels (parametrize)
- All 6 levels with brackets: [INFO], [ERROR], [WARN], [DEBUG], [TRACE], [FATAL]
- All 6 levels without brackets
4. Nested field parsing
- String, int, and bool (true/false) values inside {}
- Float values inside {}
- Comma inside a quoted value not splitting: {msg="a,b",count=1}
5. Noise lines ignored
- Empty and whitespace-only lines
- -- separator -- lines
- Lines with neither timestamp nor level
6. Field types
- Unquoted string, quoted string, quoted with escaped \"
- Integer, float, boolean (true/false), null (empty value key=)
- Mixed types in a single line
7. Multiline files
- File with mixed formats, noise lines interspersed — verify count and level order
8. Edge cases
- Timestamp appearing after level: [ERROR] 2024-01-15T...
- Level only, no timestamp — still parsed with timestamp=None
- Timestamp + level, no fields — fields == {}
- Garbage text before valid key=value — skipped
- raw field preserves original line
- Nonexistent file path raises FileNotFoundError
Verification
uv run pytest
After that, we start Claude in auto-accept-edits mode and wait for the magic to happen.
Once it's done, we check again whether anything is missing and evaluate the tests against the 7 R's of high-quality tests.
For our LogParser, they should look like this:
import os
import tempfile
from datetime import datetime
import pytest
from log_parser import LogParser
@pytest.fixture
def create_log_file():
    """Factory fixture: write the given lines to a temporary .log file.

    The factory returns the file path; every file created during the test
    is removed in teardown.
    """
    created = []

    def _make(*lines: str) -> str:
        with tempfile.NamedTemporaryFile(mode="w", suffix=".log", delete=False) as handle:
            for text in lines:
                handle.write(text + "\n")
            created.append(handle.name)
            return created[-1]

    yield _make
    for tmp_path in created:
        os.unlink(tmp_path)


@pytest.fixture
def parser():
    """A fresh LogParser instance for each test."""
    return LogParser()
# --- Timestamp formats (parametrized) ---
@pytest.mark.parametrize(
    "line, expected_ts",
    [
        (
            "2024-01-15T10:23:45.123Z [INFO] service=auth",
            datetime(2024, 1, 15, 10, 23, 45, 123000),
        ),
        (
            "2024-01-15T10:23:45Z [INFO] service=auth",
            datetime(2024, 1, 15, 10, 23, 45),
        ),
        (
            "2024-01-15 10:23:45 [INFO] service=auth",
            datetime(2024, 1, 15, 10, 23, 45),
        ),
        (
            "2024/01/15 10:23:45 [INFO] service=auth",
            datetime(2024, 1, 15, 10, 23, 45),
        ),
    ],
    ids=[
        "iso8601_fractional",
        "iso8601_no_fractional",
        "datetime_dashes",
        "datetime_slashes",
    ],
)
def test_timestamp_formats(parser, create_log_file, line, expected_ts):
    """Each supported timestamp layout parses to the expected datetime."""
    log_path = create_log_file(line)
    result = parser.load(log_path)
    assert len(result) == 1
    assert result[0].timestamp == expected_ts


# --- All log levels ---
@pytest.mark.parametrize("level", ["INFO", "ERROR", "WARN", "DEBUG", "TRACE", "FATAL"])
def test_all_levels(parser, create_log_file, level):
    """Bracketed levels are recognized and returned without brackets."""
    log_path = create_log_file(f"2024-01-15T10:23:45Z [{level}] service=app")
    result = parser.load(log_path)
    assert len(result) == 1
    assert result[0].level == level


@pytest.mark.parametrize("level", ["INFO", "ERROR", "WARN", "DEBUG", "TRACE", "FATAL"])
def test_level_without_brackets(parser, create_log_file, level):
    """Bare (unbracketed) level tokens are recognized as well."""
    log_path = create_log_file(f"2024-01-15T10:23:45Z {level} service=app")
    result = parser.load(log_path)
    assert result[0].level == level
# --- Nested field parsing ---
def test_nested_fields(parser, create_log_file):
    """A {k=v,...} group becomes a dict with typed values."""
    log_path = create_log_file(
        '2024-01-15T10:23:45Z [ERROR] details={host="ldap-1.internal",port=636,ssl=true}'
    )
    result = parser.load(log_path)
    assert len(result) == 1
    nested = result[0].fields["details"]
    assert nested["host"] == "ldap-1.internal"
    assert nested["port"] == 636
    assert nested["ssl"] is True


def test_nested_fields_with_false(parser, create_log_file):
    """A nested false value maps to Python False."""
    log_path = create_log_file(
        "2024-01-15T10:23:45Z [INFO] config={debug=false,retries=3}"
    )
    result = parser.load(log_path)
    nested = result[0].fields["config"]
    assert nested["debug"] is False
    assert nested["retries"] == 3


def test_nested_fields_with_float(parser, create_log_file):
    """Decimal values inside a nested group are coerced to float."""
    log_path = create_log_file(
        "2024-01-15T10:23:45Z [INFO] stats={avg=12.5,count=100}"
    )
    result = parser.load(log_path)
    nested = result[0].fields["stats"]
    assert nested["avg"] == 12.5
    assert nested["count"] == 100
# --- Noise lines ignored ---
def test_empty_lines_ignored(parser, create_log_file):
    """Blank and whitespace-only lines produce no entries."""
    log_path = create_log_file(
        "2024-01-15T10:23:45Z [INFO] service=auth",
        "",
        " ",
        "2024-01-15T10:23:46Z [ERROR] service=payment",
    )
    result = parser.load(log_path)
    assert len(result) == 2


def test_separator_lines_ignored(parser, create_log_file):
    """'-- ... --' separator lines are filtered out."""
    log_path = create_log_file(
        "2024-01-15T10:23:45Z [INFO] service=auth",
        "-- system restart at 2024-01-15T10:24:00Z --",
        "2024-01-15T10:23:46Z [ERROR] service=payment",
    )
    result = parser.load(log_path)
    assert len(result) == 2


def test_line_without_timestamp_or_level_ignored(parser, create_log_file):
    """A line with neither a timestamp nor a level is dropped."""
    log_path = create_log_file("just some random text with no structure")
    result = parser.load(log_path)
    assert len(result) == 0
# --- Field types ---
def test_string_fields(parser, create_log_file):
    """Quoted and bare string values are both returned as str."""
    log_path = create_log_file(
        '2024-01-15T10:23:45Z [INFO] msg="hello world" service=auth'
    )
    result = parser.load(log_path)
    assert result[0].fields["msg"] == "hello world"
    assert result[0].fields["service"] == "auth"


def test_quoted_string_with_escaped_quotes(parser, create_log_file):
    """Escaped quotes inside a quoted value do not terminate it."""
    log_path = create_log_file(
        r'2024-01-15T10:23:45Z [ERROR] error="failed to parse \"config.json\"" service=app'
    )
    result = parser.load(log_path)
    assert "config.json" in result[0].fields["error"]


def test_integer_field(parser, create_log_file):
    """Integer-looking bare values become int."""
    log_path = create_log_file("2024-01-15T10:23:45Z [INFO] duration_ms=150 user_id=42")
    result = parser.load(log_path)
    assert result[0].fields["duration_ms"] == 150


def test_float_field(parser, create_log_file):
    """Decimal bare values become float."""
    log_path = create_log_file("2024-01-15T10:23:45Z [INFO] amount=99.99")
    result = parser.load(log_path)
    assert result[0].fields["amount"] == 99.99


def test_boolean_fields(parser, create_log_file):
    """true/false bare values become real booleans."""
    log_path = create_log_file("2024-01-15T10:23:45Z [INFO] success=true failed=false")
    result = parser.load(log_path)
    assert result[0].fields["success"] is True
    assert result[0].fields["failed"] is False


def test_empty_value_field(parser, create_log_file):
    """'key=' with no value maps to None."""
    log_path = create_log_file("2024-01-15T10:23:45Z [INFO] user_id= service=auth")
    result = parser.load(log_path)
    assert result[0].fields["user_id"] is None


def test_mixed_field_types_in_one_line(parser, create_log_file):
    """A single line can mix str, int, float, bool, and quoted values."""
    log_path = create_log_file(
        '2024-01-15T10:23:45Z [INFO] service=auth user_id=42 amount=9.99 success=true msg="ok"'
    )
    result = parser.load(log_path)
    parsed = result[0].fields
    assert parsed["service"] == "auth"
    assert parsed["user_id"] == 42
    assert parsed["amount"] == 9.99
    assert parsed["success"] is True
    assert parsed["msg"] == "ok"
# --- Multiline files ---
def test_multiline_file(parser, create_log_file):
    """Mixed formats plus noise lines: only real entries survive, in order."""
    log_path = create_log_file(
        "2024-01-15T10:23:45.123Z [INFO] service=auth user_id=42 action=login duration_ms=150 status=success",
        '[ERROR] 2024-01-15T10:23:45.456Z service=payment action=charge amount=99.99 error="timeout"',
        "2024-01-15 10:23:46 WARN service=auth retry_count=3",
        '2024-01-15T10:23:47.001Z [DEBUG] service=gateway msg="Health check passed"',
        "",
        "-- separator --",
        "2024/01/15 10:24:01 [INFO] service=gateway action=startup",
    )
    result = parser.load(log_path)
    assert len(result) == 5
    assert [entry.level for entry in result] == ["INFO", "ERROR", "WARN", "DEBUG", "INFO"]


def test_timestamp_after_level(parser, create_log_file):
    """A timestamp that follows the level is still extracted."""
    log_path = create_log_file(
        "[ERROR] 2024-01-15T10:23:45.456Z service=payment"
    )
    result = parser.load(log_path)
    assert len(result) == 1
    assert result[0].level == "ERROR"
    assert result[0].timestamp == datetime(2024, 1, 15, 10, 23, 45, 456000)


def test_raw_line_preserved(parser, create_log_file):
    """The untouched input line is kept on the entry's raw attribute."""
    original = "2024-01-15T10:23:45Z [INFO] service=auth"
    log_path = create_log_file(original)
    result = parser.load(log_path)
    assert result[0].raw == original
# --- Edge cases ---
def test_load_nonexistent_file(parser):
    """Loading a missing path propagates FileNotFoundError."""
    with pytest.raises(FileNotFoundError):
        parser.load("/nonexistent/path/to/file.log")


def test_level_only_no_timestamp(parser, create_log_file):
    """A level alone is enough to accept a line; timestamp stays None."""
    log_path = create_log_file("[INFO] service=auth action=login")
    result = parser.load(log_path)
    assert len(result) == 1
    assert result[0].level == "INFO"
    assert result[0].timestamp is None
    assert result[0].fields["service"] == "auth"


def test_level_only_no_fields(parser, create_log_file):
    """Timestamp and level with no pairs yields an empty fields dict."""
    log_path = create_log_file("2024-01-15T10:23:45Z [INFO]")
    result = parser.load(log_path)
    assert len(result) == 1
    assert result[0].level == "INFO"
    assert result[0].fields == {}


def test_nested_field_with_comma_in_quoted_value(parser, create_log_file):
    """A comma inside a quoted nested value must not split the pair."""
    log_path = create_log_file(
        '2024-01-15T10:23:45Z [INFO] ctx={msg="hello, world",count=1}'
    )
    result = parser.load(log_path)
    nested = result[0].fields["ctx"]
    assert nested["msg"] == "hello, world"
    assert nested["count"] == 1


def test_fields_with_spaces_in_key_skipped(parser, create_log_file):
    """Free text before the first real pair is skipped, not parsed."""
    log_path = create_log_file(
        "2024-01-15T10:23:45Z [INFO] some garbage key=value"
    )
    result = parser.load(log_path)
    assert result[0].fields["key"] == "value"
    assert "some garbage" not in result[0].fields
As you can see, our test suite is much richer now. There are far more tests, and many more cases are covered. That's much better than before, and we can now refactor with much higher confidence. Such a test suite can actually catch potential drift in the Rust implementation. The initial single test would have passed even if the Rust implementation lacked support for nested fields, for example. With higher confidence, we can move much faster because we can rely on tests rather than reviewing the Python and Rust code side by side.
Conclusion
While we haven't touched Rust yet, we've taken a very important step towards implementing it. We ensured our Python code was well tested. That's the first step if we want to do the rewrite confidently. We'll continue our work in part 2, coming out next week.
All the techniques and approaches that we're using in this series are explained in detail inside Complete Python Testing Guide. That includes real-world examples such as CRM and email integration, together with ready-to-use AI agent instruction files that will help you generate high-value tests with AI agents.