In some situations, you need to redact or remove sensitive, personally identifiable information (PII) from your traces. In such cases, a custom span processor that redacts PII from your spans during OTEL export can be very useful.
The goal of this documentation is to provide an example custom span processor that redacts PII from any span using regex patterns. Note that, even though this example uses regex patterns, the span processor can be customized with more advanced PII detection frameworks like Microsoft Presidio.
If you want to completely hide the inputs and outputs of your traces, check out the Mask Span Attributes docs.
Let's create an example custom span processor to detect and redact PII data. In OpenTelemetry, every span processor needs to have the following methods: on_start(span, parent_context), on_end(span), shutdown(), and force_flush(timeout_millis).
We'll use the on_end(span) method to redact PII data from different data types by defining regex patterns. The example below supports the following types of PII:
Email addresses
Phone numbers (US format)
Social Security Numbers (SSN)
Credit card numbers
IP addresses
Dates of birth
Note that you can always pass in additional patterns of your choice to the custom span processor below!
import json
import logging
import re
from typing import Any, Dict, Optional

from opentelemetry.sdk.trace import Event
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.sdk.trace.export import SpanExporter
from opentelemetry.trace import Span
class PIIRedactingSpanProcessor(SpanProcessor):
    """
    Span processor that exports a PII-redacted copy of each finished span.

    The original span object is never mutated: on_end() builds a new
    ReadableSpan whose name, attributes and event names/attributes have had
    every regex match replaced by a "[REDACTED_<PATTERN_NAME>]" placeholder,
    and hands that copy to the wrapped exporter.
    """

    # Metadata attributes that are copied through without being scanned.
    _PASSTHROUGH_KEYS = frozenset(
        {'service.name', 'telemetry.sdk.name', 'telemetry.sdk.version'}
    )

    _logger = logging.getLogger(__name__)

    def __init__(self, exporter: SpanExporter, pii_patterns: Optional[Dict[str, str]] = None):
        """
        Initialize the PII redacting processor with an exporter and optional patterns.

        Args:
            exporter: The span exporter to use after PII redaction
            pii_patterns: Dictionary of pattern names and their regex patterns.
                Entries are merged over the defaults, so a custom pattern
                with the same name as a default one replaces it.
        """
        self._exporter = exporter
        self._default_patterns = {
            'email': r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}',
            'phone': r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',
            'ssn': r'\b\d{3}-\d{2}-\d{4}\b',
            'credit_card': r'\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b',
            'ip_address': r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b',
            'date_of_birth': r'\b\d{2}[-/]\d{2}[-/]\d{4}\b',
        }
        self._patterns = {**self._default_patterns, **(pii_patterns or {})}
        # Compile patterns once so that per-span redaction does not re-parse them
        self._compiled_patterns = {
            name: re.compile(pattern) for name, pattern in self._patterns.items()
        }

    def _redact_string(self, value: str) -> str:
        """Return value with every pattern match replaced by its placeholder."""
        redacted = value
        for pattern_name, pattern in self._compiled_patterns.items():
            redacted = pattern.sub(f'[REDACTED_{pattern_name.upper()}]', redacted)
        return redacted

    def _redact_value(self, value: Any) -> Any:
        """
        Redact PII from any value type.

        Strings holding a JSON object or array are parsed, redacted
        recursively and re-serialized. All other strings are scanned
        directly. Dicts, lists and tuples are rebuilt with redacted members
        (dict keys are left untouched). Numbers, booleans and None are
        returned unchanged. Any other type is converted to a string and
        scanned.
        """
        if isinstance(value, str):
            try:
                parsed = json.loads(value)
            except json.JSONDecodeError:
                # Not valid JSON, treat as a regular string
                return self._redact_string(value)
            if isinstance(parsed, (dict, list)):
                return json.dumps(self._redact_value(parsed))
            # A bare JSON scalar such as "5551234567" parses as a number;
            # scan the original text so digit-only PII is still caught.
            return self._redact_string(value)
        if isinstance(value, dict):
            return {k: self._redact_value(v) for k, v in value.items()}
        if isinstance(value, list):
            return [self._redact_value(item) for item in value]
        if isinstance(value, tuple):
            # Keep sequence attributes as sequences instead of turning them
            # into their string representation.
            return tuple(self._redact_value(item) for item in value)
        if isinstance(value, (int, float, bool, type(None))):
            return value
        # Convert any other types to string and redact
        return self._redact_string(str(value))

    def _redact_span_attributes(self, span: ReadableSpan) -> Dict[str, Any]:
        """
        Create a new dictionary of redacted span attributes.

        An attribute whose redaction raises is replaced by the literal
        "[REDACTION_ERROR]" so that the raw value is never exported.
        """
        redacted_attributes = {}
        for key, value in (span.attributes or {}).items():
            # Skip certain metadata attributes that shouldn't contain PII
            if key in self._PASSTHROUGH_KEYS:
                redacted_attributes[key] = value
                continue
            try:
                redacted_attributes[key] = self._redact_value(value)
            except Exception as e:
                redacted_attributes[key] = "[REDACTION_ERROR]"
                # Log only the exception type: its message may echo the raw value.
                self._logger.warning(
                    "Error redacting attribute %s: %s", key, type(e).__name__
                )
        return redacted_attributes

    def _create_redacted_span(self, span: ReadableSpan) -> ReadableSpan:
        """
        Create a new span with redacted data instead of modifying the original.

        The span name, span attributes, event names and event attributes are
        redacted. Context, parent, resource, links, kind, status and
        timestamps are copied unchanged.
        """
        redacted_events = [
            Event(
                name=self._redact_string(event.name),
                attributes={
                    k: self._redact_value(v)
                    for k, v in (event.attributes or {}).items()
                },
                timestamp=event.timestamp,
            )
            for event in span.events
        ]

        # Forward the instrumentation scope when the installed SDK exposes it,
        # so the exported copy keeps the name of the instrumenting library.
        extra_kwargs = {}
        scope = getattr(span, "instrumentation_scope", None)
        if scope is not None:
            extra_kwargs["instrumentation_scope"] = scope

        return ReadableSpan(
            name=self._redact_string(span.name),
            context=span.get_span_context(),
            parent=span.parent,
            resource=span.resource,
            attributes=self._redact_span_attributes(span),
            events=redacted_events,
            links=span.links,
            kind=span.kind,
            status=span.status,
            start_time=span.start_time,
            end_time=span.end_time,
            instrumentation_info=span.instrumentation_info,
            **extra_kwargs,
        )

    def on_start(self, span: Span, parent_context: Optional[Any] = None):
        """Called when a span starts. Nothing to do: redaction happens on end."""

    def on_end(self, span: ReadableSpan):
        """
        Called when a span ends. Creates a redacted copy and exports it.

        Unsampled spans are skipped. If redaction or export raises, the span
        is dropped and the failure is logged; the original, unredacted span
        is never exported as a fallback.
        """
        context = span.get_span_context()
        if context is not None and not context.trace_flags.sampled:
            return
        try:
            redacted_span = self._create_redacted_span(span)
            self._exporter.export([redacted_span])
        except Exception as e:
            # Never let a tracing failure propagate into application code.
            # Log only the exception type: its message may echo raw values.
            self._logger.error(
                "Failed to redact or export span: %s", type(e).__name__
            )

    def shutdown(self):
        """Shuts down the processor and exporter."""
        self._exporter.shutdown()

    def force_flush(self, timeout_millis: int = 30000) -> bool:
        """Forces flush of pending spans and returns the exporter's result."""
        return self._exporter.force_flush(timeout_millis)
Once we have created the custom span processor, we can simply initialize it and pass it to our tracer provider during our OTEL configuration:
# Set up the tracer provider with the PII processor
tracer_provider = trace_sdk.TracerProvider(
    resource=Resource(attributes=trace_attributes)
)
# Create the PII redacting processor with the OTLP exporter
pii_processor = PIIRedactingSpanProcessor(OTLPSpanExporter(endpoint))
tracer_provider.add_span_processor(pii_processor)
# NOTE: do not register another processor (e.g. a BatchSpanProcessor) with its
# own exporter for the same endpoint. Every registered processor receives the
# original span, so a second exporter would send the unredacted data alongside
# the redacted copy.
trace_api.set_tracer_provider(tracer_provider=tracer_provider)
# To get your tracer
tracer = trace_api.get_tracer(__name__)
# Finish automatic instrumentation
OpenAIInstrumentor().instrument()
Once you have the processor set up, any data matching the defined regex patterns is automatically redacted before it is exported.