Python · Intermediate

LLM Testing with DeepEval

Write unit tests for LLM outputs with the DeepEval framework, checking answer relevancy and detecting hallucinations.

python
import pytest
from deepeval import assert_test
from deepeval.metrics import AnswerRelevancyMetric, HallucinationMetric
from deepeval.test_case import LLMTestCase, LLMTestCaseParams

def test_answer_relevancy():
    """Run AnswerRelevancyMetric on a one-sentence answer about France's capital.

    Builds a single LLMTestCase (question, produced answer, reference answer)
    and hands it to ``assert_test`` together with a metric configured with
    ``threshold=0.7`` and ``model='gpt-4o-mini'``.
    """
    relevancy = AnswerRelevancyMetric(model='gpt-4o-mini', threshold=0.7)
    case = LLMTestCase(
        input='What is the capital of France?',
        actual_output='Paris is the capital city of France.',
        expected_output='Paris',
    )
    assert_test(case, [relevancy])

def test_no_hallucination():
    """Run HallucinationMetric on an answer about Python's creator.

    The test case pairs the produced answer with a single context passage and
    is passed to ``assert_test`` with a metric configured with
    ``threshold=0.3`` and ``model='gpt-4o-mini'``.

    NOTE(review): the answer mentions "the late 1980s", which the context
    passage does not state; confirm the metric accepts detail that is
    unsupported by, but not contradictory to, the context.
    """
    grounding = ['Guido van Rossum created Python, releasing version 1.0 in 1994.']
    hallucination = HallucinationMetric(model='gpt-4o-mini', threshold=0.3)
    case = LLMTestCase(
        input='Who created Python?',
        actual_output='Python was created by Guido van Rossum in the late 1980s.',
        context=grounding,
    )
    assert_test(case, [hallucination])

if __name__ == '__main__':
    # pytest.main() returns the run's exit code rather than exiting the
    # interpreter itself. Raise SystemExit with it so that running this file
    # directly reports test failures through the process exit status.
    raise SystemExit(pytest.main([__file__, '-v']))

Use Cases

  • LLM testing
  • hallucination detection
  • QA automation

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.