python · intermediate

Text Classification with Hugging Face

Fine-tune or use pre-trained Hugging Face models for text classification.

python — Press ⌘/Ctrl + Shift + C to copy

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

# Quick route: let the high-level pipeline handle tokenization, inference and
# label decoding, using a DistilBERT checkpoint already fine-tuned on SST-2.
classifier = pipeline(
    task="text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Sample inputs: one positive, one negative, one lukewarm.
texts = [
    "This product is amazing! Best purchase ever.",
    "Terrible experience. Would not recommend.",
    "It's okay, nothing special.",
]

# Classify the whole list in one call; each result exposes "label" and "score".
results = classifier(texts)
for result, text in zip(results, texts):
    # Print the verdict next to (at most) the first 50 characters of the input.
    print(f"{result['label']} ({result['score']:.3f}): {text[:50]}")

# Manual: load model + tokenizer for more control over each inference step.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Encode a single sentence as PyTorch tensors, truncating anything longer
# than 512 tokens.
inputs = tokenizer(
    "This is a great movie!",
    return_tensors="pt",
    truncation=True,
    max_length=512,
)

# Inference only: skip gradient tracking.
with torch.no_grad():
    outputs = model(**inputs)
    # Normalize the logits into probabilities along the last (class) axis.
    probs = torch.softmax(outputs.logits, dim=-1)
    # Reduce along the class axis and pick row 0 explicitly. A bare
    # torch.argmax(probs) indexes the *flattened* tensor, which is a valid
    # class id only when the batch happens to hold exactly one row.
    pred = torch.argmax(probs, dim=-1)[0].item()

print(f"Prediction: {model.config.id2label[pred]}")
print(f"Confidence: {probs[0][pred]:.3f}")