Resources

Episode Code - https://github.com/driftingruby/427-detect-spam-with-ai

This episode is sponsored by Honeybadger


Download Source Code

Summary

Python Service

# Terminal
python web.py

# requirements.txt
torch
transformers
flask
gunicorn

# web.py
from flask import Flask, request
import detector as detector
app = Flask(__name__)

@app.route('/check', methods=["POST"])
def handle_request():
  """Classify the posted `prompt` form field as spam or not.

  Returns a JSON body `{"spam": <bool>}` on success, or an error body with
  HTTP status 400 when the `prompt` field is missing or empty.
  """
  text = request.form.get("prompt")
  if text:
    # bool() collapses whatever detector.detect returns into a JSON boolean.
    return { "spam": bool(detector.detect(text)) }
  return { "error": "prompt parameter is required" }, 400

# Start Flask's built-in server only when this file is executed directly
# (`python web.py`). Without the guard, a WSGI server that imports this module
# (e.g. `gunicorn web:app`) would run app.run() at import time and block on
# port 8000 instead of serving the app itself.
if __name__ == "__main__":
  app.run(host="0.0.0.0", port=8000)

# detector.py
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification

# Choose the compute backend once at import time: CUDA first, then Apple's
# Metal backend (MPS), falling back to the CPU. The choice is announced on
# stdout and stored in the module-level `device`.
if torch.cuda.is_available():
  banner, backend = "USING CUDA", "cuda"
elif torch.backends.mps.is_available():
  banner, backend = "USING Apple Metal", "mps"
else:
  banner, backend = "USING CPU", "cpu"

print(banner)
device = torch.device(backend)

# Identifier of the pretrained model passed to from_pretrained below;
# the tokenizer and the classifier are both loaded from it at import time.
model_path = "mshenoda/roberta-spam"
tokenizer = RobertaTokenizer.from_pretrained(model_path)
# Two-label sequence classifier, moved onto the selected device.
model = RobertaForSequenceClassification.from_pretrained(model_path, num_labels=2)
model = model.to(device)

def detect(text):
  """Run the classifier on `text` and return the index of the highest logit.

  The input is tokenized to a fixed length of 512 tokens (padded or
  truncated), moved to the module-level `device`, and evaluated without
  gradient tracking. The result is the tensor produced by
  `torch.argmax(..., dim=1)`, not a plain Python int.
  NOTE(review): presumably index 1 means "spam" for this model - confirm
  against the model's label mapping.
  """
  encoded = tokenizer(text, return_tensors="pt", padding="max_length", truncation=True, max_length=512)

  # Every tensor the tokenizer produced must live on the same device as the model.
  on_device = {}
  for name, tensor in encoded.items():
    on_device[name] = tensor.to(device)

  with torch.no_grad():
    logits = model(**on_device).logits

  return torch.argmax(logits, dim=1)

# Dockerfile
# Container image for the Python spam-detection service.
FROM python:3.11.6

RUN apt update && apt upgrade -y

WORKDIR /app
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt

# GPU visibility settings; presumably consumed by the NVIDIA container
# runtime when the container is started with GPU support.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

EXPOSE 5000

# Exec form needs one array element per argument. A single-element array is
# treated as the name of one executable ("gunicorn -w 2 ..."), which does not
# exist, so the container would fail to start.
CMD ["gunicorn", "-w", "2", "-b", "0.0.0.0:5000", "web:app"]

# docker-compose.yml
# Compose definition for running the spam-detection service with GPU access.
version: '3.8'

services:
  app:
    # Placeholder: no `build:` key is set here, so uncomment this line and
    # point it at the image to run.
    # image: USERNAME/IMAGE:latest
    # Serve web:app with two gunicorn workers on port 5000.
    command: gunicorn -w 2 -b 0.0.0.0:5000 web:app
    ports:
      # host:container
      - 5000:5000
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a device that offers the `gpu` capability.
            - capabilities: [gpu]
    volumes:
      # Maps the host's /root/model-cache onto /root/.cache in the container;
      # presumably so downloaded model files survive container restarts.
      - /root/model-cache:/root/.cache

Rails Application


# Terminal
rails g migration add_spam_fields_to_comments spam:boolean spam_checked_on:datetime
rails g job CheckForSpam

# db/migrate/20231105020507_add_spam_fields_to_comments.rb
# Adds spam-tracking columns to the comments table: a `spam` flag that
# defaults to false and a `spam_checked_on` timestamp.
class AddSpamFieldsToComments < ActiveRecord::Migration[7.1]
  def change
    change_table :comments do |t|
      t.boolean :spam, default: false
      t.datetime :spam_checked_on
    end
  end
end

# app/models/comment.rb
# A rich-text comment that belongs to a post. Changes are broadcast to the
# post's stream, and every newly created comment is queued for a spam check.
class Comment < ApplicationRecord
  has_rich_text :content
  belongs_to :post

  # `broadcasts_to :post` stands in for the hand-written
  # broadcast_append_to / broadcast_replace_to / broadcast_remove_to
  # commit callbacks.
  broadcasts_to :post

  # Enqueue the spam check only once the create has been committed.
  after_create_commit { CheckForSpamJob.perform_later(self) }
end

# app/jobs/check_for_spam_job.rb
require "net/http"
require "uri"
require "json"

# Background job that sends a comment's plain text to the spam-detection
# HTTP service and destroys the comment when the service flags it as spam.
class CheckForSpamJob < ApplicationJob
  queue_as :default

  # Endpoint used when SPAM_DETECTOR_URL is not set in the environment.
  DEFAULT_ENDPOINT = "http://127.0.0.1:8000/check"

  # @param comment [Comment] record whose rich-text content is checked
  # @return [void] failures are logged, never raised to the queue adapter
  def perform(comment)
    prompt = comment.content.to_plain_text
    uri = URI.parse(ENV.fetch("SPAM_DETECTOR_URL", DEFAULT_ENDPOINT))
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true if uri.scheme == "https"
    http.open_timeout = 10
    http.read_timeout = 10
    request = Net::HTTP::Post.new(uri.request_uri)
    request.set_form_data({ prompt: prompt })
    response = http.request(request)
    result = JSON.parse(response.body)

    # `defined?(result["spam"])` is truthy for any object that responds to
    # `[]`, so it could never detect a missing key. Check the key itself so
    # error payloads (e.g. { "error" => "..." }) reach the else branch.
    if result.is_a?(Hash) && result.key?("spam")
      # comment.update(spam_checked_on: Time.now, spam: result["spam"])
      comment.destroy if result["spam"]
    else
      # TODO: retry
      logger.warn("CheckForSpamJob: unexpected response (HTTP #{response.code}): #{response.body}")
    end
  rescue StandardError => e
    # TODO: retry
    # Best effort: a failed check must not crash the worker, but it should
    # leave a trace instead of disappearing silently.
    logger.error("CheckForSpamJob: spam check failed: #{e.class}: #{e.message}")
  end
end

# app/views/comments/_comment.html.erb
<%# Renders one comment as a card inside its own Turbo Frame. The header and body use the "danger" color classes when comment.spam? is true and the "primary" ones otherwise. %>
<%= turbo_frame_tag dom_id(comment), class: "card my-3 shadow-sm" do %>
  <%= content_tag :div, class: "card-header #{comment.spam? ? "bg-danger" : "bg-primary"} text-white" do %>
    <div class="">
      Commented on <%= comment.updated_at.strftime("%B %d, %Y at %l:%M %p") %>
    </div>
  <% end %>
  <%= content_tag :div, class: "card-body #{comment.spam? ? "bg-danger-subtle" : "bg-primary-subtle"}" do %>
    <p class="card-text"><%= comment.content %></p>
  <% end %>
  <div class="card-footer bg-transparent">
    <%= link_to "Edit", [:edit, comment.post, comment], class: 'btn btn-outline-secondary btn-sm me-2' %>
    <%= link_to "Delete", [comment.post, comment], "data-turbo-method": :delete, "data-turbo-confirm": "Are you sure?", class: 'btn btn-outline-danger btn-sm' %>
  </div>
<% end %>

# app/controllers/comments_controller.rb
# Lists the post's comments that are not flagged as spam, ordered by
# updated_at ascending, and exposes them to the view as @comments.
def index
  not_spam = @post.comments.where(spam: false)
  @comments = not_spam.order(updated_at: :asc)
end