Introduction: From ConnectionError to Production-Ready in 15 Minutes
I encountered a critical ConnectionError: timeout issue at 3 AM last week when our production AI feature stopped responding. The culprit? Hardcoded API endpoints and missing retry logic. After fixing this, I realized most developers face similar challenges when integrating AI APIs into Spring Boot applications. This guide provides a battle-tested, production-ready implementation that eliminates these common pitfalls.
Sign up here for HolySheep AI, which offers rates at ¥1=$1—saving you 85%+ compared to typical ¥7.3 rates—and delivers sub-50ms latency with free credits on registration.
Project Setup and Dependencies
Create a new Spring Boot project or add these dependencies to your existing pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Build descriptor for the Spring Boot AI integration sample.
  Dependency versions without an explicit <version> are managed by the
  spring-boot-starter-parent BOM.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>3.2.0</version>
    <relativePath/>
  </parent>

  <groupId>com.example</groupId>
  <artifactId>spring-boot-ai-integration</artifactId>
  <version>1.0.0</version>
  <packaging>jar</packaging>

  <properties>
    <java.version>17</java.version>
    <spring-cloud.version>2023.0.0</spring-cloud.version>
  </properties>

  <dependencies>
    <!-- Spring Boot Web -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Spring Boot Validation -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-validation</artifactId>
    </dependency>

    <!-- HTTP Client (Spring WebClient) -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-webflux</artifactId>
    </dependency>

    <!-- Resilience4j for retry and circuit breaker -->
    <dependency>
      <groupId>io.github.resilience4j</groupId>
      <artifactId>resilience4j-spring-boot3</artifactId>
      <version>2.2.0</version>
    </dependency>
    <!--
      Required in addition to the starter when the annotated methods return
      Mono/Flux: it supplies the Reactor operators the aspects use to decorate
      reactive return types.
    -->
    <dependency>
      <groupId>io.github.resilience4j</groupId>
      <artifactId>resilience4j-reactor</artifactId>
      <version>2.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-aop</artifactId>
    </dependency>

    <!-- Jackson for JSON processing -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
    </dependency>

    <!-- Lombok -->
    <dependency>
      <groupId>org.projectlombok</groupId>
      <artifactId>lombok</artifactId>
      <optional>true</optional>
    </dependency>

    <!-- Configuration Processor -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-configuration-processor</artifactId>
      <optional>true</optional>
    </dependency>

    <!-- Test Dependencies -->
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter-test</artifactId>
      <scope>test</scope>
    </dependency>
    <!-- Provides reactor.test.StepVerifier, which the integration test imports. -->
    <dependency>
      <groupId>io.projectreactor</groupId>
      <artifactId>reactor-test</artifactId>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-maven-plugin</artifactId>
        <configuration>
          <!-- Lombok is compile-time only; keep it out of the executable jar. -->
          <excludes>
            <exclude>
              <groupId>org.projectlombok</groupId>
              <artifactId>lombok</artifactId>
            </exclude>
          </excludes>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
Configuration Properties
Create your application.yml with HolySheep AI configuration:
spring:
  application:
    name: spring-boot-ai-integration
  jackson:
    serialization:
      write-dates-as-timestamps: false
    default-property-inclusion: non_null

server:
  port: 8080

# HolySheep AI Configuration
ai:
  holysheep:
    base-url: https://api.holysheep.ai/v1
    # Read the key from the environment so the secret never lands in version
    # control. Startup fails with an unresolved-placeholder error if
    # HOLYSHEEP_API_KEY is not set.
    api-key: ${HOLYSHEEP_API_KEY}
    timeout-seconds: 30
    max-retries: 3
    retry-delay-ms: 1000
    connection-pool-size: 100
    max-codec-resource-pool-size: 200

# Resilience4j Configuration
resilience4j:
  retry:
    instances:
      aiApiRetry:
        max-attempts: 3
        wait-duration: 1s
        retry-exceptions:
          - java.io.IOException
          - java.util.concurrent.TimeoutException
          - org.springframework.web.reactive.function.client.WebClientResponseException$ServiceUnavailable
        ignore-exceptions:
          - org.springframework.web.reactive.function.client.WebClientResponseException$BadRequest
          - org.springframework.web.reactive.function.client.WebClientResponseException$Unauthorized
  circuitbreaker:
    instances:
      aiApiCircuitBreaker:
        sliding-window-size: 10
        failure-rate-threshold: 50
        wait-duration-in-open-state: 30s
        permitted-number-of-calls-in-half-open-state: 5
        sliding-window-type: COUNT_BASED

logging:
  level:
    com.example.ai: DEBUG
    org.springframework.web.reactive: DEBUG
Core Implementation: AI Service Layer
Here's the AI service built on WebClient, with per-request timeouts, retry with exponential backoff, and error mapping. The circuit breaker is applied at the controller layer, shown further below:
package com.example.ai.service;
import com.example.ai.config.HolySheepProperties;
import com.example.ai.dto.*;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.web.reactive.function.client.WebClientResponseException;
import reactor.core.publisher.Mono;
import reactor.util.retry.Retry;
import jakarta.annotation.PostConstruct;
import java.time.Duration;
import java.util.List;
import java.util.Map;
@Slf4j
@Service
@RequiredArgsConstructor
public class HolySheepAIService {
private final HolySheepProperties properties;
private final ObjectMapper objectMapper;
private WebClient webClient;
@PostConstruct
public void init() {
this.webClient = WebClient.builder()
.baseUrl(properties.getBaseUrl())
.defaultHeader("Authorization", "Bearer " + properties.getApiKey())
.defaultHeader("Content-Type", "application/json")
.codecs(configurer -> configurer
.defaultCodecs()
.maxInMemorySize(16 * 1024 * 1024)) // 16MB
.build();
}
/**
* Send a chat completion request to HolySheep AI
* Supports multiple models: gpt-4.1, claude-sonnet-4.5, gemini-2.5-flash, deepseek-v3.2
*
* 2026 Pricing (per 1M tokens):
* - GPT-4.1: $8.00
* - Claude Sonnet 4.5: $15.00
* - Gemini 2.5 Flash: $2.50
* - DeepSeek V3.2: $0.42
*/
public Mono createChatCompletion(ChatCompletionRequest request) {
log.info("Sending chat completion request with model: {}", request.getModel());
return webClient.post()
.uri("/chat/completions")
.bodyValue(request)
.retrieve()
.bodyToMono(ChatCompletionResponse.class)
.timeout(Duration.ofSeconds(properties.getTimeoutSeconds()))
.doOnSuccess(response -> log.info("Chat completion successful. Usage: {}", response.getUsage()))
.doOnError(error -> log.error("Chat completion failed: {}", error.getMessage()))
.retryWhen(Retry.backoff(properties.getMaxRetries(),
Duration.ofMillis(properties.getRetryDelayMs()))
.filter(this::isRetryable)
.onRetryExceeded(error -> log.error("Retry limit exceeded")))
.onErrorResume(this::handleError);
}
/**
* Stream chat completions for real-time responses
*/
public Mono createStreamingChatCompletion(ChatCompletionRequest request) {
log.info("Starting streaming chat completion with model: {}", request.getModel());
return webClient.post()
.uri("/chat/completions")
.bodyValue(request)
.retrieve()
.bodyToMono(String.class)
.timeout(Duration.ofSeconds(properties.getTimeoutSeconds() * 2))
.doOnNext(chunk -> log.debug("Received streaming chunk: {}", chunk));
}
/**
* Check account balance
*/
public Mono getBalance() {
log.info("Fetching account balance from HolySheep AI");
return webClient.get()
.uri("/balance")
.retrieve()
.bodyToMono(BalanceResponse.class)
.doOnSuccess(response -> log.info("Balance retrieved: {}", response));
}
private boolean isRetryable(Throwable throwable) {
if (throwable instanceof WebClientResponseException e) {
return e.getStatusCode().is5xxServerError() ||
e.getStatusCode().value() == 429; // Rate limit
}
return throwable instanceof java.io.IOException ||
throwable instanceof java.util.concurrent.TimeoutException;
}
private Mono handleError(Throwable error) {
if (error instanceof WebClientResponseException e) {
log.error("API Error - Status: {}, Body: {}",
e.getStatusCode(), e.getResponseBodyAsString());
return Mono.error(new AIApiException(
"AI API Error: " + e.getStatusCode() + " - " + e.getMessage(),
e.getStatusCode().value(),
e.getResponseBodyAsString()
));
}
log.error("Unexpected error during AI API call: {}", error.getMessage());
return Mono.error(new AIApiException("AI API call failed: " + error.getMessage(), 0, null));
}
}
/**
 * Custom exception for AI API errors.
 *
 * <p>Carries the HTTP status code and raw response body of the failed call. The code
 * that creates it uses status {@code 0} and a {@code null} body when the failure did not
 * come from an HTTP response.
 */
class AIApiException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    private final int statusCode;
    private final String responseBody;

    /**
     * Creates an exception without a cause.
     *
     * <p>The cause is deliberately left uninitialised (not set to {@code null}) so that
     * callers may still attach one later with {@link #initCause(Throwable)}.
     *
     * @param message      human-readable description of the failure
     * @param statusCode   HTTP status of the failed call, or {@code 0} if there was none
     * @param responseBody raw response body, may be {@code null}
     */
    public AIApiException(String message, int statusCode, String responseBody) {
        super(message);
        this.statusCode = statusCode;
        this.responseBody = responseBody;
    }

    /**
     * Creates an exception that preserves the underlying failure.
     *
     * @param message      human-readable description of the failure
     * @param statusCode   HTTP status of the failed call, or {@code 0} if there was none
     * @param responseBody raw response body, may be {@code null}
     * @param cause        the failure that triggered this exception, may be {@code null}
     */
    public AIApiException(String message, int statusCode, String responseBody, Throwable cause) {
        super(message, cause);
        this.statusCode = statusCode;
        this.responseBody = responseBody;
    }

    /** @return the HTTP status code, or {@code 0} when no response was received */
    public int getStatusCode() {
        return statusCode;
    }

    /** @return the raw response body, or {@code null} when none is available */
    public String getResponseBody() {
        return responseBody;
    }
}
DTO Classes and Request/Response Models
package com.example.ai.dto;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;
import java.util.Map;
/**
 * Request body posted to {@code /chat/completions}.
 *
 * <p>Null fields are omitted from the JSON. Multi-word fields are mapped explicitly to
 * snake_case names, matching the convention the other DTOs in this package use for the
 * wire format. Field order is significant: it defines the parameter order of the
 * Lombok-generated all-args constructor.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class ChatCompletionRequest {

    private String model; // "gpt-4.1", "claude-sonnet-4.5", "gemini-2.5-flash", "deepseek-v3.2"

    private List<Message> messages;

    private Double temperature;

    // Without the explicit name Jackson would emit "maxTokens", which does not follow
    // the snake_case naming used for the other multi-word fields.
    @JsonProperty("max_tokens")
    private Integer maxTokens;

    @JsonProperty("top_p")
    private Double topP;

    private Integer n;

    private Boolean stream;

    private String stop;

    @JsonProperty("presence_penalty")
    private Double presencePenalty;

    @JsonProperty("frequency_penalty")
    private Double frequencyPenalty;

    @JsonProperty("user")
    private String user;

    private Map<String, Object> functions;

    @JsonProperty("function_call")
    private String functionCall;
}
/**
 * A single chat message, used in request message lists and in response choices.
 *
 * Null fields are omitted when serialised. Field order defines the parameter order of
 * the Lombok-generated all-args constructor, so do not reorder fields.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class Message {
private String role; // "system", "user", "assistant", "function"
private String content;
private String name;
// NOTE(review): no @JsonProperty here, so these two serialise as "functionCall" and
// "functionCallDetails"; confirm against the API whether "function_call" was intended.
private String functionCall;
private FunctionCall functionCallDetails;
}
/**
 * Name and arguments of a function invocation attached to a {@code Message}.
 *
 * Package-private: it can only be named from within this package.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
class FunctionCall {
private String name;
private String arguments; // JSON string
}
/**
 * Response body returned by {@code /chat/completions}.
 *
 * <p>Multi-word fields are mapped explicitly to snake_case names, matching the convention
 * used by {@code Usage}. Field order defines the parameter order of the Lombok-generated
 * all-args constructor.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class ChatCompletionResponse {

    private String id;

    private String object;

    private long created;

    private String model;

    private List<Choice> choices;

    private Usage usage;

    @JsonProperty("service_tier")
    private String serviceTier;

    @JsonProperty("system_fingerprint")
    private String systemFingerprint;

    /**
     * One generated alternative within a response.
     *
     * <p>Declared as a public nested type so code outside this package can build one via
     * {@code ChatCompletionResponse.Choice.builder()}.
     */
    @Data
    @Builder
    @NoArgsConstructor
    @AllArgsConstructor
    public static class Choice {

        private int index;

        private Message message;

        private Object logprobs;

        @JsonProperty("finish_reason")
        private String finishReason;
    }
}
/**
 * Token accounting reported with a chat completion response.
 *
 * Each field is bound to its snake_case JSON name. Package-private: code in other
 * packages can receive a value of this type but cannot name the type.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
class Usage {
@JsonProperty("prompt_tokens")
private int promptTokens;
@JsonProperty("completion_tokens")
private int completionTokens;
@JsonProperty("total_tokens")
private int totalTokens;
}
/**
 * Account balance as returned by the {@code /balance} endpoint.
 *
 * <p>Public because it is referenced by name from the service and controller packages.
 * Field order defines the parameter order of the Lombok-generated all-args constructor.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class BalanceResponse {

    // NOTE(review): no @JsonProperty is declared, so these bind to the JSON names
    // "hsaBalance", "totalBalance" and "currency"; confirm against the API response.
    private String hsaBalance;

    private String totalBalance;

    private String currency;
}
REST Controller with Production Features
package com.example.ai.controller;
import com.example.ai.dto.*;
import com.example.ai.service.HolySheepAIService;
import io.github.resilience4j.circuitbreaker.annotation.CircuitBreaker;
import io.github.resilience4j.retry.annotation.Retry;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
import jakarta.validation.Valid;
import java.util.List;
import java.util.Map;
@Slf4j
@RestController
@RequestMapping("/api/v1/ai")
@RequiredArgsConstructor
public class AIController {
private final HolySheepAIService aiService;
/**
* Chat completion endpoint with circuit breaker and retry
*/
@PostMapping("/chat")
@CircuitBreaker(name = "aiApiCircuitBreaker", fallbackMethod = "chatFallback")
@Retry(name = "aiApiRetry")
public Mono<ResponseEntity<ChatCompletionResponse>> createChat(
@Valid @RequestBody ChatRequest request) {
log.info("Received chat request with model: {}", request.getModel());
ChatCompletionRequest completionRequest = ChatCompletionRequest.builder()
.model(request.getModel())
.messages(request.getMessages())
.temperature(request.getTemperature() != null ? request.getTemperature() : 0.7)
.maxTokens(request.getMaxTokens() != null ? request.getMaxTokens() : 2048)
.stream(false)
.build();
return aiService.createChatCompletion(completionRequest)
.map(ResponseEntity::ok)
.onErrorResume(error -> Mono.just(
ResponseEntity.internalServerError().body(null)));
}
/**
* Streaming chat completion endpoint
*/
@PostMapping(value = "/chat/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
public Mono<ResponseEntity<String>> createStreamingChat(
@Valid @RequestBody ChatRequest request) {
log.info("Received streaming chat request");
ChatCompletionRequest completionRequest = ChatCompletionRequest.builder()
.model(request.getModel())
.messages(request.getMessages())
.temperature(request.getTemperature() != null ? request.getTemperature() : 0.7)
.maxTokens(request.getMaxTokens() != null ? request.getMaxTokens() : 2048)
.stream(true)
.build();
return aiService.createStreamingChatCompletion(completionRequest)
.map(ResponseEntity::ok)
.onErrorResume(error -> Mono.just(
ResponseEntity.internalServerError()
.body("data: {\"error\": \"" + error.getMessage() + "\"}\n\n")));
}
/**
* Get account balance
*/
@GetMapping("/balance")
public Mono<ResponseEntity<BalanceResponse>> getBalance() {
log.info("Balance check requested");
return aiService.getBalance()
.map(ResponseEntity::ok)
.onErrorResume(error -> Mono.just(
ResponseEntity.internalServerError().body(null)));
}
/**
* Health check endpoint
*/
@GetMapping("/health")
public Mono<ResponseEntity<Map<String, Object>>> healthCheck() {
return aiService.getBalance()
.map(balance -> ResponseEntity.ok(Map.of(
"status", "UP",
"provider", "HolySheep AI",
"balance", balance.getTotalBalance(),
"latency_ms", System.currentTimeMillis()
)))
.onErrorResume(error -> Mono.just(
ResponseEntity.ok(Map.of(
"status", "DOWN",
"error", error.getMessage()
))));
}
/**
* Fallback method for circuit breaker
*/
public Mono<ResponseEntity<ChatCompletionResponse>> chatFallback(
ChatRequest request, Throwable error) {
log.error("Circuit breaker triggered for chat request. Error: {}", error.getMessage());
return Mono.fromSupplier(() -> ResponseEntity.ok(
ChatCompletionResponse.builder()
.choices(List.of(
ChatCompletionResponse.Choice.builder()
.index(0)
.message(Message.builder()
.role("assistant")
.content("I apologize, but I'm experiencing high demand right now. " +
"Please try again in a few moments. " +
"HolySheep AI offers 85%+ savings compared to standard APIs " +
"with sub-50ms latency.")
.build())
.finishReason("error")
.build()
))
.build()
));
}
}
@Data
class ChatRequest {
private String model = "deepseek-v3.2"; // Default to most cost-effective model
@Valid
private List<Message> messages;
private Double temperature;
private Integer maxTokens;
}
Configuration Properties Class
package com.example.ai.config;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Typed view of the {@code ai.holysheep.*} configuration properties.
 *
 * Registered as a Spring bean via {@code @Component}; Lombok generates the accessors
 * used for binding. The initialisers below are the values used when a property is not
 * configured.
 */
@Data
@Component
@ConfigurationProperties(prefix = "ai.holysheep")
public class HolySheepProperties {
// Base URL that request paths such as "/chat/completions" are resolved against.
private String baseUrl = "https://api.holysheep.ai/v1";
// Bearer token; has no default, so it is null unless configured.
private String apiKey;
// Per-call timeout in seconds.
private int timeoutSeconds = 30;
// Maximum number of retries after the initial attempt.
private int maxRetries = 3;
// Minimum backoff between retries, in milliseconds.
private long retryDelayMs = 1000;
// NOTE(review): the next two values are not read by HolySheepAIService.init() as shown
// in this file; confirm whether they are consumed elsewhere.
private int connectionPoolSize = 100;
private int maxCodecResourcePoolSize = 200;
}
Testing the Integration
package com.example.ai.service;
import com.example.ai.dto.*;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
import reactor.core.publisher.Mono;
import reactor.test.StepVerifier;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
@SpringBootTest
@ActiveProfiles("test")
class HolySheepAIServiceIntegrationTest {
@Autowired
private HolySheepAIService aiService;
@Autowired
private HolySheepProperties properties;
@Test
void testSimpleChatCompletion()
Related Resources
Related Articles