The Atlas BigLaw / Big Michael — documentation bound to its code
7 documents

What a matter costs

See both ledgers a matter accrues: a CostEntry for every single model call (tokens, USD, cache buckets, local power) and a billable TimeEntry in 6-minute units for task runs, gate reviews, and AI agent work.

src/cost/index.ts · 293 lines · calcCostUsd L159–174
Outline 18 symbols
1// SPDX-License-Identifier: AGPL-3.0-only
2// Copyright (C) 2026 Discover Legal
3
/**
 * Model cost and power tracking.
 *
 * Every provider call records a CostEntry — token counts, USD cost, and (for
 * local models) an estimated power draw. Entries persist to costs.jsonl
 * (path configurable via COST_LOG_FILE) and are queryable by task, profile,
 * or aggregate summary.
 *
 * Pricing is taken from the BASE_PRICING table below. Override the price of
 * any model listed in that table via environment variables:
 *   COST_<NORMALISED_MODEL_ID>_IN=3.00   (USD per million input tokens)
 *   COST_<NORMALISED_MODEL_ID>_OUT=15.00 (USD per million output tokens)
 * where NORMALISED_MODEL_ID is the model string uppercased with hyphens, dots,
 * and slashes replaced by underscores, e.g. COST_CLAUDE_SONNET_4_6_IN=3.00
 *
 * Local inference power consumption is estimated from wall-clock duration and
 * the configured LOCAL_INFERENCE_WATTS value (default 250 W for a typical GPU).
 * Set LOCAL_INFERENCE_WATTS=30 for Apple Silicon / low-power devices.
 */
22
23import { appendFile, readFile, mkdir } from "node:fs/promises";
24import { dirname } from "node:path";
25import { randomUUID } from "node:crypto";
26import { logger } from "../logger.js";
27import { calcEmissions } from "./emissions.js";
28import { Config } from "../config.js";
29
30// ─── Types ────────────────────────────────────────────────────────────────────
31
/**
 * Purpose label attached to every recorded model call, used to break spend
 * down by the kind of work that triggered it.
 */
export type CostContext =
  // ── Orchestration ──────────────────────────────────────────────────────
  /* agent processing within a DyTopo round */
  | "task"
  /* Need/Offer generation (Haiku, many parallel) */
  | "descriptor"
  /* root orchestrator final synthesis */
  | "synthesis"
  /* structured table extraction */
  | "tabulate"
  /* round goal generation */
  | "round_goal"
  // ── Protocols ──────────────────────────────────────────────────────────
  /* adversarial debate (Opus) */
  | "protocol_debate"
  /* verification pipeline (Haiku ×N) */
  | "protocol_verify"
  // ── Analysis and billing ───────────────────────────────────────────────
  /* LinkedIn tone analysis chain */
  | "tone_analysis"
  /* practice area / client / NOSLEGAL detection */
  | "classification"
  /* OCG rule extraction from document */
  | "ocg_extraction"
  /* OCG billing compliance check on time entries */
  | "ocg_check"
  /* client voice/communication guide analysis */
  | "voice_analysis"
  /* worker-generated OCG-compliant time entry description */
  | "entry_summarize"
  // ── Engines ────────────────────────────────────────────────────────────
  /* CitationEngine — KeyCite/Shepard's replacement */
  | "citation_check"
  /* PlaybookBuilder — Contract Express/Practical Law replacement */
  | "playbook_build"
  /* InvoiceValidator — reverse-OCG in-house billing review */
  | "invoice_validation"
  /* RedlineEngine — automated playbook-driven contract markup */
  | "redline"
  /* HeadnoteEngine — Sonnet extraction pass */
  | "headnote_extract"
  /* HeadnoteEngine — Haiku synthesis/meta pass */
  | "headnote_meta"
  /* BriefingEngine — Sonnet pre-call partner briefing */
  | "client_briefing"
  /* PrecedentGenerator — Haiku clause structure */
  | "precedent_structure"
  /* PrecedentGenerator — Opus document drafting */
  | "precedent_draft";
55
/**
 * One recorded model call: which model ran, how many tokens it used, what it
 * cost, and — for local inference — the estimated energy footprint.
 */
export interface CostEntry {
  // ── Identity ───────────────────────────────────────────────────────────
  /** Unique identifier of this entry. */
  id: string;
  /** Timestamp string of when the entry was recorded. */
  ts: string;

  // ── What ran ───────────────────────────────────────────────────────────
  /** Model identifier string, as used for pricing lookups. */
  model: string;
  /** Backend that served the call. */
  provider: "anthropic" | "ollama" | "local";

  // ── Token usage ────────────────────────────────────────────────────────
  /** Input tokens billed at the full input rate. */
  inputTokens: number;
  /** Output tokens billed at the output rate. */
  outputTokens: number;
  /**
   * Prompt-cache write tokens (Anthropic only).
   * Priced at 1.25× input rate.
   */
  cacheWriteTokens?: number;
  /**
   * Prompt-cache read tokens (Anthropic only).
   * Priced at 0.10× input rate.
   */
  cacheReadTokens?: number;

  // ── Money and energy ───────────────────────────────────────────────────
  /**
   * USD cost, or null for local models with no API charge.
   */
  costUsd: number | null;
  /**
   * Estimated power draw in watt-hours (local models only).
   */
  estimatedWh: number | null;
  /**
   * Configured watts used for the estimate.
   */
  estimatedWatts: number | null;
  /**
   * CO₂ emissions in grams for this call
   * (local inference only, from CO2.js grid data).
   */
  co2Grams: number | null;
  /**
   * Estimated electricity cost in USD
   * (local inference only, IEA 2024 tariff data).
   */
  electricityCostUsd: number | null;

  // ── Attribution ────────────────────────────────────────────────────────
  /** Duration of the call in milliseconds. */
  durationMs: number;
  /** Kind of work that triggered the call. */
  context: CostContext;
  /** Task this call belongs to, when known. */
  taskId?: string;
  /** Profile this call belongs to, when known. */
  profileId?: string;
  /** Agent that issued the call, when known. */
  agentId?: string;
}
83
/** Running totals kept for each distinct model. */
interface ModelCostTotals {
  usd: number;
  inputTokens: number;
  outputTokens: number;
  cacheWriteTokens: number;
  cacheReadTokens: number;
  wh: number;
  co2Grams: number;
  electricityCostUsd: number;
  /** Number of entries folded into this bucket. */
  calls: number;
}

/** Running totals kept for each distinct cost context. */
interface ContextCostTotals {
  usd: number;
  inputTokens: number;
  outputTokens: number;
  /** Number of entries folded into this bucket. */
  calls: number;
}

/**
 * Aggregate view over a set of cost entries: grand totals plus per-model and
 * per-context breakdowns.
 */
export interface CostSummary {
  // Grand totals across every entry in the set.
  totalUsd: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  totalCacheWriteTokens: number;
  totalCacheReadTokens: number;
  totalWh: number;
  totalCo2Grams: number;
  totalElectricityCostUsd: number;

  /** Breakdown keyed by model identifier. */
  byModel: Record<string, ModelCostTotals>;
  /** Breakdown keyed by cost context. */
  byContext: Record<string, ContextCostTotals>;

  /** How many entries were summarised. */
  entryCount: number;
}
107
108// ─── Pricing ──────────────────────────────────────────────────────────────────
109
/** Price of one model, in USD per million tokens. */
interface ModelPrice {
  /** Rate for input tokens. */
  in: number;
  /** Rate for output tokens. */
  out: number;
}

/**
 * Built-in price table, keyed by exact model identifier.
 *
 * Values are USD per million tokens (input / output) and reflect Anthropic
 * list pricing as of mid-2026; adjust via env if needed.
 *
 * NOTE(review): list prices change between model generations — confirm each
 * row against the provider's current published price list before relying on it.
 */
const BASE_PRICING: Record<string, ModelPrice> = {
  // Claude 4.x generation
  "claude-haiku-4-5-20251001": { in: 1.00, out: 5.00 },
  "claude-haiku-4-5": { in: 1.00, out: 5.00 },
  "claude-sonnet-4-6": { in: 3.00, out: 15.00 },
  "claude-opus-4-8": { in: 15.00, out: 75.00 },
  "claude-opus-4-5": { in: 15.00, out: 75.00 },

  // Claude 3.x generation
  "claude-3-5-haiku-20241022": { in: 1.00, out: 5.00 },
  "claude-3-5-sonnet-20241022": { in: 3.00, out: 15.00 },
  "claude-3-haiku-20240307": { in: 0.25, out: 1.25 },
  "claude-3-opus-20240229": { in: 15.00, out: 75.00 },
};
125
/**
 * Convert a model identifier into the token used inside the
 * `COST_<…>_IN` / `COST_<…>_OUT` environment variable names.
 *
 * The identifier is upper-cased and every `-`, `.` and `/` becomes `_`.
 *
 * @param model Model identifier, e.g. `claude-sonnet-4-6`.
 * @returns The normalised key, e.g. `CLAUDE_SONNET_4_6`.
 */
function normaliseModelKey(model: string): string {
  const upperCased = model.toUpperCase();
  // Splitting on each separator and re-joining with "_" swaps them one-for-one.
  const segments = upperCased.split(/[-./]/);
  return segments.join("_");
}
129
/**
 * Read one price override from its raw environment value.
 *
 * @param raw Raw environment value; `undefined` or empty means "not set".
 * @param fallback Built-in price to use when no usable override exists.
 * @returns The parsed override when it is a finite, non-negative number;
 *   otherwise `fallback`.
 */
function parsePriceOverride(raw: string | undefined, fallback: number): number {
  if (!raw) return fallback;
  const parsed = Number.parseFloat(raw);
  // A malformed value parses to NaN, which would silently turn every cost
  // computed for the model into NaN — keep the built-in price instead.
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}

/**
 * Build the effective price table: the built-in prices with any
 * `COST_<NORMALISED_MODEL_ID>_IN` / `_OUT` environment overrides applied.
 *
 * Only models present in the built-in table can be overridden, and the input
 * and output rates are overridden independently. Unset, empty, non-numeric or
 * negative values leave the built-in rate in place.
 *
 * @returns A new table; the built-in table is not modified.
 */
function loadPricing(): Record<string, ModelPrice> {
  const pricing: Record<string, ModelPrice> = { ...BASE_PRICING };
  for (const [modelId, base] of Object.entries(BASE_PRICING)) {
    const key = normaliseModelKey(modelId);
    pricing[modelId] = {
      in: parsePriceOverride(process.env[`COST_${key}_IN`], base.in),
      out: parsePriceOverride(process.env[`COST_${key}_OUT`], base.out),
    };
  }
  return pricing;
}

/** Effective price table, resolved once when the module is loaded. */
const PRICING = loadPricing();
147
/**
 * Calculate USD cost for a model call.
 *
 * Anthropic uses three input token buckets with different rates, plus output:
 * - inputTokens: 100% of input rate (non-cached)
 * - cacheWriteTokens: 125% of input rate (written to prompt cache)
 * - cacheReadTokens: 10% of input rate (served from prompt cache)
 * - outputTokens: output rate
 *
 * Rates in the pricing table are per million tokens, hence the final division.
 *
 * @param model Exact model identifier to look up in the pricing table.
 * @param inputTokens Non-cached input tokens.
 * @param outputTokens Output tokens.
 * @param cacheWriteTokens Tokens written to the prompt cache (default 0).
 * @param cacheReadTokens Tokens served from the prompt cache (default 0).
 * @returns The cost in USD, or null if the model is not in the pricing table
 *   (e.g. unknown local model).
 */
export function calcCostUsd(
  model: string,
  inputTokens: number,
  outputTokens: number,
  cacheWriteTokens = 0,
  cacheReadTokens = 0,
): number | null {
  // Own-property check: a bare index lookup on a plain object also resolves
  // inherited keys such as "constructor" or "toString" to truthy values that
  // carry no prices, which would produce NaN instead of null.
  if (!Object.prototype.hasOwnProperty.call(PRICING, model)) return null;
  const price = PRICING[model];
  if (!price) return null;
  return (
    inputTokens * price.in +
    outputTokens * price.out +
    cacheWriteTokens * price.in * 1.25 +
    cacheReadTokens * price.in * 0.10
  ) / 1_000_000;
}
175
/**
 * Convert a power draw sustained over a duration into energy.
 *
 * @param watts Power draw in watts.
 * @param durationMs Duration in milliseconds.
 * @returns Energy in watt-hours.
 */
export function calcWattHours(watts: number, durationMs: number): number {
  const MS_PER_HOUR = 3_600_000;
  const wattMilliseconds = watts * durationMs;
  return wattMilliseconds / MS_PER_HOUR;
}
179
180// ─── Store ────────────────────────────────────────────────────────────────────
181
/** Path of the JSONL cost log; override with the COST_LOG_FILE environment variable. */
const COST_FILE = process.env.COST_LOG_FILE ?? "./data/costs.jsonl";

/**
 * In-memory list of cost entries mirrored to an append-only JSONL file.
 *
 * `init()` loads any existing log, `record()` adds an entry to memory and
 * queues it for appending to the file, and the query methods work purely on
 * the in-memory list.
 */
export class CostStore {
  private entries: CostEntry[] = [];
  // Serialise writes through a promise chain to prevent interleaved appends.
  private writeChain: Promise<void> = Promise.resolve();

  /**
   * Load the existing cost log into memory, creating its directory if needed.
   *
   * Never rejects: if the log cannot be read the store starts empty. A missing
   * file is the normal first-run case and stays quiet; any other failure is
   * logged as a warning so that it is not mistaken for an empty history.
   */
  async init(): Promise<void> {
    try {
      await mkdir(dirname(COST_FILE), { recursive: true });
      const raw = await readFile(COST_FILE, "utf8");
      // Parse per line and skip malformed ones — a single truncated tail line
      // (e.g. from a crash mid-append) must not discard the entire cost history.
      const loaded: CostEntry[] = [];
      let skipped = 0;
      for (const line of raw.trim().split("\n").filter(Boolean)) {
        try {
          const parsed: unknown = JSON.parse(line);
          // Valid JSON that is not an object (null, a number, an array, …)
          // cannot be a cost entry and would break summarise() later.
          if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
            loaded.push(parsed as CostEntry);
          } else {
            skipped++;
          }
        } catch {
          skipped++;
        }
      }
      this.entries = loaded;
      logger.info("Cost log loaded", { entries: this.entries.length, skipped, file: COST_FILE });
    } catch (err: unknown) {
      this.entries = [];
      const code =
        typeof err === "object" && err !== null && "code" in err
          ? (err as { code?: unknown }).code
          : undefined;
      if (code !== "ENOENT") {
        logger.warn("Cost log load failed", {
          error: err instanceof Error ? err.message : String(err),
          file: COST_FILE,
        });
      }
    }
  }

  /**
   * Record one model call.
   *
   * The entry is added to memory immediately and queued for appending to the
   * log file; a failed write is logged as a warning and does not throw.
   * Emissions are computed here from `estimatedWh` and the configured
   * inference region when `estimatedWh` is non-null.
   *
   * @param entry Call details; `id`, `ts`, `co2Grams` and `electricityCostUsd`
   *   are filled in by the store.
   */
  record(entry: Omit<CostEntry, "id" | "ts" | "co2Grams" | "electricityCostUsd">): void {
    const emissions = entry.estimatedWh != null
      ? calcEmissions(entry.estimatedWh, Config.local.inferenceRegion)
      : null;
    const full: CostEntry = {
      ...entry,
      // Authoritative computed fields last so a caller-supplied object cannot
      // override the id/timestamp/emissions (the Omit type is erased at runtime).
      id: randomUUID(),
      ts: new Date().toISOString(),
      co2Grams: emissions?.co2Grams ?? null,
      electricityCostUsd: emissions?.electricityCostUsd ?? null,
    };
    this.entries.push(full);
    this.writeChain = this.writeChain
      .then(() => appendFile(COST_FILE, JSON.stringify(full) + "\n", "utf8"))
      .catch((err: unknown) =>
        logger.warn("Cost log write failed", {
          error: err instanceof Error ? err.message : String(err),
        }),
      );
  }

  /** Return every in-memory entry whose `taskId` equals the given id. */
  forTask(taskId: string): CostEntry[] {
    return this.entries.filter((e) => e.taskId === taskId);
  }

  /** Return every in-memory entry whose `profileId` equals the given id. */
  forProfile(profileId: string): CostEntry[] {
    return this.entries.filter((e) => e.profileId === profileId);
  }

  /**
   * Aggregate entries into grand totals plus per-model and per-context buckets.
   *
   * Null or absent cost, energy and cache fields count as zero.
   *
   * @param entries Entries to summarise; defaults to everything in memory.
   * @returns The totals, the two breakdowns, and the number of entries summarised.
   */
  summarise(entries: CostEntry[] = this.entries): CostSummary {
    // Accumulate in Maps rather than plain objects: model names are free-form
    // strings, and a name such as "constructor" would otherwise resolve to an
    // inherited Object member instead of starting a fresh bucket.
    const byModel = new Map<string, CostSummary["byModel"][string]>();
    const byContext = new Map<string, CostSummary["byContext"][string]>();
    let totalUsd = 0;
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalCacheWriteTokens = 0;
    let totalCacheReadTokens = 0;
    let totalWh = 0;
    let totalCo2Grams = 0;
    let totalElectricityCostUsd = 0;

    for (const e of entries) {
      const usd = e.costUsd ?? 0;
      const wh = e.estimatedWh ?? 0;
      const cw = e.cacheWriteTokens ?? 0;
      const cr = e.cacheReadTokens ?? 0;
      const co2 = e.co2Grams ?? 0;
      const elec = e.electricityCostUsd ?? 0;
      totalUsd += usd;
      totalInputTokens += e.inputTokens;
      totalOutputTokens += e.outputTokens;
      totalCacheWriteTokens += cw;
      totalCacheReadTokens += cr;
      totalWh += wh;
      totalCo2Grams += co2;
      totalElectricityCostUsd += elec;

      let m = byModel.get(e.model);
      if (!m) {
        m = {
          usd: 0, inputTokens: 0, outputTokens: 0,
          cacheWriteTokens: 0, cacheReadTokens: 0,
          wh: 0, co2Grams: 0, electricityCostUsd: 0, calls: 0,
        };
        byModel.set(e.model, m);
      }
      m.usd += usd;
      m.inputTokens += e.inputTokens;
      m.outputTokens += e.outputTokens;
      m.cacheWriteTokens += cw;
      m.cacheReadTokens += cr;
      m.wh += wh;
      m.co2Grams += co2;
      m.electricityCostUsd += elec;
      m.calls += 1;

      let c = byContext.get(e.context);
      if (!c) {
        c = { usd: 0, inputTokens: 0, outputTokens: 0, calls: 0 };
        byContext.set(e.context, c);
      }
      c.usd += usd;
      c.inputTokens += e.inputTokens;
      c.outputTokens += e.outputTokens;
      c.calls += 1;
    }

    return {
      totalUsd, totalInputTokens, totalOutputTokens,
      totalCacheWriteTokens, totalCacheReadTokens,
      totalWh, totalCo2Grams, totalElectricityCostUsd,
      byModel: Object.fromEntries(byModel),
      byContext: Object.fromEntries(byContext),
      entryCount: entries.length,
    };
  }
}

/** Shared store instance for the process. */
export const costStore = new CostStore();
293