# Metrics

Cedana API for metrics

## GET /v2/metrics/k8

> Get metrics (K8s)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"NodeResourceMetricList":{"type":"object","required":["name"],"properties":{"details":{"type":"array","items":{"$ref":"#/components/schemas/NodeMetricItem"}},"name":{"type":"string"}}},"NodeMetricItem":{"type":"object","required":["metric_name","collector","value","timestamp"],"properties":{"collector":{"type":"string"},"cpu_core":{"type":"string"},"cpu_mode":{"type":"string"},"filesystem":{"type":"string"},"metric_name":{"type":"string"},"mountpoint":{"type":"string"},"resource":{"type":["string","null"]},"timestamp":{"type":"integer","minimum":0},"value":{"type":"string"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/k8":{"get":{"tags":["Metrics"],"summary":"Get metrics (K8s)","operationId":"get_k8_metrics","responses":{"200":{"description":"Returns k8 metrics from duckdb","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/NodeResourceMetricList"}}}}},"500":{"description":"Failed to connect/fetch from metrics table from database","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm

> Get metrics (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"SlurmMetric":{"type":"object","required":["id","name","timestamp"],"properties":{"counter_value":{"type":["number","null"],"format":"double"},"gauge_value":{"type":["number","null"],"format":"double"},"id":{"type":"integer","format":"int64"},"kind":{"type":["string","null"]},"name":{"type":"string"},"tags":{},"timestamp":{"type":"integer","minimum":0}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm":{"get":{"tags":["Metrics"],"summary":"Get metrics (Slurm)","operationId":"get_slurm_metrics","responses":{"200":{"description":"Returns top 10 latest SLURM metrics from duckdb","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/SlurmMetric"}}}}},"500":{"description":"Failed to connect/fetch from SLURM metrics table in database","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/overview

> Get cluster overview (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"SlurmClusterOverview":{"type":"object","required":["timestamp"],"properties":{"cpu_load":{"type":["number","null"],"format":"double"},"cpus_idle":{"type":["number","null"],"format":"double"},"cpus_total":{"type":["number","null"],"format":"double"},"mem_alloc":{"type":["number","null"],"format":"double"},"mem_free":{"type":["number","null"],"format":"double"},"mem_real":{"type":["number","null"],"format":"double"},"timestamp":{"type":"integer","minimum":0}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/overview":{"get":{"tags":["Metrics"],"summary":"Get cluster overview (Slurm)","operationId":"get_slurm_cluster_overview","responses":{"200":{"description":"Returns latest SLURM cluster overview statistics","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SlurmClusterOverview"}}}},"500":{"description":"Failed to fetch SLURM cluster overview","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/overview/state

> Get state breakdown (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"SlurmStateBreakdown":{"type":"object","required":["timestamp","cpus_by_state","nodes_by_state"],"properties":{"cpus_by_state":{"type":"array","items":{"$ref":"#/components/schemas/StateItem"}},"nodes_by_state":{"type":"array","items":{"$ref":"#/components/schemas/StateItem"}},"timestamp":{"type":"integer","minimum":0}}},"StateItem":{"type":"object","required":["state","value"],"properties":{"state":{"type":"string"},"value":{"type":"number","format":"double"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/overview/state":{"get":{"tags":["Metrics"],"summary":"Get state breakdown (Slurm)","operationId":"get_slurm_state_breakdown","responses":{"200":{"description":"Returns state breakdown for CPUs and nodes","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SlurmStateBreakdown"}}}},"500":{"description":"Failed to fetch SLURM state breakdown","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/partitions

> Get partition statistics (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"PartitionStats":{"type":"object","required":["partition_name","timestamp"],"properties":{"cpu_load":{"type":["number","null"],"format":"double"},"free_memory":{"type":["number","null"],"format":"double"},"idle_cpus":{"type":["number","null"],"format":"double"},"idle_nodes":{"type":["number","null"],"format":"double"},"partition_name":{"type":"string"},"real_memory":{"type":["number","null"],"format":"double"},"timestamp":{"type":"integer","minimum":0},"total_cpus":{"type":["number","null"],"format":"double"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/partitions":{"get":{"tags":["Metrics"],"summary":"Get partition statistics (Slurm)","operationId":"get_partition_stats","responses":{"200":{"description":"Returns list of all partitions with their statistics","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/PartitionStats"}}}}},"500":{"description":"Failed to fetch partition statistics","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/partitions/{name}/timeseries

> Get partition time series (Slurm)
>
> Returns CPU load and free memory time series for the specified partition over the last 10 data points.

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"PartitionTimeSeries":{"type":"object","required":["partition_name","cpu_load","free_memory"],"properties":{"cpu_load":{"type":"array","items":{"$ref":"#/components/schemas/TimeSeriesDataPoint"}},"free_memory":{"type":"array","items":{"$ref":"#/components/schemas/TimeSeriesDataPoint"}},"partition_name":{"type":"string"}}},"TimeSeriesDataPoint":{"type":"object","required":["timestamp","value"],"properties":{"timestamp":{"type":"integer","minimum":0},"value":{"type":"number","format":"double"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/partitions/{name}/timeseries":{"get":{"tags":["Metrics"],"summary":"Get partition time series (Slurm)","description":"Returns CPU load and free memory time series for the specified partition over the last 10 data\npoints","operationId":"get_partition_timeseries","parameters":[{"name":"name","in":"path","description":"Partition name to query timeseries for","required":true,"schema":{"type":"string"}}],"responses":{"200":{"description":"Returns time series data for the specified partition","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PartitionTimeSeries"}}}},"500":{"description":"Failed to fetch partition time series","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/timeseries/cpu-load

> Get CPU load time series (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"TimeSeriesDataPoint":{"type":"object","required":["timestamp","value"],"properties":{"timestamp":{"type":"integer","minimum":0},"value":{"type":"number","format":"double"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/timeseries/cpu-load":{"get":{"tags":["Metrics"],"summary":"Get CPU load time series (Slurm)","operationId":"get_cpu_load_timeseries","responses":{"200":{"description":"Returns CPU load time series (last 10 data points)","content":{"application/json":{"schema":{"type":"array","items":{"$ref":"#/components/schemas/TimeSeriesDataPoint"}}}}},"500":{"description":"Failed to fetch CPU load time series","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```

## GET /v2/metrics/slurm/timeseries/memory

> Get memory time series (Slurm)

```json
{"openapi":"3.1.0","info":{"title":"Cedana API","version":"0.0.0-staging"},"tags":[{"name":"Metrics","description":"Cedana API for metrics"}],"security":[{"Token":[]}],"components":{"securitySchemes":{"Token":{"type":"http","scheme":"bearer"}},"schemas":{"MemoryTimeSeries":{"type":"object","required":["free_memory","allocated_memory"],"properties":{"allocated_memory":{"type":"array","items":{"$ref":"#/components/schemas/TimeSeriesDataPoint"}},"free_memory":{"type":"array","items":{"$ref":"#/components/schemas/TimeSeriesDataPoint"}}}},"TimeSeriesDataPoint":{"type":"object","required":["timestamp","value"],"properties":{"timestamp":{"type":"integer","minimum":0},"value":{"type":"number","format":"double"}}},"ApiError":{"type":"object","description":"Standard error response for API endpoints","required":["message"],"properties":{"code":{"type":["string","null"],"description":"HTTP status code"},"details":{"description":"Optional additional error details"},"message":{"type":"string","description":"Error message describing what went wrong"}}}}},"paths":{"/v2/metrics/slurm/timeseries/memory":{"get":{"tags":["Metrics"],"summary":"Get memory time series (Slurm)","operationId":"get_memory_timeseries","responses":{"200":{"description":"Returns memory time series (last 10 data points for free and allocated)","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MemoryTimeSeries"}}}},"500":{"description":"Failed to fetch memory time series","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ApiError"}}}}}}}}}
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://docs.cedana.ai/references/api/metrics.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
