POST /training/grpo
import PiClient from 'withpi';

// Client instance used by the example request below.
const client = new PiClient({
  // This is the default and can be omitted.
  apiKey: process.env.WITHPI_API_KEY,
});

/**
 * Starts a sample GRPO training job through `client.training.grpo.startJob`
 * and logs the `job_id` field of the response.
 *
 * The scoring spec is assembled from small named pieces so the nesting
 * (spec -> dimensions -> sub_dimensions) is easy to follow.
 *
 * @returns {Promise<void>} Resolves once the job id has been logged.
 */
async function main() {
  const subDimension = {
    description: 'subdimension1 description',
    label: 'subdimension1',
    scoring_type: 'PI_SCORER',
  };

  const dimension = {
    description: 'dimension1 description',
    label: 'dimension1',
    sub_dimensions: [subDimension],
  };

  const scoringSpec = {
    description: "Write a children's story communicating a simple life lesson.",
    dimensions: [dimension],
    name: 'Sample Scoring Spec',
  };

  const trainingExamples = [{ llm_input: 'Tell me something different' }];

  const { job_id: jobId } = await client.training.grpo.startJob({
    base_rl_model: 'LLAMA_3.2_3B',
    examples: trainingExamples,
    learning_rate: 0.000005,
    lora_config: {},
    num_train_epochs: 10,
    scoring_spec: scoringSpec,
    system_prompt: 'An optional system prompt.',
  });

  console.log(jobId);
}

// main() returns a promise; attach a rejection handler so a failed request is
// reported and the process exits with a non-zero status instead of leaving
// the promise floating.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
{
  "detailed_status": [
    "Downloading model",
    "Tuning prompt"
  ],
  "job_id": "1234abcd",
  "state": "RUNNING",
  "trained_models": [
    {
      "epoch": 123,
      "eval_loss": 123,
      "pi_score": 0,
      "serving_id": 123,
      "serving_state": "UNLOADED",
      "step": 123
    }
  ]
}

Authorizations

x-api-key
string
header
required

Body

application/json
base_rl_model
enum<string>
required

The base model to use as the starting point for the RL tuning process

Available options:
LLAMA_3.2_3B,
LLAMA_3.1_8B
examples
object[]
required

Examples to use in the RL tuning process

An example for RL training

learning_rate
number
required

GRPO learning rate

Example:

0.000005

lora_config
object
required

The LoRA configuration.

num_train_epochs
integer
required

GRPO number of train epochs

Example:

10

scoring_spec
object
required

The scoring spec to use in the GRPO tuning process

system_prompt
string | null
required

A custom system prompt to use during the RL tuning process

Example:

"An optional system prompt."

Response

200
application/json
Successful Response

RlGrpoStatus is the status of an RL GRPO job.

detailed_status
string[]
required

Detailed status of the job

Example:
["Downloading model", "Tuning prompt"]
job_id
string
required

The job id

Example:

"1234abcd"

state
enum<string>
required

Current state of the job

Available options:
QUEUED,
RUNNING,
DONE,
ERROR,
CANCELLED
trained_models
object[] | null

A list of trained models selected based on the PI score.