Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
0739463
rebasing improved boids to new 4.0
PLAZMAMA May 24, 2025
fcc859a
remove unused log fields
PLAZMAMA May 24, 2025
568e620
remove unused variable
PLAZMAMA May 24, 2025
d1e457e
remove unused commented code
PLAZMAMA May 24, 2025
31ba846
remove unused boid_logs and fix logs calculation
PLAZMAMA May 29, 2025
374e167
fix overflow and zero report_interval
PLAZMAMA May 31, 2025
c5b5dd6
add above zero checks for num_boids and report_interval
PLAZMAMA May 31, 2025
2f0e99b
remove unused commented flat_actions
PLAZMAMA Jun 2, 2025
c93ad08
simplify seperation reward and test it
PLAZMAMA Jun 2, 2025
232dfb6
test out only avoid factor
PLAZMAMA Jun 2, 2025
ba3a091
remove unused avg_reward and change seperation factor reward
PLAZMAMA Jun 3, 2025
564e718
fix factor names
PLAZMAMA Jun 4, 2025
45aa2a2
remove unused commented code
PLAZMAMA Jun 4, 2025
6fbd13c
fix seperation factor reward calculation
PLAZMAMA Jun 5, 2025
d33c688
remove unused commented params
PLAZMAMA Jun 5, 2025
d3ca11d
remove normalization from separation factor calculation
PLAZMAMA Jun 5, 2025
cb31157
fix visual range
PLAZMAMA Jun 5, 2025
6fdeba6
remove positve margin rewards and remove commented code
PLAZMAMA Jun 6, 2025
0289751
add factors to env run with "boids.c"
PLAZMAMA Jun 6, 2025
0a18e2e
add debug margin lines and adjust reward normalization
PLAZMAMA Jun 6, 2025
8c69a45
only turn on margin turn factor and adjust total timesteps
PLAZMAMA Jun 6, 2025
2fc5c9e
change top/bottom margins
PLAZMAMA Jun 6, 2025
509fed7
account for boid width and hight in margin reward calculation
PLAZMAMA Jun 6, 2025
7a12bb7
increase max steps
PLAZMAMA Jun 11, 2025
6dbbc74
remove debug margin lines
PLAZMAMA Jun 12, 2025
e7db00e
fix observations for margin factor
PLAZMAMA Jun 12, 2025
7a49020
remove single agent params
PLAZMAMA Jun 12, 2025
997e63d
update boids.c observations allocation
PLAZMAMA Jun 12, 2025
e8641fa
update observations and actions comments
PLAZMAMA Jun 12, 2025
837ae37
remove commented parameters and update parameters to current best
PLAZMAMA Jul 4, 2025
67481cc
fix to "separation_factor" instead of "seperation_factor"
PLAZMAMA Jul 4, 2025
1511dbe
update preset env parameters
PLAZMAMA Jul 4, 2025
24679c3
condence controlled boid observation loop
PLAZMAMA Jul 4, 2025
f1caedd
remove use of protected range diff
PLAZMAMA Jul 7, 2025
4f1656a
change reward normalization number
PLAZMAMA Jul 7, 2025
0fe8839
update puffer resource path
PLAZMAMA Jul 8, 2025
bd4827c
enable all factors
PLAZMAMA Jul 27, 2025
aa85ccc
add euclidean distance to observations
PLAZMAMA Jul 27, 2025
9596181
add euclidean distance to local build observations
PLAZMAMA Jul 27, 2025
98fd5de
move boids config to new 4.0 location
PLAZMAMA Apr 4, 2026
55f772e
remove legacy boids.py
PLAZMAMA Apr 11, 2026
a0e8141
correct most recent factor values
PLAZMAMA Apr 11, 2026
d1f9ee2
add todo for factor normalization in reward calculation
PLAZMAMA Apr 11, 2026
b978ea6
fix/update env name
PLAZMAMA Apr 14, 2026
2be7efe
converting env names to vecenv compatible naming schemes
PLAZMAMA Apr 14, 2026
e0eb22e
condense boids stuct comments
PLAZMAMA Apr 14, 2026
72b1281
removing commented unnormalized rewards
PLAZMAMA Apr 14, 2026
4e4e0f3
remove unused dist var
PLAZMAMA Apr 14, 2026
a8bff56
bring random action back from 4.0
PLAZMAMA Apr 18, 2026
74d8f0d
add missing constant
PLAZMAMA Apr 18, 2026
1c0fb4e
remove unused vars assignment and constants
PLAZMAMA Apr 18, 2026
4e4ee92
fixing rewards in progress
PLAZMAMA Apr 19, 2026
787b663
normalize factors
PLAZMAMA Apr 24, 2026
31e580a
improve commented swep config, enable it
PLAZMAMA Apr 24, 2026
10ea435
comment debug prints and adjust reward normalization
PLAZMAMA Apr 24, 2026
aa93069
fix binding for 64 boids
PLAZMAMA Apr 25, 2026
aa881ed
fixed boids positive only actions(dumb me)
PLAZMAMA Apr 26, 2026
6b21d96
reduce actions to -1,0,1 (hopefully)
PLAZMAMA Apr 26, 2026
0b4d618
Update boid action offset and adjust reward factor testing
PLAZMAMA May 2, 2026
b65a0f0
Log per-factor reward contributions for boids
PLAZMAMA May 5, 2026
bb4bf8c
rename reward vars, and compress margin turn factor reward calc
PLAZMAMA May 5, 2026
39a9f79
rename rewards for clarification and training is screwed again!!!!!!!!!!
PLAZMAMA May 5, 2026
aef44d6
fix margin turn factor
PLAZMAMA May 6, 2026
43e092a
update rewards to 64
PLAZMAMA May 7, 2026
09fe18d
good cohesion and seperation magnitudes
PLAZMAMA May 7, 2026
fed0611
remove unused commented configs
PLAZMAMA May 9, 2026
73b64f0
remove debug prints
PLAZMAMA May 9, 2026
3a4c0ec
remove unused includes and MAX_DIST
PLAZMAMA May 9, 2026
a1b8a41
clean struct declaration
PLAZMAMA May 9, 2026
bf1304a
remove duplication and rename indx to idx
PLAZMAMA May 9, 2026
884cb85
rename respawn to spawn
PLAZMAMA May 9, 2026
5ad536e
compress init boid spawn loop
PLAZMAMA May 9, 2026
b34ee4f
unstack c_render function
PLAZMAMA May 9, 2026
efaf648
deduplicate boid velocity application
PLAZMAMA May 9, 2026
f2324f9
rename func to apply_action
PLAZMAMA May 9, 2026
3393983
remove unused commented code
PLAZMAMA May 10, 2026
21fb325
remove useless perf metric
PLAZMAMA May 10, 2026
3a5427f
reward func checkpt
PLAZMAMA May 10, 2026
dfb5e7f
remove rewards struct
PLAZMAMA May 14, 2026
e609064
remove angle diff and all the dead code it creates
PLAZMAMA May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 46 additions & 48 deletions config/boids.ini
Original file line number Diff line number Diff line change
@@ -1,68 +1,66 @@
[base]
package = ocean
env_name = boids
policy_name = Policy
rnn_name = Recurrent

[env]
num_envs = 64
num_boids = 64
num_agents = 64
; num_envs = 1
; num_boids = 1
margin_turn_factor = 0.0
centering_factor = 0.00
avoid_factor = 1.00
matching_factor = 1.00
; num_agents = 5
report_interval = 1
; margin_turn_factor = 1.0
; cohesion_factor = 0.0048
; separation_factor = 0.0128
; alignment_factor = 0.02
margin_turn_factor = 1.0
cohesion_factor = 0.001
separation_factor = 0.0
alignment_factor = 0.0

[vec]
num_workers = 2
num_envs = 2
batch_size = auto
total_agents = 4096
num_buffers = 8
num_threads = 8

[train]
total_timesteps = 100_000_000
gamma = 0.95
learning_rate = 0.025
minibatch_size = 16384
; minibatch_size = 1

; [sweep]
; method = protein
; metric = episode_length
[sweep]
method = Protein
metric = perf
sweep_only = margin_turn_factor, cohesion_factor, separation_factor, alignment_factor

; [sweep.train.total_timesteps]
; distribution = log_normal
; min = 1e6
; max = 1e7
; mean = 5e6
; scale = 0.5
[sweep.train.total_timesteps]
distribution = log_normal
min = 1e3
max = 1e7
scale = time

; [sweep.train.gamma]
; distribution = log_normal
; min = 0.9
; max = 0.999
; mean = 0.97
[sweep.env.margin_turn_factor]
distribution = log_normal
min = 0.01
max = 5.0
scale = auto

; [sweep.train.gae_lambda]
; distribution = log_normal
; min = 0.7
; max = 0.999
; mean = 0.95

; [sweep.train.learning_rate]
; distribution = log_normal
; min = 0.0001
; max = 0.001
; mean = 0.00025
; scale = 0.5

; [sweep.train.batch_size]
; min = 32768
; max = 131072
; mean = 65536
; scale = 0.5

; [sweep.train.minibatch_size]
; min = 512
; max = 2048
; mean = 1024
; scale = 0.5
[sweep.env.cohesion_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto

[sweep.env.separation_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto

[sweep.env.alignment_factor]
distribution = log_normal
min = 0.01
max = 1
scale = auto
35 changes: 19 additions & 16 deletions ocean/boids/binding.c
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
#include "boids.h"
#define OBS_SIZE 512 // 64 boids * 8 obs per boid
#define NUM_ATNS 2 // Two discrete actions per boid
#define ACT_SIZES {3, 3}
#define OBS_TENSOR_T FloatTensor

#define Env Boids
#include "../env_binding.h"
#include "vecenv.h"

static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->num_boids = unpack(kwargs, "num_boids");
env->report_interval = unpack(kwargs, "report_interval");
env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
env->centering_factor = unpack(kwargs, "centering_factor");
env->avoid_factor = unpack(kwargs, "avoid_factor");
env->matching_factor = unpack(kwargs, "matching_factor");
void my_init(Env* env, Dict* kwargs) {
env->num_agents = (unsigned int)dict_get(kwargs, "num_agents")->value;
env->report_interval = (unsigned)dict_get(kwargs, "report_interval")->value;
env->margin_turn_factor = (float)dict_get(kwargs, "margin_turn_factor")->value;
env->cohesion_factor = (float)dict_get(kwargs, "cohesion_factor")->value;
env->separation_factor = (float)dict_get(kwargs, "separation_factor")->value;
env->alignment_factor = (float)dict_get(kwargs, "alignment_factor")->value;
init(env);
return 0;
}

static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
assign_to_dict(dict, "n", log->n);
return 0;
void my_log(Log* log, Dict* out) {
dict_set(out, "score", log->score);
dict_set(out, "margin_turn_reward", log->t_margin_turn_reward);
dict_set(out, "cohesion_reward", log->t_cohesion_reward);
dict_set(out, "separation_reward", log->t_separation_reward);
dict_set(out, "alignment_reward", log->t_alignment_reward);
dict_set(out, "n", log->n);
}
29 changes: 13 additions & 16 deletions ocean/boids/boids.c
Original file line number Diff line number Diff line change
@@ -1,40 +1,37 @@
// Standalone C demo for Boids environment
// Compile using: ./scripts/build_ocean.sh boids [local|fast]
// Compile using: ./scripts/build.sh boids [local|fast]
// Run with: ./boids


#include <time.h>
#include "boids.h"
#include <stdlib.h>

// --- Demo Configuration ---
#define NUM_BOIDS_DEMO 20 // Number of boids for the standalone demo
#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
#define num_agents_DEMO 32 // Number of boids for the standalone demo
#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
#define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0]

// Dummy action generation: random velocity changes for each boid
void generate_dummy_actions(Boids* env) {
for (unsigned int i = 0; i < env->num_boids; ++i) {
// Generate random floats in [-1, 1] range
for (unsigned int i = 0; i < env->num_agents; ++i) {
float rand_vx = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
float rand_vy = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;

// Scale to the action space [-ACTION_SCALE, ACTION_SCALE]
env->actions[i * 2 + 0] = rand_vx * ACTION_SCALE;
env->actions[i * 2 + 1] = rand_vy * ACTION_SCALE;
}
}

void demo() {
// Initialize Boids environment struct
Boids env = {0};
env.num_boids = NUM_BOIDS_DEMO;
env.num_agents = num_agents_DEMO;
env.report_interval = REPORT_INTERVAL_DEMO;

// In the Python binding, these pointers are assigned from NumPy arrays.
// Here, we need to allocate them explicitly.
size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy)
size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy)
size_t obs_size = env.num_agents * env.num_agents * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
size_t act_size = env.num_agents * 2; // the 2 = (dvx, dvy)
env.observations = (float*)calloc(obs_size, sizeof(float));
env.actions = (float*)calloc(act_size, sizeof(float));
env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward
env.rewards = (float*)calloc(env.num_agents, sizeof(float)); // Env-level reward

if (!env.observations || !env.actions || !env.rewards) {
fprintf(stderr, "ERROR: Failed to allocate memory for demo buffers.\n");
Expand All @@ -57,7 +54,7 @@ void demo() {
c_reset(&env);
int total_steps = 0;

printf("Starting Boids demo with %d boids. Press ESC to exit.\n", env.num_boids);
printf("Starting Boids demo with %u boids. Press ESC to exit.\n", env.num_agents);

while (!WindowShouldClose() && total_steps < MAX_STEPS_DEMO) { // Raylib function to check if ESC is pressed or window closed
generate_dummy_actions(&env);
Expand Down
Loading
Loading