PufferAI · PLAZMAMA · May 24, 2025 · May 24, 2025 · May 24, 2025 · May 24, 2025
diff --git a/config/boids.ini b/config/boids.ini
@@ -1,68 +1,66 @@
 [base]
+package = ocean
 env_name = boids
+policy_name = Policy
+rnn_name = Recurrent
 
 [env]
 num_envs = 64
-num_boids = 64
+num_agents = 64
 ; num_envs = 1
-; num_boids = 1
-margin_turn_factor = 0.0
-centering_factor = 0.00
-avoid_factor = 1.00
-matching_factor = 1.00
+; num_agents = 5
+report_interval = 1
+; margin_turn_factor = 1.0
+; cohesion_factor = 0.0048
+; separation_factor = 0.0128
+; alignment_factor = 0.02
+margin_turn_factor = 1.0
+cohesion_factor = 0.001
+separation_factor = 0.0
+alignment_factor = 0.0
 
 [vec]
-num_workers = 2
-num_envs = 2
-batch_size = auto
+total_agents = 4096
+num_buffers = 8
+num_threads = 8
 
 [train]
 total_timesteps = 100_000_000
 gamma = 0.95
 learning_rate = 0.025
 minibatch_size = 16384
-; minibatch_size = 1
 
-; [sweep]
-; method = protein
-; metric = episode_length
+[sweep]
+method = Protein
+metric = perf
+sweep_only = margin_turn_factor, cohesion_factor, separation_factor, alignment_factor
 
-; [sweep.train.total_timesteps]
-; distribution = log_normal
-; min = 1e6
-; max = 1e7
-; mean = 5e6
-; scale = 0.5
+[sweep.train.total_timesteps]
+distribution = log_normal
+min = 1e3
+max = 1e7
+scale = time
 
-; [sweep.train.gamma]
-; distribution = log_normal
-; min = 0.9
-; max = 0.999
-; mean = 0.97
+[sweep.env.margin_turn_factor]
+distribution = log_normal
+min = 0.01
+max = 5.0
+scale = auto
 
-; [sweep.train.gae_lambda]
-; distribution = log_normal
-; min = 0.7
-; max = 0.999
-; mean = 0.95
-
-; [sweep.train.learning_rate]
-; distribution = log_normal
-; min = 0.0001
-; max = 0.001
-; mean = 0.00025
-; scale = 0.5
-
-; [sweep.train.batch_size]
-; min = 32768
-; max = 131072
-; mean = 65536
-; scale = 0.5
-
-; [sweep.train.minibatch_size]
-; min = 512
-; max = 2048
-; mean = 1024
-; scale = 0.5
+[sweep.env.cohesion_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
 
+[sweep.env.separation_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
 
+[sweep.env.alignment_factor]
+distribution = log_normal
+min = 0.01
+max = 1
+scale = auto
diff --git a/ocean/boids/binding.c b/ocean/boids/binding.c
@@ -1,24 +1,27 @@
 #include "boids.h"
+#define OBS_SIZE 512 // 64 boids * 8 obs per boid; NOTE(review): hard-coded for num_agents = 64 (see config/boids.ini) -- confirm other agent counts are not expected
+#define NUM_ATNS 2   // Two discrete actions per boid
+#define ACT_SIZES {3, 3} // presumably the number of choices for each of the NUM_ATNS actions -- confirm against vecenv.h
+#define OBS_TENSOR_T FloatTensor // presumably selects the observation tensor type used by vecenv.h -- confirm
 
 #define Env Boids
-#include "../env_binding.h"
+#include "vecenv.h"
 
-static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
-    env->num_boids = unpack(kwargs, "num_boids");
-    env->report_interval = unpack(kwargs, "report_interval");
-    env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
-    env->centering_factor = unpack(kwargs, "centering_factor");
-    env->avoid_factor = unpack(kwargs, "avoid_factor");
-    env->matching_factor = unpack(kwargs, "matching_factor");
+void my_init(Env* env, Dict* kwargs) { // Copy the env settings out of kwargs into env, then call init(env).
+    env->num_agents = (unsigned int)dict_get(kwargs, "num_agents")->value; // NOTE(review): dict_get() results are dereferenced unchecked here and below -- confirm a missing key cannot return NULL
+    env->report_interval = (unsigned)dict_get(kwargs, "report_interval")->value; // cast to the unsigned field type
+    env->margin_turn_factor = (float)dict_get(kwargs, "margin_turn_factor")->value; // the four factors are stored as float
+    env->cohesion_factor = (float)dict_get(kwargs, "cohesion_factor")->value;
+    env->separation_factor = (float)dict_get(kwargs, "separation_factor")->value;
+    env->alignment_factor = (float)dict_get(kwargs, "alignment_factor")->value;
     init(env);
-    return 0;
 }
 
-static int my_log(PyObject* dict, Log* log) {
-    assign_to_dict(dict, "perf", log->perf);
-    assign_to_dict(dict, "score", log->score);
-    assign_to_dict(dict, "episode_return", log->episode_return);
-    assign_to_dict(dict, "episode_length", log->episode_length);
-    assign_to_dict(dict, "n", log->n);
-    return 0;
+void my_log(Log* log, Dict* out) { // Export the Log fields listed below into `out` under the given string keys.
+    dict_set(out, "score", log->score); // NOTE(review): "perf" is not exported here, yet config/boids.ini sets the sweep `metric = perf` -- confirm it is supplied elsewhere
+    dict_set(out, "margin_turn_reward", log->t_margin_turn_reward); // exported key omits the t_ prefix of the Log field
+    dict_set(out, "cohesion_reward", log->t_cohesion_reward);
+    dict_set(out, "separation_reward", log->t_separation_reward);
+    dict_set(out, "alignment_reward", log->t_alignment_reward);
+    dict_set(out, "n", log->n);
 }
diff --git a/ocean/boids/boids.c b/ocean/boids/boids.c
@@ -1,40 +1,37 @@
 // Standalone C demo for Boids environment
-// Compile using: ./scripts/build_ocean.sh boids [local|fast]
+// Compile using: ./scripts/build.sh boids [local|fast]
 // Run with: ./boids
 
+
 #include <time.h>
 #include "boids.h"
+#include <stdlib.h>
 
 // --- Demo Configuration ---
-#define NUM_BOIDS_DEMO 20   // Number of boids for the standalone demo
-#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
+#define num_agents_DEMO 32  // Number of boids for the standalone demo
+#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
+#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
 #define ACTION_SCALE 3.0f   // Corresponds to action space [-3.0, 3.0]
 
-// Dummy action generation: random velocity changes for each boid
 void generate_dummy_actions(Boids* env) {
-    for (unsigned int i = 0; i < env->num_boids; ++i) {
-        // Generate random floats in [-1, 1] range
+    for (unsigned int i = 0; i < env->num_agents; ++i) { // per agent: draw two rand() values mapped to [-1, 1], scale by ACTION_SCALE, store at actions[2*i] and actions[2*i + 1]
         float rand_vx = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
         float rand_vy = ((float)rand() / (float)RAND_MAX) * 2.0f - 1.0f;
-
-        // Scale to the action space [-ACTION_SCALE, ACTION_SCALE]
         env->actions[i * 2 + 0] = rand_vx * ACTION_SCALE;
         env->actions[i * 2 + 1] = rand_vy * ACTION_SCALE;
     }
 }
 
 void demo() {
-    // Initialize Boids environment struct
     Boids env = {0}; 
-    env.num_boids = NUM_BOIDS_DEMO;
+    env.num_agents = num_agents_DEMO;
+    env.report_interval = REPORT_INTERVAL_DEMO;
 
-    // In the Python binding, these pointers are assigned from NumPy arrays.
-    // Here, we need to allocate them explicitly.
-    size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy)
-    size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy)
+    size_t obs_size = env.num_agents * env.num_agents * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy)
+    size_t act_size = env.num_agents * 2; // the 2 = (dvx, dvy)
     env.observations = (float*)calloc(obs_size, sizeof(float));
     env.actions = (float*)calloc(act_size, sizeof(float));
-    env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward
+    env.rewards = (float*)calloc(env.num_agents, sizeof(float)); // One reward slot per agent
 
     if (!env.observations || !env.actions || !env.rewards) {
         fprintf(stderr, "ERROR: Failed to allocate memory for demo buffers.\n");
@@ -57,7 +54,7 @@ void demo() {
     c_reset(&env);
     int total_steps = 0;
 
-    printf("Starting Boids demo with %d boids. Press ESC to exit.\n", env.num_boids);
+    printf("Starting Boids demo with %u boids. Press ESC to exit.\n", env.num_agents);
 
     while (!WindowShouldClose() && total_steps < MAX_STEPS_DEMO) { // Raylib function to check if ESC is pressed or window closed
         generate_dummy_actions(&env);