# PathBench-MIL/conf_optimization.yaml (main branch, Sbrussee/PathBench-MIL on GitHub)
experiment:
  # Name of the project.
  project_name: Exp1
  # Path to the annotation file.
  annotation_file: /path/to/your/annotations
  # Balancing strategy: 'tile', 'slide', 'patient' or 'category'.
  balancing: category
  # Whether to use class weighting for imbalanced datasets.
  class_weighting: true
  # Split technique: 'k-fold', 'fixed' or 'k_fold_site_preserved'.
  split_technique: k-fold
  # Number of folds for k-fold cross-validation.
  k: 3
  # Fraction of the training data to be used for validation.
  val_fraction: 0.1
  # Metric used for selecting the best epoch: val_loss or any metric in
  # evaluation.
  # NOTE(review): 'roc_auc_score' is not one of the names listed under
  # `evaluation` below; confirm which metric names are accepted here.
  best_epoch_based_on: roc_auc_score
  # Number of epochs for training.
  epochs: 25
  # Batch size for training.
  batch_size: 32
  # Bag size for MIL models.
  bag_size: 512
  # Number of layers in the MIL encoder.
  encoder_layers: 1
  # Dimension of the latent space in the MIL encoder.
  z_dim: 256
  # Dropout probability for the MIL model.
  dropout_p: 0.1
  # Number of workers for data loading; 0 for no parallelization.
  num_workers: 4
  # Aggregation level: 'slide' or 'patient'.
  aggregation_level: slide
  # Task: 'classification', 'regression', 'survival' or 'survival_discrete'.
  task: classification
  # Mode: 'benchmark' or 'optimization'.
  mode: optimization
  # Custom metrics to be used; each needs to be defined in metrics.py or be
  # a fastai metric (https://docs.fast.ai/metrics.html).
  custom_metrics:
    - RocAuc
  # Whether to generate a tile extraction report.
  report: false
  # Whether to skip the tile extraction step if tiles already exist.
  skip_extracted: true
  # Whether to skip the feature extraction step if features already exist.
  skip_feature_extraction: true
  # Whether to save the extracted tile images as .jpg files in addition to
  # the .tfrecords.
  save_tiles: false
  # Whether to use mixed precision feature extraction.
  mixed_precision: true
  # Whether to save heatmaps for test slides.
  save_heatmaps_test: true
  # Whether to save heatmaps for validation slides.
  save_heatmaps_val: false

  # Quality control methods to be used.
  qc:
    - GaussianV2
    - Otsu-CLAHE

  # Tile-level filters for discarding tiles.
  qc_filters:
    # Pixels below this value (ranged 0-1) are considered gray.
    grayspace_threshold: 0.05
    # Image tiles with grayspace above this fraction are discarded.
    grayspace_fraction: 0.6
    # Pixel intensities (0-255) above this value are considered white.
    whitespace_threshold: 230
    # Image tiles with whitespace above this fraction are discarded.
    whitespace_fraction: 1.0

  # Evaluation metrics to be used.
  evaluation:
    - balanced_accuracy
    - auc

  # Visualization methods to be used.
  visualization:
    - confusion_matrix
    - roc_curve

optimization:  # Optimization parameters
  # Name of the optimization study.
  study_name: Exp1
  # Whether to load an existing study, which continues the optimization
  # from a checkpoint.
  load_study: true
  # Objective metric to be optimized.
  objective_metric: balanced_accuracy
  # Optimization mode: 'max' or 'min'.
  objective_mode: max
  # Dataset used for the objective metric: 'val' or 'test'.
  # NOTE(review): with 'test', hyperparameters are selected against the
  # test data, which may bias the reported test metrics; consider 'val'.
  objective_dataset: test
  # Sampler to be used; inherits from optuna.samplers.
  sampler: TPESampler
  # Pruner to be used; inherits from optuna.pruners.
  pruner: HyperbandPruner
  # Number of trials to be run.
  trials: 20


datasets:  # Datasets to be used
  # Name of the dataset.
  - name: training1
    # Path to the slide data.
    slide_path: path/to/your/slides
    # Path to save tfrecords.
    tfrecord_path: path/to/your/tfrecords
    # Path to save tiles.
    tile_path: path/to/save/tiles
    # Whether the dataset is used for training or testing.
    used_for: training

  # Same keys as the entry above; this dataset is used for testing.
  - name: testing1
    slide_path: path/to/your/slides
    tfrecord_path: path/to/your/tfrecords
    tile_path: path/to/save/tiles
    used_for: testing

benchmark_parameters:
  # Tile sizes in pixels.
  tile_px:
    - 256

  # Magnifications (e.g. 40x, 20x, 10x).
  tile_um:
    - 20x
    - 10x

  # Normalization methods: 'macenko', 'reinhard' or 'cyclegan'.
  normalization:
    - macenko
    - reinhard

  # Feature extraction methods, as specified in feature_extractors.py.
  feature_extraction:
    - resnet50_imagenet
    - h_optimus_0
    - hibou_l
    - virchow2
    - uni
    - conch

  # MIL models, as specified in aggregators.py.
  mil:
    - clam_mil
    - dsmil
    - varmil
    - attention_mil
    - transmil

  # Loss functions, as specified in losses.py.
  loss:
    - CrossEntropyLoss

  # Activation function for the MIL encoder; supports any PyTorch
  # (torch.nn) activation function.
  activation_function:
    - ReLU

  # Optimizers; can be any fastai optimizer (default: Adam).
  optimizer:
    - Adam

# Path to pretrained weights and/or where to save retrieved weights;
# defaults to the pretrained_weights directory in the PathDev repository.
weights_dir: ./pretrained_weights
# Token for the Hugging Face model hub, used to access gated models. If you
# do not have one, just set to None. Avoid committing a real token.
# NOTE(review): a plain `None` is read by YAML as the string "None", not
# null; confirm which value the loader expects.
hf_key: YOUR_HUGGINGFACE_TOKEN