GPT2-NECK-SWEEP_20240118-230655-DB4bA_dataset_name-rand_hidden_idxs-11_hidden_lb-0_neck_cls-mlp_pretrained-0_token_lb-0_epoch=00-val_self_loss=10.70.ckpt