{"train/global_step":855,"_runtime":8958.612804502,"train/grad_norm":"NaN","_step":1,"train_samples_per_second":6.117,"train_steps_per_second":0.095,"total_flos":2.89331830849536e+16,"train/loss":"Infinity","_timestamp":1.7312666037862792e+09,"train/epoch":4.991789819376026,"train/learning_rate":2e-05,"train_runtime":8960.1161,"_wandb":{"runtime":8960}}