# train.yaml

## Where the samples will be written
save_data: dp2022/run2/example
## Where the vocab(s) will be written
src_vocab: dp2022/run2/example.vocab.src
tgt_vocab: dp2022/run2/example.vocab.tgt
# Prevent overwriting existing files in the folder
overwrite: False

# Corpus opts
data:
    corpus_1:
        path_src: dp2022/europarl-v7.sk-en.en
        path_tgt: dp2022/europarl-v7.sk-en.sk
    valid:
        path_src: dp2022/europarl-v7.clean.sk-en.en
        path_tgt: dp2022/europarl-v7.clean.sk-en.sk

# Vocabulary files that were just created
src_vocab: dp2022/run2/example.vocab.src
tgt_vocab: dp2022/run2/example.vocab.tgt

# Train on a single GPU
world_size: 1
gpu_ranks: [0]

# Where to save the checkpoints
save_model: dp2022/run2/model
save_checkpoint_steps: 1000
train_steps: 20000
valid_steps: 10000
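
The keys above (save_data, src_vocab, data, world_size, gpu_ranks, train_steps, and so on) follow the OpenNMT-py configuration schema, so the same file drives both vocabulary building and training. A minimal usage sketch, assuming OpenNMT-py is installed and the Europarl files exist at the listed paths; the sample size of 10000 is an illustrative value, not taken from the original setup:

# Build the source/target vocabularies declared under src_vocab / tgt_vocab
onmt_build_vocab -config train.yaml -n_sample 10000

# Train on GPU 0, saving checkpoints every 1000 steps to dp2022/run2/model
onmt_train -config train.yaml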