# Forked from KEMT/zpwiki (file: train.yaml — 33 lines, 860 B, YAML)
# train.yaml

## Where the samples will be written
save_data: dp2022/run2/example

## Where the vocab(s) will be written
src_vocab: dp2022/run2/example.vocab.src
tgt_vocab: dp2022/run2/example.vocab.tgt

# Prevent overwriting existing files in the folder
overwrite: False

# Corpus opts
data:
        corpus_1:
                path_src: dp2022/europarl-v7.sk-en.en
                path_tgt: dp2022/europarl-v7.sk-en.sk
        valid:
                path_src: dp2022/europarl-v7.clean.sk-en.en
                path_tgt: dp2022/europarl-v7.clean.sk-en.sk

# Vocabulary files that were just created
# NOTE(review): src_vocab/tgt_vocab are repeated from above with identical
# values; strict YAML loaders may reject duplicate keys — confirm intended.
src_vocab: dp2022/run2/example.vocab.src
tgt_vocab: dp2022/run2/example.vocab.tgt

# Train on a single GPU
world_size: 1
gpu_ranks: [0]

# Where to save the checkpoints
save_model: dp2022/run2/model
save_checkpoint_steps: 1000
train_steps: 20000
valid_steps: 10000