Update 'pages/students/2016/patrik_pavlisin/dp2022/train1.sh'
This commit is contained in:
parent
512f3adff4
commit
e4851a5bc8
@ -0,0 +1,33 @@
|
|||||||
|
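# train.yaml -- YAML training configuration (note: committed under the name train1.sh, but this is not a shell script)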
|
||||||
|
|
||||||
|
## Where the samples will be written
|
||||||
|
save_data: dp2022/run2/example
|
||||||
|
## Where the vocab(s) will be written
|
||||||
|
src_vocab: dp2022/run2/example.vocab.src
|
||||||
|
tgt_vocab: dp2022/run2/example.vocab.tgt
|
||||||
|
# Prevent overwriting existing files in the folder
|
||||||
|
overwrite: False
|
||||||
|
|
||||||
|
|
||||||
|
# Corpus opts
|
||||||
|
data:
|
||||||
|
    corpus_1:
|
||||||
|
        path_src: dp2022/europarl-v7.sk-en.en
|
||||||
|
        path_tgt: dp2022/europarl-v7.sk-en.sk
|
||||||
|
        transforms: [onmt_tokenize]
|
||||||
|
        weight: 1
|
||||||
|
    valid:
|
||||||
|
        path_src: dp2022/europarl-v7.clean.sk-en.en
|
||||||
|
        path_tgt: dp2022/europarl-v7.clean.sk-en.sk
|
||||||
|
        transforms: [onmt_tokenize]
|
||||||
|
|
||||||
|
# Vocabulary files that were just created (same paths as the src_vocab / tgt_vocab entries above)
|
||||||
|
src_vocab: dp2022/run2/example.vocab.src
|
||||||
|
tgt_vocab: dp2022/run2/example.vocab.tgt
|
||||||
|
|
||||||
|
# Where to save the checkpoints
|
||||||
|
save_model: dp2022/run2/model
|
||||||
|
|
||||||
|
save_checkpoint_steps: 1000
|
||||||
|
train_steps: 20000
|
||||||
|
valid_steps: 10000
|
Loading…
Reference in New Issue
Block a user