zpwiki/pages/students/2016/jakub_maruniak/dp2021/annotation/train.sh

25 lines
919 B
Bash

set -e
OUTDIR=build/train/output
TRAINDIR=build/train
# Delete old training results
rm -r $TRAINDIR
mkdir -p $TRAINDIR
mkdir -p $OUTDIR
mkdir -p dist
# Delete old training results
rm -rf $OUTDIR/*
# Train dependency and POS
spacy train sk $OUTDIR ./build/input/slovak-treebank ./build/input/ud-artificial-gapping --n-iter 15 -p tagger,parser
rm -rf $TRAINDIR/posparser
mv $OUTDIR/model-best $TRAINDIR/posparser
# Train NER
# custom script for training, but it takes too long... input is JSONL file (db from Prodigy)
# python custom_train.py train ./build/train/posparser ./train.jsonl ./eval.jsonl -o ./build/train/nerposparser -n 15
spacy train sk $TRAINDIR/nerposparser ./ner/experiments/34sknerfull.json ./ner/experiments/34wikiartfull.json --n-iter 15 -p ner
# Package model
spacy package $TRAINDIR/nerposparser dist --meta-path ./meta.json --force
cd dist/sk_sk1-0.2.0
python ./setup.py sdist --dist-dir ../