diff --git a/pages/students/2016/jakub_maruniak/dp2021/annotation/train/prepare.sh b/pages/students/2016/jakub_maruniak/dp2021/annotation/train/prepare.sh new file mode 100644 index 0000000000..ade40371f4 --- /dev/null +++ b/pages/students/2016/jakub_maruniak/dp2021/annotation/train/prepare.sh @@ -0,0 +1,19 @@ +mkdir -p build +mkdir -p build/input +# Prepare Treebank +mkdir -p build/input/slovak-treebank +spacy convert ./sources/slovak-treebank/stb.conll ./build/input/slovak-treebank +# UDAG used as evaluation +mkdir -p build/input/ud-artificial-gapping +spacy convert ./sources/ud-artificial-gapping/sk-ud-crawled-orphan.conllu ./build/input/ud-artificial-gapping +# Prepare skner +mkdir -p build/input/skner +# Convert to IOB +cat ./sources/skner/wikiann-sk.bio | python ./sources/bio-to-iob.py > build/input/skner/wikiann-sk.iob +# Split to train test +cat ./build/input/skner/wikiann-sk.iob | python ./sources/iob-to-traintest.py ./build/input/skner/wikiann-sk +# Convert train and test +mkdir -p build/input/skner-train +spacy convert -n 15 --converter ner ./build/input/skner/wikiann-sk.train ./build/input/skner-train +mkdir -p build/input/skner-test +spacy convert -n 15 --converter ner ./build/input/skner/wikiann-sk.test ./build/input/skner-test diff --git a/pages/students/2016/jakub_maruniak/dp2021/annotation/train/train.sh b/pages/students/2016/jakub_maruniak/dp2021/annotation/train/train.sh new file mode 100644 index 0000000000..a0d1c7cfc1 --- /dev/null +++ b/pages/students/2016/jakub_maruniak/dp2021/annotation/train/train.sh @@ -0,0 +1,19 @@ +set -e +OUTDIR=build/train/output +TRAINDIR=build/train +mkdir -p $TRAINDIR +mkdir -p $OUTDIR +mkdir -p dist +# Delete old training results +rm -rf $OUTDIR/* +# Train dependency and POS +spacy train sk $OUTDIR ./build/input/slovak-treebank ./build/input/ud-artificial-gapping --n-iter 20 -p tagger,parser +rm -rf $TRAINDIR/posparser +mv $OUTDIR/model-best $TRAINDIR/posparser +# Train NER +# python ./train.py -t ./train.json -o $TRAINDIR/nerposparser -n 10 -m $TRAINDIR/posparser/ +spacy train sk $TRAINDIR/nerposparser ./ner/train.json ./ner/eval.json --n-iter 20 -p ner +# Package model +spacy package $TRAINDIR/nerposparser dist --meta-path ./meta.json --force +cd dist/sk_sk1-0.2.0 +python ./setup.py sdist --dist-dir ../