Nahrát soubory do „pages/students/2016/jakub_maruniak/dp2021/annotation/train“

This commit is contained in:
Jakub Maruniak 2020-11-09 21:46:12 +00:00
parent 027c9e3f50
commit 0fcb3d7f2d
2 changed files with 38 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# Prepare spaCy training/evaluation inputs under ./build/input from the raw
# corpora in ./sources. Run from the directory that contains ./sources.
#
# Abort at the first failing step so that later conversions never run on
# missing or partially written input.
set -e

# Prepare Treebank
mkdir -p build/input/slovak-treebank
spacy convert ./sources/slovak-treebank/stb.conll ./build/input/slovak-treebank

# UDAG used as evaluation
mkdir -p build/input/ud-artificial-gapping
spacy convert ./sources/ud-artificial-gapping/sk-ud-crawled-orphan.conllu ./build/input/ud-artificial-gapping

# Prepare skner
mkdir -p build/input/skner
# Convert to IOB. The corpus is fed through stdin by redirection (not `cat |`)
# so that a missing input file is reported as an error instead of silently
# producing an empty .iob file.
python ./sources/bio-to-iob.py \
  < ./sources/skner/wikiann-sk.bio \
  > build/input/skner/wikiann-sk.iob
# Split to train test. The argument is an output path prefix; the conversions
# below expect the splitter to have written <prefix>.train and <prefix>.test.
python ./sources/iob-to-traintest.py ./build/input/skner/wikiann-sk \
  < ./build/input/skner/wikiann-sk.iob

# Convert train and test with spaCy's NER converter
# (-n 15 is spaCy's --n-sents option: sentences grouped per document).
mkdir -p build/input/skner-train
spacy convert -n 15 --converter ner ./build/input/skner/wikiann-sk.train ./build/input/skner-train
mkdir -p build/input/skner-test
spacy convert -n 15 --converter ner ./build/input/skner/wikiann-sk.test ./build/input/skner-test

View File

@ -0,0 +1,19 @@
# Train the Slovak spaCy pipeline and build a source distribution of it.
# Expects the converted corpora under ./build/input, NER data under ./ner and
# ./meta.json in the current directory. Results are written to ./build/train
# and ./dist.
#
# Abort on the first failing command.
set -e

OUTDIR=build/train/output
TRAINDIR=build/train
mkdir -p "$TRAINDIR" "$OUTDIR" dist

# Delete old training results. ${OUTDIR:?} aborts instead of expanding to
# "/*" should OUTDIR ever end up empty or unset.
rm -rf -- "${OUTDIR:?}"/*

# Train dependency and POS: the Slovak treebank is the training set and
# UD artificial gapping is the evaluation set.
spacy train sk "$OUTDIR" ./build/input/slovak-treebank ./build/input/ud-artificial-gapping --n-iter 20 -p tagger,parser
# Keep only the best model of the run as the tagger+parser model.
rm -rf -- "$TRAINDIR/posparser"
mv -- "$OUTDIR/model-best" "$TRAINDIR/posparser"

# Train NER
# Previous approach, kept for reference (custom script on top of posparser):
# python ./train.py -t ./train.json -o $TRAINDIR/nerposparser -n 10 -m $TRAINDIR/posparser/
# NOTE(review): the command below does not reference $TRAINDIR/posparser, so
# NER appears to be trained independently of the tagger/parser model -
# confirm this is intended.
spacy train sk "$TRAINDIR/nerposparser" ./ner/train.json ./ner/eval.json --n-iter 20 -p ner

# Package model
spacy package "$TRAINDIR/nerposparser" dist --meta-path ./meta.json --force
# NOTE(review): the directory name is hard-coded; presumably it must match
# the lang/name/version fields of ./meta.json - verify when bumping versions.
cd dist/sk_sk1-0.2.0
python ./setup.py sdist --dist-dir ../