#!/bin/sh
# dependencies:
# - wikiextractor (Python Module)
# (pip install wikiextractor)
#
# usage:
# wikiex <wiki dump xml file> <txt file to store>

# Extract article text from a wiki dump with wikiextractor and store it in a
# text file, with blank lines and the <doc ...> / </doc> wrapper lines removed.
#
# Arguments:
#   $1 - dump file handed to wikiextractor
#   $2 - file that receives the filtered text (truncated/overwritten)
# Environment:
#   WIKIEX_PROCESSES - value passed to wikiextractor --processes (default: 8)
# Exit status:
#   2   fewer than two arguments were given
#   127 wikiextractor is not on PATH
#   otherwise the status of the extraction pipeline

if [ "$#" -lt 2 ]; then
  printf 'usage: %s <wiki dump xml file> <txt file to store>\n' "${0##*/}" >&2
  exit 2
fi

if ! command -v wikiextractor >/dev/null 2>&1; then
  printf '%s: wikiextractor not found (pip install wikiextractor)\n' \
    "${0##*/}" >&2
  exit 127
fi

# Not every /bin/sh implements pipefail, so probe for it in a subshell first.
# Where available it keeps a wikiextractor failure from being masked by the
# exit status of the trailing filter.
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# A single sed pass drops, in order: empty or whitespace-only lines, lines
# starting with "<doc id=", and lines ending with "</doc>".
# [[:space:]] is used instead of \s, which is a GNU-only extension.
wikiextractor "$1" --processes "${WIKIEX_PROCESSES:-8}" -q -o - \
  | sed -e '/^[[:space:]]*$/d' -e '/^<doc id=/d' -e '/<\/doc>$/d' \
  > "$2"
