diff --git a/elastic/samples/template.json b/elastic/samples/template.json
new file mode 100644
index 0000000..b9fcdcc
--- /dev/null
+++ b/elastic/samples/template.json
@@ -0,0 +1,91 @@
+{
+  "index_patterns": ["papers*"],
+  "priority": 1,
+  "template": {
+    "aliases": {
+      "papers": {}
+    },
+    "mappings": {
+      "properties": {
+        "id": {
+          "type": "keyword"
+        },
+        "title": {
+          "type": "text",
+          "analyzer": "title_analyzer"
+        },
+        "authors": {
+          "type": "text"
+        },
+        "topic": {
+          "type": "text"
+        },
+        "summary": {
+          "type": "text"
+        },
+        "tags": {
+          "type": "keyword"
+        },
+        "content": {
+          "type": "text"
+        },
+        "publisher": {
+          "type": "text"
+        }
+      }
+    },
+    "settings": {
+      "analysis": {
+        "analyzer": {
+          "title_analyzer": {
+            "type": "custom",
+            "tokenizer": "title_engram_tokenizer"
+          },
+          "content_analyzer_i": {
+            "type": "custom",
+            "tokenizer": "content_onchar_tokenizer",
+            "char_filter": [
+              "markdown_token_filter"
+            ]
+          },
+          "content_analyzer_s": {
+            "type": "custom",
+            "tokenizer": "content_onchar_tokenizer",
+            "char_filter": [
+              "markdown_token_filter"
+            ],
+            "filter": []
+          },
+          "basic_analyzer": {}
+        },
+        "tokenizer": {
+          "title_engram_tokenizer": {
+            "type": "edge_ngram",
+            "min_gram": 2,
+            "max_gram": 10,
+            "token_chars": [
+              "letter",
+              "digit"
+            ]
+          },
+          "content_onchar_tokenizer": {
+            "type": "char_group",
+            "tokenize_on_chars": [
+              "whitespace",
+              ".", ",", "(", ")", "-", "[", "]", "{",
+              "}", "#", ":", ";", "`", "!", "*"
+            ]
+          }
+        },
+        "char_filter": {
+          "markdown_token_filter": {
+            "type": "pattern_replace",
+            "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
+            "replacement": ""
+          }
+        },
+        "filter": {}
+      }
+    }
+  }
+}
diff --git a/elastic/scripts/es_boot.sh b/elastic/scripts/es_boot.sh
new file mode 100755
index 0000000..77c9650
--- /dev/null
+++ b/elastic/scripts/es_boot.sh
@@ -0,0 +1,191 @@
+#!/usr/bin/env bash
+
+__isint='^[0-9]+$'
+__isvalidstr='^[a-z0-9]+$'
+__isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$'
+__isJSONfile='^[a-z0-9_-]+\.json$'
+
+# Succeeds when $1 is a decimal TCP port number in the range 1-65535.
+is_valid_port() {
+    # Length cap prevents overflow; 10# keeps a leading zero from meaning octal.
+    [[ $1 =~ $__isint && ${#1} -le 5 ]] && (( 10#$1 >= 1 && 10#$1 <= 65535 ))
+}
+
+# Succeeds when the file-name part of path $1 looks like a JSON file.
+is_json_file() {
+    [[ ${1##*/} =~ $__isJSONfile ]]
+}
+
+# Prints the message $1 to stderr and aborts the script with status 1.
+die() {
+    echo "$1" >&2
+    exit 1
+}
+
+#=============================================================================================================================================================================
+
+# PUTs the config file $3 to $1:$2 as the index template 'papers_t'.
+create_template() {
+    curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d @"$3"
+}
+
+#=============================================================================================================================================================================
+
+# Creates the index 'papers-$3' on $1:$2.
+create_index() {
+    curl -X PUT "$1:$2/papers-$3?pretty"
+}
+
+#=============================================================================================================================================================================
+
+# POSTs documents to the index 'papers-$3' on $1:$2: the single file $4 when
+# it is set, otherwise every JSON file found directly inside directory $5.
+upload_files() {
+    local _file="$4" _dir="$5" doc
+
+    if [[ -n "$_file" ]]; then
+        curl -X POST "$1:$2/papers-$3/_doc/" \
+            -H "Content-Type: application/json" \
+            -d @"$_file"
+    elif [[ -n "$_dir" ]]; then
+        for doc in "$_dir"/*; do
+            # The glob yields 'dir/name' paths, so only the file name is matched.
+            if [[ ! -f "$doc" ]] || ! is_json_file "$doc"; then
+                echo "$doc is not identified as JSON. Skipping..."
+                continue
+            fi
+
+            curl -X POST "$1:$2/papers-$3/_doc/" \
+                -H "Content-Type: application/json" \
+                -d @"$doc"
+        done
+    fi
+}
+
+#=============================================================================================================================================================================
+
+__usage="
+    Usage: $(basename "$0")
+    #-------------------------------------------------------------------------#
+    |                                 *MODES*                                 |
+    |                                                                         |
+    | Note: 2 modes cannot be specified in one call.                          |
+    |                                                                         |
+    | -c, --create-only | Skip template initialization and only create        |
+    |                   | specified index. Result index name will be          |
+    |                   | 'papers-{specified name}'                           |
+    |                                                                         |
+    | -u, --upload      | Uploads the specified file(s) to specified index    |
+    |-------------------------------------------------------------------------|
+    |                                *OPTIONS*                                |
+    |                                                                         |
+    | -h, --help        | Help information                                    |
+    |                                                                         |
+    | -a, --address     | Specifies the address                               |
+    |                                                                         |
+    | -p, --port        | Specifies the port                                  |
+    |                                                                         |
+    | -i, --index-name  | Specifies the index name:                           |
+    |                   | Must be lowercase, cannot include [\/*?\"<>| ,#:],   |
+    |                   | cannot start with [.-_+], cannot be \".\" or \"..\"    |
+    |                   | cannot be longer than 255 bytes (note: multi-byte  |
+    |                   | characters will count towards the limit faster)    |
+    |                   | Result index name will be 'papers-{specified name}' |
+    |                                                                         |
+    | -f, --file        | Specify a JSON file that is either a config or a    |
+    |                   | document                                            |
+    |                                                                         |
+    | -d, --dir         | Specify a directory containing documents to be      |
+    |                   | uploaded to the index                               |
+    |                                                                         |
+    #-------------------------------------------------------------------------#
+"
+#=============================================================================================================================================================================
+
+# Start from known values so same-named environment variables cannot leak in.
+CTRL=0 IP="" PORT="" IND="" FPATH="" DIRPATH=""
+
+if [[ "$1" == "-h" || "$1" == "--help" ]]; then
+    echo "$__usage"
+elif [[ -z "$1" ]]; then
+    echo "Use -h or --help for usage information."
+else
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            -p | --port)
+                is_valid_port "$2" || die "Invalid port number!"
+                PORT="$2"
+                shift
+                ;;
+            -a | --address)
+                [[ -n "$2" ]] || die "Address is not specified!"
+                IP="$2"
+                shift
+                ;;
+            -i | --index-name)
+                [[ $2 =~ $__isvalidstr ]] || die "Index name is missing or invalid!"
+                IND="$2"
+                shift
+                ;;
+            -f | --file)
+                is_json_file "$2" || die "Invalid file name!"
+                FPATH="$2"
+                shift
+                ;;
+            -d | --dir)
+                [[ -n "$2" && -d "$2" ]] || die "Invalid directory!"
+                DIRPATH="$2"
+                shift
+                ;;
+            -c | --create-only)
+                [[ $CTRL -eq 0 ]] || die "Incorrect use of modes. Use -h or --help."
+                CTRL=2
+                ;;
+            -u | --upload)
+                [[ $CTRL -eq 0 ]] || die "Incorrect use of modes. Use -h or --help."
+                CTRL=3
+                ;;
+            -*)
+                die "Option '$1' is not supported"
+                ;;
+            *)
+                # A bare word fits both the address and the index-name pattern,
+                # so a slot that is already filled is skipped, not overwritten.
+                if [[ -z "$IP" && $1 =~ $__isvalidaddr ]]; then
+                    IP="$1"
+                elif [[ -z "$PORT" ]] && is_valid_port "$1"; then
+                    PORT="$1"
+                elif [[ $1 =~ $__isvalidstr ]]; then
+                    IND="$1"
+                elif is_json_file "$1"; then
+                    FPATH="$1"
+                else
+                    die "Invalid argument!"
+                fi
+                ;;
+        esac
+        shift
+    done
+
+    echo "Specified: $IP:$PORT | Index name: $IND"
+    [[ -n "$IP" && -n "$PORT" && -n "$IND" ]] || die "Failed to identify target!"
+
+    case $CTRL in
+        0) # Default behaviour - full initialization (template creation and index creation)
+            [[ -n "$FPATH" ]] || die "Template config-file was not specified."
+            create_template "$IP" "$PORT" "$FPATH"
+            echo -e "\nElasticsearch index template created"
+            create_index "$IP" "$PORT" "$IND"
+            echo -e "\nElasticsearch index (papers-$IND) created"
+            ;;
+        2) # Create index, skip creating the template
+            create_index "$IP" "$PORT" "$IND"
+            echo -e "\nElasticsearch index (papers-$IND) created"
+            ;;
+        3) # Uploads the specified file(s) to specified index
+            [[ -n "$FPATH" || -n "$DIRPATH" ]] || die "Nothing to upload: specify -f or -d."
+            upload_files "$IP" "$PORT" "$IND" "$FPATH" "$DIRPATH"
+            echo -e "\nFinished uploading to index (papers-$IND)!"
+            ;;
+    esac
+fi