From 9f430dc54d88689afae634eb26f550fd5b2fb9f4 Mon Sep 17 00:00:00 2001 From: danny-mhlv Date: Wed, 14 Sep 2022 12:26:03 +0300 Subject: [PATCH 1/2] Added config script for Elasticsearch --- elastic/scripts/es_boot.sh | 208 +++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100755 elastic/scripts/es_boot.sh diff --git a/elastic/scripts/es_boot.sh b/elastic/scripts/es_boot.sh new file mode 100755 index 0000000..5e5f2eb --- /dev/null +++ b/elastic/scripts/es_boot.sh @@ -0,0 +1,208 @@ +#!/usr/bin/env bash + +__isint='^[0-9]+$' +__isvalidstr='^[a-z0-9]+$' +__isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$' + +create_template() { + curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d ' + { + "index_patterns": ["papers*"], + "priority": 1, + "template": { + "aliases": { + "papers": {} + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "analyzer": "title_analyzer" + }, + "authors": { + "type": "text" + }, + "topic": { + "type": "text" + }, + "summary": { + "type": "text" + }, + "tags": { + "type": "keyword" + }, + "content": { + "type": "text" + }, + "publisher": { + "type": "text" + } + } + }, + "settings": { + "analysis": { + "analyzer": { + "title_analyzer": { + "type": "custom", + "tokenizer": "title_engram_tokenizer" + }, + "content_analyzer_i": { + "type": "custom", + "tokenizer": "content_onchar_tokenizer", + "char_filter": [ + "markdown_token_filter" + ] + }, + "content_analyzer_s": { + "type": "custom", + "tokenizer": "content_onchar_tokenizer", + "char_filter": [ + "markdown_token_filter" + ], + "filter": [ + ] + }, + "basic_analyzer": { + + } + }, + "tokenizer": { + "title_engram_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 10, + "token_chars": [ + "letter", + "digit" + ] + }, + "content_onchar_tokenizer": { + "type": "char_group", +
"tokenize_on_chars": [ + "whitespace", + ".", ",", "(", ")", "-", "[", "]", "{", + "}", "#", ":", ";", "`", "!", "*" + ] + } + }, + "char_filter": { + "markdown_token_filter": { + "type": "pattern_replace", + "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*", + "replacement": "" + } + }, + "filter": { + } + } + } + } + } + ' +} + +#============================================================================================================================================================================= + +create_index() { + curl -X PUT "$1:$2/papers-$3?pretty" +} + +#============================================================================================================================================================================= + +__usage=" + Usage: $(basename $0) + --------------------------------------------------------------------------- + | -c, --create-only | Skip template initialization and only create | + | | specified index. Result index name will be | + | | 'papers-{specified name}' | + | | + | -h | Help information | + | | + | -a | Specifies the address | + | | + | -p, --port | Specifies the port | + | | + | -i, --index-name | Specifies the index name: | + | | Must be lowercase, cannot include [\/*?\"<>| ,#:], | + | | cannot start with [.-_+], cannot be \".\" or \"..\" | + | | cannot be longer than 255 bytes (note: multi-byte | + | | characters will count towards the limit faster) | + | | Result index name will be 'papers-{specified name}' | + --------------------------------------------------------------------------- +" +#============================================================================================================================================================================= + +CTRL=0 + +if [[ "$1" == "-h" ]]; then + echo "$__usage" +else + while [[ $# -gt 0 ]]; do + case "$1" in + -p | --port) + if [[ -n "$2" && $2 =~ $__isint && "$2" -ge 1 && "$2" -le 65535 ]]; then + PORT="$2" + shift + else + echo "Invalid port number!" 
+ fi + ;; + -a | --address) + if [[ -n "$2" ]]; then + IP="$2" + shift + else + echo "Address is not specified!" + fi + ;; + -i | --index-name) + if [[ -n "$2" && $2 =~ $__isvalidstr ]]; then + IND="$2" + shift + else + echo "Index name is not specified!" + fi + ;; + -c | --create-only) + CTRL=2 + ;; + -*) + echo "Option '$1' is not supported" + exit + ;; + *) + if [[ $1 =~ $__isvalidaddr ]]; then + IP="$1" + elif [[ $1 =~ $__isint && "$1" -ge 1 && "$1" -le 65535 ]]; then + PORT="$1" + elif [[ $1 =~ $__isvalidstr ]]; then + IND="$1" + else + echo "Invalid argument!" + exit; + fi + ;; + esac + shift + done + + echo "Specified: $IP:$PORT | Index name: $IND" + + case $CTRL in + 0) # Default behaviour - full initialization (template creation and index creation) + create_template "$IP" "$PORT" + echo "Elasticsearch index template created" + create_index "$IP" "$PORT" "$IND" + echo "Elasticsearch index (papers-$IND) created" + exit + ;; + 2) # Create index, skip creating the template + create_index "$IP" "$PORT" "$IND" + echo "Elasticsearch index (papers-$IND) created" + exit + ;; + esac +fi \ No newline at end of file From 090ecb4ff7aeb3f240d81086855c45bfc11f2350 Mon Sep 17 00:00:00 2001 From: danny-mhlv Date: Tue, 20 Sep 2022 14:38:26 +0300 Subject: [PATCH 2/2] Elasticsearch config-script functionality implemented --- elastic/samples/template.json | 95 +++++++++++++++++ elastic/scripts/es_boot.sh | 191 ++++++++++++++++------------------ 2 files changed, 182 insertions(+), 104 deletions(-) create mode 100644 elastic/samples/template.json diff --git a/elastic/samples/template.json b/elastic/samples/template.json new file mode 100644 index 0000000..b9fcdcc --- /dev/null +++ b/elastic/samples/template.json @@ -0,0 +1,95 @@ +{ + "index_patterns": ["papers*"], + "priority": 1, + "template": { + "aliases": { + "papers": {} + }, + "mappings": { + "properties": { + "id": { + "type": "keyword" + }, + "title": { + "type": "text", + "analyzer": "title_analyzer" + }, + "authors":
{ + "type": "text" + }, + "topic": { + "type": "text" + }, + "summary": { + "type": "text" + }, + "tags": { + "type": "keyword" + }, + "content": { + "type": "text" + }, + "publisher": { + "type": "text" + } + } + }, + "settings": { + "analysis": { + "analyzer": { + "title_analyzer": { + "type": "custom", + "tokenizer": "title_engram_tokenizer" + }, + "content_analyzer_i": { + "type": "custom", + "tokenizer": "content_onchar_tokenizer", + "char_filter": [ + "markdown_token_filter" + ] + }, + "content_analyzer_s": { + "type": "custom", + "tokenizer": "content_onchar_tokenizer", + "char_filter": [ + "markdown_token_filter" + ], + "filter": [ + ] + }, + "basic_analyzer": { + + } + }, + "tokenizer": { + "title_engram_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 10, + "token_chars": [ + "letter", + "digit" + ] + }, + "content_onchar_tokenizer": { + "type": "char_group", + "tokenize_on_chars": [ + "whitespace", + ".", ",", "(", ")", "-", "[", "]", "{", + "}", "#", ":", ";", "`", "!", "*" + ] + } + }, + "char_filter": { + "markdown_token_filter": { + "type": "pattern_replace", + "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*", + "replacement": "" + } + }, + "filter": { + } + } + } + } +} \ No newline at end of file diff --git a/elastic/scripts/es_boot.sh b/elastic/scripts/es_boot.sh index 5e5f2eb..77c9650 100755 --- a/elastic/scripts/es_boot.sh +++ b/elastic/scripts/es_boot.sh @@ -3,105 +3,10 @@ __isint='^[0-9]+$' __isvalidstr='^[a-z0-9]+$' __isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$' +__isJSONfile='^[a-z0-9\_\-]+(\.json)$' create_template() { - curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d ' - { - "index_patterns": ["papers*"], - "priority": 1, - "template": { - "aliases": { - "papers": {} - }, - "mappings": { - "properties": { - "id": { - "type": "keyword" - }, - "title": { - "type": "text", - "analyzer": 
"title_analyzer" - }, - "authors": { - "type": "text" - }, - "topic": { - "type": "text" - }, - "summary": { - "type": "text" - }, - "tags": { - "type": "keyword" - }, - "content": { - "type": "text" - }, - "publisher": { - "type": "text" - } - } - }, - "settings": { - "analysis": { - "analyzer": { - "title_analyzer": { - "type": "custom", - "tokenizer": "title_engram_tokenizer" - }, - "content_analyzer_i": { - "type": "custom", - "tokenizer": "content_onchar_tokenizer", - "char_filter": [ - "markdown_token_filter" - ] - }, - "content_analyzer_s": { - "type": "custom", - "tokenizer": "content_onchar_tokenizer", - "char_filter": [ - "markdown_token_filter" - ], - "filter": [ - ] - }, - "basic_analyzer": { - - } - }, - "tokenizer": { - "title_engram_tokenizer": { - "type": "edge_ngram", - "min_gram": 2, - "max_gram": 10, - "token_chars": [ - "letter", - "digit" - ] - }, - "content_onchar_tokenizer": { - "type": "char_group", - "tokenize_on_chars": [ - "whitespace", - ".", ",", "(", ")", "-", "[", "]", "{", - "}", "#", ":", ";", "`", "!", "*" - ] - } - }, - "char_filter": { - "markdown_token_filter": { - "type": "pattern_replace", - "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*", - "replacement": "" - } - }, - "filter": { - } - } - } - } - } - ' + curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d @"$3" } #============================================================================================================================================================================= @@ -112,13 +17,45 @@ create_index() { #============================================================================================================================================================================= +upload_files() { + _file="$4" + _dir="$5" + + if [[ -n "$_file" ]]; then + curl -X POST "$1:$2/papers-$3/_doc/" \ + -H "Content-Type: application/json" \ + -d @"$_file" + elif [[ -n "$_dir" ]]; then + for file in "$_dir"/*; do + if [[ ! 
"${file##*/}" =~ $__isJSONfile ]]; then + echo "$file is not identified as JSON. Skipping..." + continue; + fi + + curl -X POST "$1:$2/papers-$3/_doc/" \ + -H "Content-Type: application/json" \ + -d @"$file" + done + fi +} + +#============================================================================================================================================================================= + __usage=" - Usage: $(basename $0) - --------------------------------------------------------------------------- + Usage: $(basename "$0") + #-------------------------------------------------------------------------# + | *MODES* | + | | + | Note: 2 modes cannot be specified in one call. | + | | | -c, --create-only | Skip template initialization and only create | | | specified index. Result index name will be | | | 'papers-{specified name}' | | | + | -u, --upload | Uploads the specified file(s) to specified index | + |-------------------------------------------------------------------------| + | *OPTIONS* | + | | | -h | Help information | | | | -a | Specifies the address | @@ -131,7 +68,14 @@ __usage=" | | cannot be longer than 255 bytes (note: multi-byte | | | characters will count towards the limit faster) | | | Result index name will be 'papers-{specified name}' | - --------------------------------------------------------------------------- + | | + | -f, --file | Specify a JSON file that either a config or a | + | | document | + | | + | -d, --dir | Specify a directory containing documents to be | + | | uploaded to the index | + | | + #-------------------------------------------------------------------------# " #============================================================================================================================================================================= @@ -139,6 +83,8 @@ CTRL=0 if [[ "$1" == "-h" ]]; then echo "$__usage" +elif [[ -z "$1" ]]; then + echo "Use -h or --help for usage information."
else while [[ $# -gt 0 ]]; do case "$1" in @@ -166,9 +112,32 @@ else echo "Index name is not specified!" fi ;; + -f | --file) + if [[ -n "$2" && "${2##*/}" =~ $__isJSONfile ]]; then + FPATH="$2" + shift + else + echo "Invalid file name!" + fi + ;; + -d | --dir) + if [[ -n "$2" && -d "$2" ]]; then + DIRPATH="$2" + shift + fi + ;; -c | --create-only) + if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help."; + exit; + fi CTRL=2 ;; + -u | --upload) + if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help."; + exit; + fi + CTRL=3 + ;; -*) echo "Option '$1' is not supported" exit @@ -180,6 +149,8 @@ else PORT="$1" elif [[ $1 =~ $__isvalidstr ]]; then IND="$1" + elif [[ "${1##*/}" =~ $__isJSONfile ]]; then + FPATH="$1" else echo "Invalid argument!" exit; @@ -190,18 +161,30 @@ else done echo "Specified: $IP:$PORT | Index name: $IND" + if [[ -z "$IP" || -z "$PORT" || -z "$IND" ]]; then + echo "Failed to identify target!" + exit + fi case $CTRL in 0) # Default behaviour - full initialization (template creation and index creation) - create_template "$IP" "$PORT" - echo "Elasticsearch index template created" + if [[ -z "$FPATH" ]]; then + echo "Template config-file was not specified."; exit 1 + fi + create_template "$IP" "$PORT" "$FPATH" + echo -e "\nElasticsearch index template created" create_index "$IP" "$PORT" "$IND" - echo "Elasticsearch index (papers-$IND) created" + echo -e "\nElasticsearch index (papers-$IND) created" exit ;; 2) # Create index, skip creating the template create_index "$IP" "$PORT" "$IND" - echo "Elasticsearch index (papers-$IND) created" + echo -e "\nElasticsearch index (papers-$IND) created" + exit + ;; + 3) # Uploads the specified file(s) to specified index + upload_files "$IP" "$PORT" "$IND" "$FPATH" "$DIRPATH" + echo -e "\nFinished uploading to index (papers-$IND)!" exit ;; esac