From 9f430dc54d88689afae634eb26f550fd5b2fb9f4 Mon Sep 17 00:00:00 2001
From: danny-mhlv <danny.mhlv@gmail.com>
Date: Wed, 14 Sep 2022 12:26:03 +0300
Subject: [PATCH 1/2] Added config script for Elasticsearch

---
 elastic/scripts/es_boot.sh | 208 +++++++++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100755 elastic/scripts/es_boot.sh

diff --git a/elastic/scripts/es_boot.sh b/elastic/scripts/es_boot.sh
new file mode 100755
index 0000000..5e5f2eb
--- /dev/null
+++ b/elastic/scripts/es_boot.sh
@@ -0,0 +1,208 @@
+#!/usr/bin/env bash
+
+__isint='^[0-9]+$'
+__isvalidstr='^[a-z0-9]+$'
+__isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$'
+
+create_template() {
+    curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d '
+        {
+            "index_patterns": ["papers*"],
+            "priority": 1,
+            "template": {
+                "aliases": {
+                    "papers": {}
+                },
+                "mappings": {
+                    "properties": {
+                        "id": {
+                            "type": "keyword"
+                        },
+                        "title": {
+                            "type": "text",
+                            "analyzer": "title_analyzer"
+                        },
+                        "authors": {
+                            "type": "text"
+                        },
+                        "topic": {
+                            "type": "text"
+                        },
+                        "summary": {
+                            "type": "text"
+                        },
+                        "tags": {
+                            "type": "keyword"
+                        },
+                        "content": {
+                            "type": "text"
+                        },
+                        "publisher": {
+                            "type": "text"
+                        }
+                    }
+                },
+                "settings": {
+                    "analysis": {
+                        "analyzer": {
+                            "title_analyzer": {
+                                "type": "custom",
+                                "tokenizer": "title_engram_tokenizer"
+                            },
+                            "content_analyzer_i": {
+                                "type": "custom",
+                                "tokenizer": "content_onchar_tokenizer",
+                                "char_filter": [
+                                    "markdown_token_filter"
+                                ]
+                            },
+                            "content_analyzer_s": {
+                                "type": "custom",
+                                "tokenizer": "content_onchar_tokenizer",
+                                "char_filter": [
+                                    "markdown_token_filter"
+                                ],
+                                "filter": [
+                                ]
+                            },
+                            "basic_analyzer": {
+
+                            }
+                        },
+                        "tokenizer": {
+                            "title_engram_tokenizer": {
+                                "type": "edge_ngram",
+                                "min_gram": 2,
+                                "max_gram": 10,
+                                "token_chars": [
+                                    "letter",
+                                    "digit"
+                                ]
+                            },
+                            "content_onchar_tokenizer": {
+                                "type": "char_group",
+                                "tokenize_on_chars": [
+                                    "whitespace",
+                                    ".", ",", "(", ")", "-", "[", "]", "{", 
+                                    "}", "#", ":", ";", "`", "!", "*"
+                                ]
+                            }
+                        },
+                        "char_filter": {
+                            "markdown_token_filter": {
+                                "type": "pattern_replace",
+                                "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
+                                "replacement": ""
+                            }
+                        },
+                        "filter": {
+                        }
+                    }
+                }
+            }
+        }
+    '
+}
+
+#=============================================================================================================================================================================
+
+create_index() { # $1 = address, $2 = port, $3 = index-name suffix
+    curl --request PUT "${1}:${2}/papers-${3}?pretty"
+}
+
+#=============================================================================================================================================================================
+
+__usage="
+    Usage: $(basename $0)
+    ---------------------------------------------------------------------------
+    | -c, --create-only | Skip template initialization and only create        |
+    |                   | specified index. Result index name will be          |
+    |                   | 'papers-{specified name}'                           |
+    |                                                                         |
+    |               -h  | Help information                                    |
+    |                                                                         |
+    |               -a  | Specifies the address                               |
+    |                                                                         |
+    |       -p, --port  | Specifies the port                                  |
+    |                                                                         |
+    | -i, --index-name  | Specifies the index name:                           |
+    |                   | Must be lowercase, cannot include [\/*?\"<>| ,#:],   |
+    |                   | cannot start with [.-_+], cannot be \".\" or \"..\"     |
+    |                   | cannot be longer than 255 bytes (note: multi-byte   | 
+    |                   | characters will count towards the limit faster)     |
+    |                   | Result index name will be 'papers-{specified name}' |
+    ---------------------------------------------------------------------------
+"
+#=============================================================================================================================================================================
+
+CTRL=0
+
+if [[ "$1" == "-h" ]]; then
+    echo "$__usage"
+else
+    while [[ $# -gt 0 ]]; do
+        case "$1" in
+            -p | --port)
+                if [[ -n "$2" && $2 =~ $__isint && "$2" -ge 1 && "$2" -le 65535 ]]; then
+                    PORT="$2"
+                    shift
+                else 
+                    echo "Invalid port number!"
+                fi
+                ;;
+            -a | --address)
+                if [[ -n "$2" ]]; then
+                    IP="$2"
+                    shift
+                else
+                    echo "Address is not specified!"
+                fi
+                ;;
+            -i | --index-name)
+                if [[ -n "$2" && $2 =~ $__isvalidstr ]]; then
+                    IND="$2"
+                    shift
+                else 
+                    echo "Index name is not specified!"
+                fi
+                ;;
+            -c | --create-only)
+                CTRL=2
+                ;;
+            -*)
+                echo "Option '$1' is not supported"
+                exit
+                ;;
+            *)
+                if [[ $1 =~ $__isvalidaddr ]]; then
+                    IP="$1"
+                elif [[ $1 =~ $__isint && "$1" -ge 1 && "$1" -le 65535 ]]; then
+                    PORT="$1"
+                elif [[ $1 =~ $__isvalidstr ]]; then
+                    IND="$1"
+                else 
+                    echo "Invalid argument!"
+                    exit;
+                fi
+                ;;
+        esac
+        shift
+    done
+
+    echo "Specified: $IP:$PORT | Index name: $IND"
+
+    case $CTRL in
+        0) # Default behaviour - full initialization (template creation and index creation)
+            create_template "$IP" "$PORT"
+            echo "Elasticsearch index template created"
+            create_index "$IP" "$PORT" "$IND"
+            echo "Elasticsearch index (papers-$IND) created"
+            exit
+            ;;
+        2) # Create index, skip creating the template
+            create_index "$IP" "$PORT" "$IND"
+            echo "Elasticsearch index (papers-$IND) created"
+            exit
+            ;;
+    esac
+fi
\ No newline at end of file

From 090ecb4ff7aeb3f240d81086855c45bfc11f2350 Mon Sep 17 00:00:00 2001
From: danny-mhlv <danny.mhlv@gmail.com>
Date: Tue, 20 Sep 2022 14:38:26 +0300
Subject: [PATCH 2/2] Elasticsearch config-script functionality implemented

---
 elastic/samples/template.json |  95 +++++++++++++++++
 elastic/scripts/es_boot.sh    | 191 ++++++++++++++++------------------
 2 files changed, 182 insertions(+), 104 deletions(-)
 create mode 100644 elastic/samples/template.json

diff --git a/elastic/samples/template.json b/elastic/samples/template.json
new file mode 100644
index 0000000..b9fcdcc
--- /dev/null
+++ b/elastic/samples/template.json
@@ -0,0 +1,95 @@
+{
+    "index_patterns": ["papers*"],
+    "priority": 1,
+    "template": {
+        "aliases": {
+            "papers": {}
+        },
+        "mappings": {
+            "properties": {
+                "id": {
+                    "type": "keyword"
+                },
+                "title": {
+                    "type": "text",
+                    "analyzer": "title_analyzer"
+                },
+                "authors": {
+                    "type": "text"
+                },
+                "topic": {
+                    "type": "text"
+                },
+                "summary": {
+                    "type": "text"
+                },
+                "tags": {
+                    "type": "keyword"
+                },
+                "content": {
+                    "type": "text"
+                },
+                "publisher": {
+                    "type": "text"
+                }
+            }
+        },
+        "settings": {
+            "analysis": {
+                "analyzer": {
+                    "title_analyzer": {
+                        "type": "custom",
+                        "tokenizer": "title_engram_tokenizer"
+                    },
+                    "content_analyzer_i": {
+                        "type": "custom",
+                        "tokenizer": "content_onchar_tokenizer",
+                        "char_filter": [
+                            "markdown_token_filter"
+                        ]
+                    },
+                    "content_analyzer_s": {
+                        "type": "custom",
+                        "tokenizer": "content_onchar_tokenizer",
+                        "char_filter": [
+                            "markdown_token_filter"
+                        ],
+                        "filter": [
+                        ]
+                    },
+                    "basic_analyzer": {
+
+                    }
+                },
+                "tokenizer": {
+                    "title_engram_tokenizer": {
+                        "type": "edge_ngram",
+                        "min_gram": 2,
+                        "max_gram": 10,
+                        "token_chars": [
+                            "letter",
+                            "digit"
+                        ]
+                    },
+                    "content_onchar_tokenizer": {
+                        "type": "char_group",
+                        "tokenize_on_chars": [
+                            "whitespace",
+                            ".", ",", "(", ")", "-", "[", "]", "{", 
+                            "}", "#", ":", ";", "`", "!", "*"
+                        ]
+                    }
+                },
+                "char_filter": {
+                    "markdown_token_filter": {
+                        "type": "pattern_replace",
+                        "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
+                        "replacement": ""
+                    }
+                },
+                "filter": {
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/elastic/scripts/es_boot.sh b/elastic/scripts/es_boot.sh
index 5e5f2eb..77c9650 100755
--- a/elastic/scripts/es_boot.sh
+++ b/elastic/scripts/es_boot.sh
@@ -3,105 +3,10 @@
 __isint='^[0-9]+$'
 __isvalidstr='^[a-z0-9]+$'
 __isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$'
+__isJSONfile='^[a-z0-9\_\-]+(\.json)$'
 
 create_template() {
-    curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d '
-        {
-            "index_patterns": ["papers*"],
-            "priority": 1,
-            "template": {
-                "aliases": {
-                    "papers": {}
-                },
-                "mappings": {
-                    "properties": {
-                        "id": {
-                            "type": "keyword"
-                        },
-                        "title": {
-                            "type": "text",
-                            "analyzer": "title_analyzer"
-                        },
-                        "authors": {
-                            "type": "text"
-                        },
-                        "topic": {
-                            "type": "text"
-                        },
-                        "summary": {
-                            "type": "text"
-                        },
-                        "tags": {
-                            "type": "keyword"
-                        },
-                        "content": {
-                            "type": "text"
-                        },
-                        "publisher": {
-                            "type": "text"
-                        }
-                    }
-                },
-                "settings": {
-                    "analysis": {
-                        "analyzer": {
-                            "title_analyzer": {
-                                "type": "custom",
-                                "tokenizer": "title_engram_tokenizer"
-                            },
-                            "content_analyzer_i": {
-                                "type": "custom",
-                                "tokenizer": "content_onchar_tokenizer",
-                                "char_filter": [
-                                    "markdown_token_filter"
-                                ]
-                            },
-                            "content_analyzer_s": {
-                                "type": "custom",
-                                "tokenizer": "content_onchar_tokenizer",
-                                "char_filter": [
-                                    "markdown_token_filter"
-                                ],
-                                "filter": [
-                                ]
-                            },
-                            "basic_analyzer": {
-
-                            }
-                        },
-                        "tokenizer": {
-                            "title_engram_tokenizer": {
-                                "type": "edge_ngram",
-                                "min_gram": 2,
-                                "max_gram": 10,
-                                "token_chars": [
-                                    "letter",
-                                    "digit"
-                                ]
-                            },
-                            "content_onchar_tokenizer": {
-                                "type": "char_group",
-                                "tokenize_on_chars": [
-                                    "whitespace",
-                                    ".", ",", "(", ")", "-", "[", "]", "{", 
-                                    "}", "#", ":", ";", "`", "!", "*"
-                                ]
-                            }
-                        },
-                        "char_filter": {
-                            "markdown_token_filter": {
-                                "type": "pattern_replace",
-                                "pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
-                                "replacement": ""
-                            }
-                        },
-                        "filter": {
-                        }
-                    }
-                }
-            }
-        }
-    '
+    curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d @"$3"
 }
 
 #=============================================================================================================================================================================
@@ -112,13 +17,45 @@ create_index() {
 
 #=============================================================================================================================================================================
 
+# Upload JSON document(s) to index papers-$3 at $1:$2 (POST .../_doc/).
+# $4 - a single file to send (checked first); $5 - directory to send from.
+upload_files() {
+    local _file="$4" _dir="$5" file
+    if [[ -n "$_file" ]]; then
+        curl -X POST "$1:$2/papers-$3/_doc/" \
+                -H "Content-Type: application/json" \
+                -d @"$_file"
+    elif [[ -n "$_dir" ]]; then
+        for file in "$_dir"/*; do
+            # Test the basename: the pattern is anchored and admits no '/'.
+            if [[ ! "${file##*/}" =~ $__isJSONfile ]]; then
+                echo "$file is not identified as JSON. Skipping..."
+                continue
+            fi
+            curl -X POST "$1:$2/papers-$3/_doc/" \
+                -H "Content-Type: application/json" \
+                -d @"$file"
+        done
+    fi
+}
+
+#=============================================================================================================================================================================
+
 __usage="
-    Usage: $(basename $0)
-    ---------------------------------------------------------------------------
+    Usage: $(basename "$0")
+    #-------------------------------------------------------------------------#
+    |                                 *MODES*                                 |
+    |                                                                         |
+    | Note: 2 modes cannot be specified in one call.                          |
+    |                                                                         |
     | -c, --create-only | Skip template initialization and only create        |
     |                   | specified index. Result index name will be          |
     |                   | 'papers-{specified name}'                           |
     |                                                                         |
+    |      -u, --upload | Uploads the specified file(s) to specified index    |
+    |-------------------------------------------------------------------------|
+    |                                *OPTIONS*                                |
+    |                                                                         |
     |               -h  | Help information                                    |
     |                                                                         |
     |               -a  | Specifies the address                               |
@@ -131,7 +68,14 @@ __usage="
     |                   | cannot be longer than 255 bytes (note: multi-byte   | 
     |                   | characters will count towards the limit faster)     |
     |                   | Result index name will be 'papers-{specified name}' |
-    ---------------------------------------------------------------------------
+    |                                                                         |
+    |        -f, --file | Specify a JSON file that is either a config or a    |
+    |                   | document                                            |
+    |                                                                         |
+    |         -d, --dir | Specify a directory containing documents to be      |
+    |                   | uploaded to the index                               |
+    |                                                                         |
+    #-------------------------------------------------------------------------#
 "
 #=============================================================================================================================================================================
 
@@ -139,6 +83,8 @@ CTRL=0
 
 if [[ "$1" == "-h" ]]; then
     echo "$__usage"
+elif [[ -z "$1" ]]; then
+    echo "Use -h or --help for usage information."
 else
     while [[ $# -gt 0 ]]; do
         case "$1" in
@@ -166,9 +112,32 @@ else
                     echo "Index name is not specified!"
                 fi
                 ;;
+            -f | --file)
+                if [[ -n "$2" && $2 =~ $__isJSONfile ]]; then
+                    FPATH="$2"
+                    shift
+                else
+                    echo "Invalid file name!"
+                fi
+                ;;
+            -d | --dir) # directory whose files the upload mode will post
+                # Abort on a bad path; otherwise "$2" is re-read as a positional.
+                [[ -n "$2" && -d "$2" ]] || { echo "Invalid directory!" >&2; exit 1; }
+                DIRPATH="$2"
+                shift
+                ;;
             -c | --create-only)
+                if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help."; 
+                    exit; 
+                fi
                 CTRL=2
                 ;;
+            -u | --upload)
+                if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help."; 
+                    exit; 
+                fi
+                CTRL=3
+                ;;
             -*)
                 echo "Option '$1' is not supported"
                 exit
@@ -180,6 +149,8 @@ else
                     PORT="$1"
                 elif [[ $1 =~ $__isvalidstr ]]; then
                     IND="$1"
+                elif [[ $1 =~ $__isJSONfile ]]; then
+                    FPATH="$1"
                 else 
                     echo "Invalid argument!"
                     exit;
@@ -190,18 +161,30 @@ else
     done
 
     echo "Specified: $IP:$PORT | Index name: $IND"
+    if [[ -z "$IP" || -z "$PORT" || -z "$IND" ]]; then
+        echo "Failed to identify target!"
+        exit
+    fi
 
     case $CTRL in
         0) # Default behaviour - full initialization (template creation and index creation)
-            create_template "$IP" "$PORT"
-            echo "Elasticsearch index template created"
+            if [[ -z "$FPATH" ]]; then
+                echo "Template config-file was not specified." >&2
+                exit 1 # without a file curl would be given an empty '@' name
+            fi
+            create_template "$IP" "$PORT" "$FPATH" && echo -e "\nElasticsearch index template created"
             create_index "$IP" "$PORT" "$IND"
-            echo "Elasticsearch index (papers-$IND) created"
+            echo -e "\nElasticsearch index (papers-$IND) created"
             exit
             ;;
         2) # Create index, skip creating the template
             create_index "$IP" "$PORT" "$IND"
-            echo "Elasticsearch index (papers-$IND) created"
+            echo -e "\nElasticsearch index (papers-$IND) created"
+            exit
+            ;;
+        3) # Uploads the specified file(s) to specified index
+            upload_files "$IP" "$PORT" "$IND" "$FPATH" "$DIRPATH"
+            echo -e "\nFinished uploading to index (papers-$IND)!"
             exit
             ;;
     esac