Elasticsearch config-script functionality implemented

This commit is contained in:
danny-mhlv 2022-09-20 14:38:26 +03:00
parent 9f430dc54d
commit 090ecb4ff7
2 changed files with 182 additions and 104 deletions

View File

@ -0,0 +1,95 @@
{
"index_patterns": ["papers*"],
"priority": 1,
"template": {
"aliases": {
"papers": {}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"title": {
"type": "text",
"analyzer": "title_analyzer"
},
"authors": {
"type": "text"
},
"topic": {
"type": "text"
},
"summary": {
"type": "text"
},
"tags": {
"type": "keyword"
},
"content": {
"type": "text"
},
"publisher": {
"type": "text"
}
}
},
"settings": {
"analysis": {
"analyzer": {
"title_analyzer": {
"type": "custom",
"tokenizer": "title_engram_tokenizer"
},
"content_analyzer_i": {
"type": "custom",
"tokenizer": "content_onchar_tokenizer",
"char_filter": [
"markdown_token_filter"
]
},
"content_analyzer_s": {
"type": "custom",
"tokenizer": "content_onchar_tokenizer",
"char_filter": [
"markdown_token_filter"
],
"filter": [
]
},
"basic_analyzer": {
}
},
"tokenizer": {
"title_engram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 10,
"token_chars": [
"letter",
"digit"
]
},
"content_onchar_tokenizer": {
"type": "char_group",
"tokenize_on_chars": [
"whitespace",
".", ",", "(", ")", "-", "[", "]", "{",
"}", "#", ":", ";", "`", "!", "*"
]
}
},
"char_filter": {
"markdown_token_filter": {
"type": "pattern_replace",
"pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
"replacement": ""
}
},
"filter": {
}
}
}
}
}

View File

@ -3,105 +3,10 @@
__isint='^[0-9]+$' __isint='^[0-9]+$'
__isvalidstr='^[a-z0-9]+$' __isvalidstr='^[a-z0-9]+$'
__isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$' __isvalidaddr='^[a-z]+$|^((25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])\.){3}(25[0-5]|2[0-4]?[0-9]|1[0-9]?[0-9]|[3-9][0-9]|[0-9])$'
__isJSONfile='^[a-z0-9\_\-]+(\.json)$'
create_template() { create_template() {
curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d ' curl -H "Content-Type: application/json" -X PUT "$1:$2/_index_template/papers_t" -d @"$3"
{
"index_patterns": ["papers*"],
"priority": 1,
"template": {
"aliases": {
"papers": {}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"title": {
"type": "text",
"analyzer": "title_analyzer"
},
"authors": {
"type": "text"
},
"topic": {
"type": "text"
},
"summary": {
"type": "text"
},
"tags": {
"type": "keyword"
},
"content": {
"type": "text"
},
"publisher": {
"type": "text"
}
}
},
"settings": {
"analysis": {
"analyzer": {
"title_analyzer": {
"type": "custom",
"tokenizer": "title_engram_tokenizer"
},
"content_analyzer_i": {
"type": "custom",
"tokenizer": "content_onchar_tokenizer",
"char_filter": [
"markdown_token_filter"
]
},
"content_analyzer_s": {
"type": "custom",
"tokenizer": "content_onchar_tokenizer",
"char_filter": [
"markdown_token_filter"
],
"filter": [
]
},
"basic_analyzer": {
}
},
"tokenizer": {
"title_engram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 10,
"token_chars": [
"letter",
"digit"
]
},
"content_onchar_tokenizer": {
"type": "char_group",
"tokenize_on_chars": [
"whitespace",
".", ",", "(", ")", "-", "[", "]", "{",
"}", "#", ":", ";", "`", "!", "*"
]
}
},
"char_filter": {
"markdown_token_filter": {
"type": "pattern_replace",
"pattern": "[[a-z][0-9]]*://[[a-z][0-9]]*.[a-z]*",
"replacement": ""
}
},
"filter": {
}
}
}
}
}
'
} }
#============================================================================================================================================================================= #=============================================================================================================================================================================
@ -112,13 +17,45 @@ create_index() {
#============================================================================================================================================================================= #=============================================================================================================================================================================
#######################################
# Upload JSON document(s) to the 'papers-<index>' Elasticsearch index.
# A single file ($4) takes precedence over a directory ($5). In directory
# mode only entries whose base name matches $__isJSONfile are sent; every
# other entry is reported and skipped.
# Globals:
#   __isJSONfile (read) - regex a bare file name must match
# Arguments:
#   $1 - address, $2 - port, $3 - index name suffix
#   $4 - path to a single JSON document (may be empty)
#   $5 - directory containing JSON documents (may be empty)
# Outputs:
#   curl output and skip notices on stdout, errors on stderr
# Returns:
#   0 if every curl call succeeded; non-zero if one failed or if neither a
#   file nor a directory was given
#######################################
upload_files() {
  local doc_file="${4:-}"
  local doc_dir="${5:-}"
  local endpoint="$1:$2/papers-$3/_doc/"
  local entry
  local status=0

  if [[ -n "$doc_file" ]]; then
    curl -X POST "$endpoint" \
      -H "Content-Type: application/json" \
      -d @"$doc_file"
    return
  fi

  if [[ -z "$doc_dir" ]]; then
    echo "Neither a file nor a directory to upload was specified." >&2
    return 1
  fi

  for entry in "$doc_dir"/*; do
    # An unmatched glob stays literal ("dir/*"); nothing to do in that case.
    [[ -e "$entry" ]] || continue
    # The pattern is anchored to a bare file name, so strip the directory
    # part before matching (the path itself always contains '/').
    if [[ ! "${entry##*/}" =~ $__isJSONfile ]]; then
      echo "$entry is not identified as JSON. Skipping..."
      continue
    fi
    curl -X POST "$endpoint" \
      -H "Content-Type: application/json" \
      -d @"$entry" || status=$?
  done
  return "$status"
}
#=============================================================================================================================================================================
__usage=" __usage="
Usage: $(basename $0) Usage: $(basename "$0")
--------------------------------------------------------------------------- #-------------------------------------------------------------------------#
| *MODES* |
| |
| Note: 2 modes cannot be specified in one call. |
| |
| -c, --create-only | Skip template initialization and only create | | -c, --create-only | Skip template initialization and only create |
| | specified index. Result index name will be | | | specified index. Result index name will be |
| | 'papers-{specified name}' | | | 'papers-{specified name}' |
| | | |
| -u, --upload | Uploads the specified file(s) to specified index |
|-------------------------------------------------------------------------|
| *OPTIONS* |
| |
| -h | Help information | | -h | Help information |
| | | |
| -a | Specifies the address | | -a | Specifies the address |
@ -131,7 +68,14 @@ __usage="
| | cannot be longer than 255 bytes (note: multi-byte | | | cannot be longer than 255 bytes (note: multi-byte |
| | characters will count towards the limit faster) | | | characters will count towards the limit faster) |
| | Result index name will be 'papers-{specified name}' | | | Result index name will be 'papers-{specified name}' |
--------------------------------------------------------------------------- | |
| -f, --file | Specify a JSON file that is either a config or a |
| | document |
| |
| -d, --dir | Specify a directory containing documents to be |
| | uploaded to the index |
| |
#-------------------------------------------------------------------------#
" "
#============================================================================================================================================================================= #=============================================================================================================================================================================
@ -139,6 +83,8 @@ CTRL=0
if [[ "$1" == "-h" ]]; then if [[ "$1" == "-h" ]]; then
echo "$__usage" echo "$__usage"
elif [[ -z "$1" ]]; then
echo "Use -h or --help for usage information."
else else
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
@ -166,9 +112,32 @@ else
echo "Index name is not specified!" echo "Index name is not specified!"
fi fi
;; ;;
-f | --file)
if [[ -n "$2" && $2 =~ $__isJSONfile ]]; then
FPATH="$2"
shift
else
echo "Invalid file name!"
fi
;;
-d | --dir)
if [[ -n "$2" && -d "$2" ]]; then
DIRPATH="$2"
shift
fi
;;
-c | --create-only) -c | --create-only)
if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help.";
exit;
fi
CTRL=2 CTRL=2
;; ;;
-u | --upload)
if [[ CTRL -ne 0 ]]; then echo "Incorrect use of modes. Use -h or --help.";
exit;
fi
CTRL=3
;;
-*) -*)
echo "Option '$1' is not supported" echo "Option '$1' is not supported"
exit exit
@ -180,6 +149,8 @@ else
PORT="$1" PORT="$1"
elif [[ $1 =~ $__isvalidstr ]]; then elif [[ $1 =~ $__isvalidstr ]]; then
IND="$1" IND="$1"
elif [[ $1 =~ $__isJSONfile ]]; then
FPATH="$1"
else else
echo "Invalid argument!" echo "Invalid argument!"
exit; exit;
@ -190,18 +161,30 @@ else
done done
echo "Specified: $IP:$PORT | Index name: $IND" echo "Specified: $IP:$PORT | Index name: $IND"
if [[ -z "$IP" || -z "$PORT" || -z "$IND" ]]; then
echo "Failed to identify target!"
exit
fi
case $CTRL in case $CTRL in
0) # Default behaviour - full initialization (template creation and index creation) 0) # Default behaviour - full initialization (template creation and index creation)
create_template "$IP" "$PORT" if [[ -z "$FPATH" ]]; then
echo "Elasticsearch index template created" echo "Template config-file was not specified."
fi
create_template "$IP" "$PORT" "$FPATH"
echo -e "\nElasticsearch index template created"
create_index "$IP" "$PORT" "$IND" create_index "$IP" "$PORT" "$IND"
echo "Elasticsearch index (papers-$IND) created" echo -e "\nElasticsearch index (papers-$IND) created"
exit exit
;; ;;
2) # Create index, skip creating the template 2) # Create index, skip creating the template
create_index "$IP" "$PORT" "$IND" create_index "$IP" "$PORT" "$IND"
echo "Elasticsearch index (papers-$IND) created" echo -e "\nElasticsearch index (papers-$IND) created"
exit
;;
3) # Uploads the specified file(s) to specified index
upload_files "$IP" "$PORT" "$IND" "$FPATH" "$DIRPATH"
echo -e "\nFinished uploading to index (papers-$IND)!"
exit exit
;; ;;
esac esac