Initial commit

Sviatoslav Tsariov Yurievich 2023-09-07 18:14:18 +03:00
commit fcee973a7a
12 changed files with 1227 additions and 0 deletions

134
.gitignore vendored Normal file

@@ -0,0 +1,134 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
*.json
.vscode
.token
responses

0
README.md Normal file

13
downloader/.env.example Normal file

@@ -0,0 +1,13 @@
APP_PORT=
MONGO_USER=
MONGO_PASS=
MONGO_HOST=
MONGO_PORT=
MONGO_DB=
MONGO_COLLECTION=
STARS_MIN=
STARS_MAX=
GITHUB_TOKEN=
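For reference, a filled-in sketch that matches the docker-compose setup later in this commit (all values are placeholders, not real credentials):

APP_PORT=5000
MONGO_USER=admin
MONGO_PASS=admin
MONGO_HOST=localhost
MONGO_PORT=27017
MONGO_DB=git
MONGO_COLLECTION=repos
STARS_MIN=15
STARS_MAX=1000000
GITHUB_TOKEN=ghp_your_token_here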

117
downloader/app.py Normal file

@@ -0,0 +1,117 @@
from flask import Flask, request, jsonify
from pymongo import MongoClient, ASCENDING, DESCENDING
from bson import ObjectId
from bson.errors import InvalidId
from datetime import datetime
from config import App, Mongo, Parser
from collect import split_by_days
app = Flask(__name__)
client = MongoClient(f'mongodb://{Mongo.Username}:{Mongo.Password}@{Mongo.Host}:{Mongo.Port}') # MongoDB connection string
db = client[Mongo.Database] # Database name
if Mongo.Collection not in db.list_collection_names():
db.create_collection(Mongo.Collection)
collection = db[Mongo.Collection] # Collection name
# Specify the field and options for the index
stars_index_key = [('stargazers.totalCount', DESCENDING)]
repo_name_index_key = [('nameWithOwner', ASCENDING)]
if 'stars' not in collection.index_information():
collection.create_index(stars_index_key, name='stars')
if 'repoName' not in collection.index_information():
collection.create_index(repo_name_index_key, unique=True, name='repoName')
@app.route('/')
def hello():
return 'Hello, World!'
# Create a new repository
@app.route('/repositories', methods=['POST'])
def create_repository():
data = request.get_json()
result = collection.insert_one(data)
return jsonify({'message': 'Repository created', 'id': str(result.inserted_id)}), 201
# Read all repositories with pagination
@app.route('/repositories', methods=['GET'])
def get_all_repositories():
page = int(request.args.get('page', 1)) # Get the page number (default: 1)
page_size = int(request.args.get('page_size', 10)) # Get the page size (default: 10)
    # Get the stargazers.totalCount range from the query string,
    # falling back to the configured STARS_MIN/STARS_MAX
    start_value = request.args.get('start_value', Parser.MinStars)
    end_value = request.args.get('end_value', Parser.MaxStars)
    search_filter = {}
    if start_value is not None and end_value is not None:
        search_filter = {
            'stargazers.totalCount': {
                '$gte': int(start_value),
                '$lte': int(end_value)
            }
        }
# Calculate the skip value based on the page and page_size
skip = (page - 1) * page_size
# Retrieve repositories with pagination
repositories = list(collection.find(search_filter).skip(skip).limit(page_size))
# Convert ObjectId to string for JSON serialization for each repository
for repo in repositories:
repo['_id'] = str(repo['_id'])
return jsonify(repositories), 200
# Read a specific repository by ID
@app.route('/repositories/<repository_id>', methods=['GET'])
def get_repository(repository_id):
    try:
        repository = collection.find_one({'_id': ObjectId(repository_id)})
    except InvalidId:
        return jsonify({'message': 'Invalid repository id'}), 400
    if repository:
        repository['_id'] = str(repository['_id'])  # ObjectId is not JSON-serializable
        return jsonify(repository), 200
    else:
        return jsonify({'message': 'Repository not found'}), 404
# Update a repository by ID
@app.route('/repositories/<repository_id>', methods=['PUT'])
def update_repository(repository_id):
    data = request.get_json()
    try:
        result = collection.update_one({'_id': ObjectId(repository_id)}, {'$set': data})
    except InvalidId:
        return jsonify({'message': 'Invalid repository id'}), 400
    if result.matched_count > 0:  # matched, even if nothing actually changed
        return jsonify({'message': 'Repository updated'}), 200
    else:
        return jsonify({'message': 'Repository not found'}), 404
# Delete a repository by ID
@app.route('/repositories/<repository_id>', methods=['DELETE'])
def delete_repository(repository_id):
    try:
        result = collection.delete_one({'_id': ObjectId(repository_id)})
    except InvalidId:
        return jsonify({'message': 'Invalid repository id'}), 400
    if result.deleted_count > 0:
        return jsonify({'message': 'Repository deleted'}), 200
    else:
        return jsonify({'message': 'Repository not found'}), 404
# Parse repositories according to min and max stars from env
@app.route('/parse', methods=['GET'])
def parse():
    stars_start = Parser.MinStars
    stars_finish = Parser.MaxStars
    if stars_start is None or stars_finish is None:
        return jsonify({'message': 'Invalid input. Both STARS_MIN and STARS_MAX must be set.'}), 400
    stars = f'{stars_start}..{stars_finish}'
    try:
        split_by_days(stars, datetime(2007, 1, 1), datetime(2024, 2, 2))
        return jsonify({'message': 'Data parsed'}), 200
    except Exception as e:
        return jsonify({'message': 'Data not parsed', 'stack': str(e)}), 500
if __name__ == '__main__':
app.run(debug=True, port=App.Port)
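A quick smoke test of the endpoints above, assuming the app is running locally on the default port (a sketch, not part of the commit):

import requests
base = 'http://localhost:5000'
# kick off a scrape of the configured star range (long-running, synchronous)
print(requests.get(f'{base}/parse').json())
# then page through the stored repositories
repos = requests.get(f'{base}/repositories', params={'page': 1, 'page_size': 10}).json()
for repo in repos:
    print(repo['nameWithOwner'], repo['stargazers']['totalCount'])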

235
downloader/collect.py Normal file

@@ -0,0 +1,235 @@
#!/usr/bin/env python
"""Collect data on the most-starred repos using GitHub's GraphQL API."""
import json
import os
import requests
import time
from datetime import datetime, timedelta
from pymongo import MongoClient
from config import Mongo, Parser
token = Parser.GithubToken
def query(payload, variables=None):
r = requests.post(
'https://api.github.com/graphql',
headers={'Authorization': f'bearer {token}'},
json={"query": payload, "variables": variables or {}}
)
r.raise_for_status()
return r.json()
repo_query = '''
query popular_repos($start: String, $num: Int!){
rateLimit {
cost
remaining
resetAt
}
search(query: "is:public %s", type: REPOSITORY, first: $num, after: $start) {
repositoryCount
pageInfo {
hasNextPage
endCursor
}
edges {
node {
... on Repository {
nameWithOwner
createdAt
forkCount
isFork
updatedAt
primaryLanguage {
name
}
stargazers {
totalCount
}
watchers {
totalCount
}
}
}
}
}
}
'''
count_query = '''
query {
rateLimit {
cost
remaining
resetAt
}
search(query: "is:public %s", type: REPOSITORY, first: 1) {
repositoryCount
}
}
'''
def get_repos(q, cursor, num):
return query(repo_query % q, {'start': cursor, 'num': num})['data']
def get_count(q):
return query(count_query % q)['data']['search']['repositoryCount']
def scrape(q, out_file):
path = f'responses/{out_file}'
if os.path.exists(path):
print('Skipping', path, 'already exists')
return
all_repos = []
cursor = None
print('Creating', path)
while True:
r = get_repos(q, cursor, 100)
search = r['search']
pi = search['pageInfo']
cursor = pi['endCursor']
has_next = pi['hasNextPage']
total = search['repositoryCount']
if total > 2000:
raise ValueError(f'Too many results for {q}: {total}')
all_repos += [e['node'] for e in search['edges']]
print(r['rateLimit'])
print(len(all_repos), ' / ', total, cursor)
if not has_next or r['rateLimit']['remaining'] < 10:
break
    # Upsert the collected repos into MongoDB, keyed by nameWithOwner,
    # reusing the connection settings from config
    client = MongoClient(f'mongodb://{Mongo.Username}:{Mongo.Password}@{Mongo.Host}:{Mongo.Port}')
    db = client[Mongo.Database]
    collection = db[Mongo.Collection]
    for repo in all_repos:
        entity = {}
        filter_dict = {}
        for key, value in repo.items():
            if key == "nameWithOwner":
                filter_dict[key] = value
            else:
                entity[key] = value
        collection.update_one(filter_dict, {"$set": entity}, upsert=True)
    # Dump the raw results too; an existing file marks this query as done
    with open(path, 'w') as out:
        json.dump(all_repos, out)
    time.sleep(4)
def scrape_star_range(low, high):
"""Scrape a simple star range [low, high]."""
out_file = f'repos.stars={low}..{high}.json'
q = 'stars:%d..%d' % (low, high)
scrape(q, out_file)
def scrape_breaks():
breaks = json.load(open('breaks.json'))
for hi, lo in zip(breaks[:-1], breaks[1:]):
scrape_star_range(lo, hi - 1)
def scrape_star_dates():
for stars in range(123, 15, -1):
out_file = f'repos.star={stars}.-2015.json'
q = 'stars:%d created:<=2015' % stars
scrape(q, out_file)
out_file = f'repos.star={stars}.2016-.json'
q = 'stars:%d created:>=2016' % stars
scrape(q, out_file)
def query_for_star_years(stars, start, end):
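    # Note: 2010 and 2023 act as sentinel "open" bounds when splitting by
    # year; the date strings passed in from split_by_days never equal them,
    # so day-level queries always get an explicit created: range.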
q = 'stars:%s' % stars
if start == 2010 and end == 2023:
return q
elif start == 2010:
return f'{q} created:<={end}'
elif end == 2023:
return f'{q} created:>={start}'
else:
return f'{q} created:{start}..{end}'
def split_interval(a, b):
d = int((b - a) / 2)
return [(a, a + d), (a + d + 1, b)]
def split_by_year(stars, start, end):
if start == 2010 and end == 2023:
c = 1001 # we know this will fail.
elif start == end:
split_by_days(
stars,
datetime(start, 1, 1),
datetime(start, 12, 31)
)
return
else:
q = query_for_star_years(stars, start, end)
c = get_count(q)
if c <= 1000:
out_file = f'repos.star={stars}.{start}-{end}.json'
print(f'query: {q}')
scrape(q, out_file)
else:
if start == end:
raise ValueError(f'Can\'t split any more for {stars} / {start}')
print(f'{stars} {start}..{end} -> {c}, will split')
for a, b in split_interval(start, end):
split_by_year(stars, a, b)
def split_by_days(stars, day_start, day_end):
start_fmt = day_start.strftime('%Y-%m-%d')
end_fmt = day_end.strftime('%Y-%m-%d')
q = query_for_star_years(stars, start_fmt, end_fmt)
c = get_count(q)
if c <= 1000:
out_file = f'repos.star={stars}.{start_fmt}-{end_fmt}.json'
print(f'query: {q}')
scrape(q, out_file)
else:
days = (day_end - day_start).days
if days == 0:
raise ValueError(f'Can\'t split any more: {stars} / {day_start} .. {day_end}')
for a, b in split_interval(0, days):
dt_a = day_start + timedelta(days=a)
dt_b = day_start + timedelta(days=b)
split_by_days(stars, dt_a, dt_b)
def scrape_star_dates_split():
#for stars in range(83, 15, -1):
for stars in range(40, 15, -1):
split_by_year(stars, 2010, 2023)
def scrape_range_days():
# Scrape from a low star range up, splitting by creation date (which never changes).
# ranges = [(15, 20), (21, 25), (26, 30), (31, 35), (36, 40), (41, 45), (46, 49)]
#ranges = [(50, 60), (61, 70), (71, 80), (81, 90), (91, 100)]
#ranges = ranges + [(100, 119), (120, 139), (140, 159), (160, 179), (180, 200)]
#ranges = ranges + [(201, 225), (226, 250), (251, 300), (301, 400), (401, 500)]
#ranges = ranges + [(501, 700), (701, 1000), (1001, 1500), (1501, 5000), (5001, 1_000_000)]
ranges = [(1001, 1500), (1501, 5000), (5001, 1_000_000)]
for a, b in ranges:
stars = f'{a}..{b}'
split_by_days(stars, datetime(2007, 1, 1), datetime(2024, 2, 2))
#if __name__ == '__main__':
# scrape_breaks()
# scrape_star_dates()
# scrape_star_dates_split()
#scrape_range_days()
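GitHub's search API returns at most 1,000 results per query, which is why split_by_days keeps bisecting the creation-date interval until each sub-query's count fits. A runnable sketch of one bisection step:

from datetime import datetime, timedelta
day_start, day_end = datetime(2007, 1, 1), datetime(2024, 2, 2)
days = (day_end - day_start).days
d = days // 2  # split_interval(0, days) halves the span
for a, b in [(0, d), (d + 1, days)]:
    print(day_start + timedelta(days=a), '..', day_start + timedelta(days=b))

Splitting on creation date works because, unlike a star count, a repository's creation date never changes between runs.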

29
downloader/config.py Normal file

@@ -0,0 +1,29 @@
from utils.libs.decouple import config, UndefinedValueError
from utils.utils import str_to_bool
from utils.logger import logger
def set_conf_value(key):
    try:
        value = config(key)
        if value == '':
            logger.warn(f'The variable {key} is an empty string.')
        return value
    except UndefinedValueError:
        logger.warn(f'Please set the variable {key} in the .env file based on .env.example.')
        return None
class App:
    Port=int(set_conf_value('APP_PORT') or 5000)  # decouple returns strings; Flask needs an int port
class Mongo:
Username=set_conf_value('MONGO_USER')
Password=set_conf_value('MONGO_PASS')
Host=set_conf_value('MONGO_HOST')
Port=set_conf_value('MONGO_PORT')
Database=set_conf_value('MONGO_DB') or 'git'
Collection=set_conf_value('MONGO_COLLECTION') or 'repos'
class Parser:
MinStars=set_conf_value('STARS_MIN')
MaxStars=set_conf_value('STARS_MAX')
GithubToken=set_conf_value('GITHUB_TOKEN')
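Callers then read settings as plain attributes. Note that decouple returns strings, so anything numeric has to be cast where it is used (a sketch):

from config import App, Mongo, Parser
print(App.Port)        # already cast to int above, defaults to 5000
print(Mongo.Database)  # 'git' unless MONGO_DB is set
stars = f'{Parser.MinStars}..{Parser.MaxStars}'  # e.g. '15..1000000'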

314
downloader/utils/libs/decouple.py Normal file

@@ -0,0 +1,314 @@
# coding: utf-8
import os
import sys
import string
from shlex import shlex
from io import open
from collections import OrderedDict
# Useful for very coarse version differentiation.
PYVERSION = sys.version_info
if PYVERSION >= (3, 0, 0):
from configparser import ConfigParser
text_type = str
else:
from ConfigParser import SafeConfigParser as ConfigParser
text_type = unicode
if PYVERSION >= (3, 2, 0):
def read_config(parser, file): return parser.read_file(file)
else:
def read_config(parser, file): return parser.readfp(file)
DEFAULT_ENCODING = 'UTF-8'
# Python 3.10 doesn't ship strtobool anymore, so we define it here.
TRUE_VALUES = {"y", "yes", "t", "true", "on", "1"}
FALSE_VALUES = {"n", "no", "f", "false", "off", "0"}
def strtobool(value):
if isinstance(value, bool):
return value
value = value.lower()
if value in TRUE_VALUES:
return True
elif value in FALSE_VALUES:
return False
raise ValueError("Invalid truth value: " + value)
class UndefinedValueError(Exception):
pass
class Undefined(object):
"""
Class to represent undefined type.
"""
pass
# Reference instance to represent undefined values
undefined = Undefined()
class Config(object):
"""
Handle .env file format used by Foreman.
"""
def __init__(self, repository):
self.repository = repository
def _cast_boolean(self, value):
"""
Helper to convert config values to boolean as ConfigParser does.
"""
value = str(value)
return bool(value) if value == '' else bool(strtobool(value))
@staticmethod
def _cast_do_nothing(value):
return value
def get(self, option, default=undefined, cast=undefined):
"""
Return the value for option or default if defined.
"""
# We can't avoid __contains__ because value may be empty.
if option in os.environ:
value = os.environ[option]
elif option in self.repository:
value = self.repository[option]
else:
if isinstance(default, Undefined):
raise UndefinedValueError(
'{} not found. Declare it as envvar or define a default value.'.format(option))
value = default
if isinstance(cast, Undefined):
cast = self._cast_do_nothing
elif cast is bool:
cast = self._cast_boolean
return cast(value)
def __call__(self, *args, **kwargs):
"""
Convenient shortcut to get.
"""
return self.get(*args, **kwargs)
class RepositoryEmpty(object):
def __init__(self, source='', encoding=DEFAULT_ENCODING):
pass
def __contains__(self, key):
return False
def __getitem__(self, key):
return None
class RepositoryIni(RepositoryEmpty):
"""
Retrieves option keys from .ini files.
"""
SECTION = 'settings'
def __init__(self, source, encoding=DEFAULT_ENCODING):
self.parser = ConfigParser()
with open(source, encoding=encoding) as file_:
read_config(self.parser, file_)
def __contains__(self, key):
return (key in os.environ or
self.parser.has_option(self.SECTION, key))
def __getitem__(self, key):
return self.parser.get(self.SECTION, key)
class RepositoryEnv(RepositoryEmpty):
"""
Retrieves option keys from .env files with fall back to os.environ.
"""
def __init__(self, source, encoding=DEFAULT_ENCODING):
self.data = {}
with open(source, encoding=encoding) as file_:
for line in file_:
line = line.strip()
if not line or line.startswith('#') or '=' not in line:
continue
k, v = line.split('=', 1)
k = k.strip()
v = v.strip()
if len(v) >= 2 and ((v[0] == "'" and v[-1] == "'") or (v[0] == '"' and v[-1] == '"')):
v = v[1:-1]
self.data[k] = v
def __contains__(self, key):
return key in os.environ or key in self.data
def __getitem__(self, key):
return self.data[key]
class RepositorySecret(RepositoryEmpty):
"""
Retrieves option keys from files,
where title of file is a key, content of file is a value
e.g. Docker swarm secrets
"""
def __init__(self, source='/run/secrets/'):
self.data = {}
ls = os.listdir(source)
for file in ls:
with open(os.path.join(source, file), 'r') as f:
self.data[file] = f.read()
def __contains__(self, key):
return key in os.environ or key in self.data
def __getitem__(self, key):
return self.data[key]
class AutoConfig(object):
"""
Autodetects the config file and type.
Parameters
----------
search_path : str, optional
Initial search path. If empty, the default search path is the
caller's path.
"""
SUPPORTED = OrderedDict([
('settings.ini', RepositoryIni),
('.env', RepositoryEnv),
])
encoding = DEFAULT_ENCODING
def __init__(self, search_path=None):
self.search_path = search_path
self.config = None
def _find_file(self, path):
# look for all files in the current path
for configfile in self.SUPPORTED:
filename = os.path.join(path, configfile)
if os.path.isfile(filename):
return filename
# search the parent
parent = os.path.dirname(path)
if parent and parent != os.path.abspath(os.sep):
return self._find_file(parent)
# reached root without finding any files.
return ''
def _load(self, path):
# Avoid unintended permission errors
try:
filename = self._find_file(os.path.abspath(path))
except Exception:
filename = ''
Repository = self.SUPPORTED.get(os.path.basename(filename), RepositoryEmpty)
self.config = Config(Repository(filename, encoding=self.encoding))
def _caller_path(self):
# MAGIC! Get the caller's module path.
frame = sys._getframe()
path = os.path.dirname(frame.f_back.f_back.f_code.co_filename)
return path
def __call__(self, *args, **kwargs):
if not self.config:
self._load(self.search_path or self._caller_path())
return self.config(*args, **kwargs)
# A pre-instantiated AutoConfig to improve decouple's usability:
# now just import config and start using it with no configuration.
config = AutoConfig()
# Helpers
class Csv(object):
"""
Produces a csv parser that returns a list of transformed elements.
"""
def __init__(self, cast=text_type, delimiter=',', strip=string.whitespace, post_process=list):
"""
Parameters:
cast -- callable that transforms the item just before it's added to the list.
delimiter -- string of delimiters chars passed to shlex.
strip -- string of non-relevant characters to be passed to str.strip after the split.
post_process -- callable to post process all casted values. Default is `list`.
"""
self.cast = cast
self.delimiter = delimiter
self.strip = strip
self.post_process = post_process
def __call__(self, value):
"""The actual transformation"""
if value is None:
return self.post_process()
def transform(s): return self.cast(s.strip(self.strip))
splitter = shlex(value, posix=True)
splitter.whitespace = self.delimiter
splitter.whitespace_split = True
return self.post_process(transform(s) for s in splitter)
class Choices(object):
"""
Allows for cast and validation based on a list of choices.
"""
def __init__(self, flat=None, cast=text_type, choices=None):
"""
Parameters:
flat -- a flat list of valid choices.
cast -- callable that transforms value before validation.
choices -- tuple of Django-like choices.
"""
self.flat = flat or []
self.cast = cast
self.choices = choices or []
self._valid_values = []
self._valid_values.extend(self.flat)
self._valid_values.extend([value for value, _ in self.choices])
def __call__(self, value):
transform = self.cast(value)
if transform not in self._valid_values:
raise ValueError((
'Value not in list: {!r}; valid values are {!r}'
).format(value, self._valid_values))
else:
return transform
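Typical usage of the vendored module: values are resolved from os.environ first, then from settings.ini or .env, with optional casts (a sketch; the variable names are illustrative):

from utils.libs.decouple import config, Csv
DEBUG = config('DEBUG', default=False, cast=bool)         # accepts 'true', 'yes', 'on', '1', ...
PORT = config('APP_PORT', default=5000, cast=int)
HOSTS = config('ALLOWED_HOSTS', default='', cast=Csv())   # 'a,b,c' -> ['a', 'b', 'c']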


@@ -0,0 +1,264 @@
import re
import requests
from http import HTTPStatus
import json as JSON
from infra.config import BaseUri, Settings
from utils.logger import logger, bcolors
from utils.singleton import SingletonMeta
swagger_api_json_endpoint = '/api-json'
api_info_urls = {
BaseUri.Iguana: 'Iguana',
BaseUri.Pyrador: 'Pyrador',
BaseUri.Zoo: 'Zoo'
}
excluded_endpoints = [
('POST', BaseUri.Iguana + '/api/v1/test/controller'),
('GET', BaseUri.Iguana + '/api/v1/test/controller'),
('DELETE', BaseUri.Iguana + '/api/v1/test/controller'),
('GET', BaseUri.Iguana + '/api/v1/health'),
('GET', BaseUri.Iguana + '/api/v1/metrics'),
('GET', BaseUri.Iguana + '/api/v1/settings'),
('POST', BaseUri.Iguana + '/api/v1/settings'),
('PUT', BaseUri.Iguana + '/api/v1/settings'),
('GET', BaseUri.Iguana + '/api/v1/activity'),
('GET', BaseUri.Iguana + '/api/v1/activity/{activity_id}'),
('POST', BaseUri.Iguana + '/api/v1/doorlock'),
('PUT', BaseUri.Iguana + '/api/v1/profile/set-account-number'),
('PUT', BaseUri.Iguana + '/api/v1/profile/address'),
('PUT', BaseUri.Iguana + '/api/v1/profile/contact'),
('POST', BaseUri.Iguana + '/api/v1/profile/set-firebase-token'),
('PUT', BaseUri.Iguana + '/api/v1/profile/balance'),
('GET', BaseUri.Iguana + '/api/v1/providable-service/{place_id}'),
('POST', BaseUri.Iguana + '/api/v1/light-device/toggle'),
('GET', BaseUri.Iguana + '/api/v1/light-device/state/{device_id}'),
('PUT', BaseUri.Iguana + '/api/v1/user-place/{place_id}'),
('GET', BaseUri.Iguana + '/api/v1/user-place/{place_id}/services'),
('PUT', BaseUri.Iguana + '/api/v1/user-place/set/status'),
('POST', BaseUri.Iguana + '/api/v1/profile/device/to/service'),
('DELETE', BaseUri.Iguana + '/api/v1/profile/device/from/service'),
('GET', BaseUri.Iguana + '/api/v1/profile/place/{place_id}/service/devices/{device_category}'),
('POST', BaseUri.Iguana + '/api/v1/room'),
('GET', BaseUri.Iguana + '/api/v1/room/by/place/{parent_id}'),
('PUT', BaseUri.Iguana + '/api/v1/room/{id}'),
('DELETE', BaseUri.Iguana + '/api/v1/room/{id}'),
('GET', BaseUri.Iguana + '/api/v1/device/list/{type}'),
('DELETE', BaseUri.Iguana + '/api/v1/user-place/qrcode'),
('POST', BaseUri.Iguana + '/api/v1/billing'),
('POST', BaseUri.Iguana + '/api/v1/intercom/acceptCall'), # TODO: test it with notifications
('POST', BaseUri.Iguana + '/api/v1/upload/avatar'), # TODO: unable to test
('POST', BaseUri.Zoo + '/api/v1/notifications/send-notification'),
('POST', BaseUri.Zoo + '/api/v1/notifications/send-sms'),
('DELETE', BaseUri.Zoo + '/api/v1/place/available_services')
]
class APICoverageTracker(metaclass=SingletonMeta):
def __init__(self):
self.called_endpoints = {}
self.api_info = self.request_api_info(api_info_urls)
self.build_called_endpoints()
def request_api_info(self, urls):
api_info = {}
for url in urls:
res = requests.get(url + swagger_api_json_endpoint)
api_info[url] = res.json()
return api_info
def build_called_endpoints(self):
for url, info in self.api_info.items():
try:
paths = info.get('paths')
                if url not in self.called_endpoints:
self.called_endpoints[url] = {}
for path, methods in paths.items():
endpoint = url + path
self.called_endpoints[url][path] = {}
for method, method_info in methods.items():
if (method.upper(), endpoint) in excluded_endpoints:
continue
self.called_endpoints[url][path][method] = 0
except Exception as e:
                logger.error('Error while building the API coverage map:', e)
def endpoint_is_called(self, called_endpoint, method):
if not Settings.EnableCoverageStatistics:
return
for url, paths in self.called_endpoints.items():
for path, methods in paths.items():
endpoint = url + path
pattern = re.sub(r'{.+?}', r'[^/]+', endpoint) + '$'
if re.match(pattern, called_endpoint) and method.lower() in methods:
self.called_endpoints[url][path][method.lower()] += 1
return
def print_coverage(self):
def calculate_coverage_statistics(total_urls, covered_urls):
if total_urls == 0:
return 0
coverage_percentage = int(covered_urls / total_urls * 100)
if coverage_percentage < 50:
color = bcolors.FAIL
elif coverage_percentage < 75:
color = bcolors.WARNING
else:
color = bcolors.OKGREEN
statistics = f'{coverage_percentage}% ({covered_urls} / {total_urls})'
return f'{color}{statistics}{bcolors.ENDC}'
def count_urls(gateway_url):
urls_num = 0
covered_urls_num = 0
for url, paths in self.called_endpoints.items():
for path, methods in paths.items():
endpoint = url + path
if gateway_url in endpoint:
for method, num_of_calls in methods.items():
urls_num += 1
if num_of_calls > 0:
covered_urls_num += 1
else:
logger.warn(f'{method.upper()} {endpoint} is not covered')
return urls_num, covered_urls_num
if not Settings.EnableCoverageStatistics:
return
urls_num_sum = 0
covered_urls_num_sum = 0
urls_info = \
[(gateway_name, count_urls(gateway_url)) \
for gateway_url, gateway_name in api_info_urls.items()]
logger.info('Coverage statistics:')
logger.info()
for gateway_name, (urls_num, covered_urls_num) in urls_info:
coverage_statistics = calculate_coverage_statistics(urls_num, covered_urls_num)
message = f' {gateway_name}: {coverage_statistics}'
logger.info(message)
urls_num_sum += urls_num
covered_urls_num_sum += covered_urls_num
coverage_statistics = \
calculate_coverage_statistics(urls_num_sum, covered_urls_num_sum)
logger.info()
logger.info(f' Total: {coverage_statistics}\n')
class Response(requests.Response):
def __init__(self, status_code=HTTPStatus.OK):
super().__init__()
self.status_code = status_code
def log_req(method, url, params=None, data=None, json=None, headers=None):
logger.verbose(f'============================================================')
logger.verbose(f'[REQUEST] {method} {url}')
if params:
logger.verbose(f'params: {params}')
if data:
data = JSON.dumps(data, sort_keys=True, indent=4)
logger.verbose(f'data: {data}')
if json:
json = JSON.dumps(json, sort_keys=True, indent=4)
logger.verbose(f'json: {json}')
if headers:
headers = JSON.dumps(headers, sort_keys=True, indent=4)
logger.verbose(f'headers: {headers}')
def log_res(res: requests.Response):
req = res.request
logger.verbose(f'[RESPONSE] {req.method} {req.url} {res.status_code}')
try:
json = JSON.dumps(res.json(), sort_keys=True, indent=4).replace('\\"', '"')
lines_num = json.count('\n')
max_lines_num = Settings.LoggingResponseMaxLinesNum
if lines_num <= max_lines_num:
logger.verbose(f'json: {json}')
else:
stats = f'{lines_num}/{max_lines_num}'
logger.verbose(f'Maximum number of lines for response exceeded:', stats)
except ValueError:
logger.verbose('response:', res.content)
except Exception as e:
logger.verbose(e)
def request(method, url, headers=None, **kwargs):
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=kwargs.get('data'), json=kwargs.get('json'), headers=headers)
res = requests.request(method, url, **kwargs)
log_res(res)
return res
def get(url, params=None, headers=None, **kwargs):
method = 'GET'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=params, \
data=kwargs.get('data'), json=kwargs.get('json'), headers=headers)
res = requests.get(url, params=params, headers=headers, **kwargs)
log_res(res)
return res
def options(url, headers=None, **kwargs):
method = 'OPTIONS'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=kwargs.get('data'), json=kwargs.get('json'), headers=headers)
res = requests.options(url, headers=headers, **kwargs)
log_res(res)
return res
def head(url, headers=None, **kwargs):
method = 'HEAD'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=kwargs.get('data'), json=kwargs.get('json'), headers=headers)
res = requests.head(url, headers=headers, **kwargs)
log_res(res)
return res
def post(url, data=None, json=None, headers=None, **kwargs):
method = 'POST'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=data, json=json, headers=headers)
res = requests.post(url, data=data, json=json, headers=headers, **kwargs)
log_res(res)
return res
def put(url, data=None, headers=None, **kwargs):
method = 'PUT'
APICoverageTracker().endpoint_is_called(url, method)
    log_req(method, url, params=kwargs.get('params'), \
        data=data, json=kwargs.get('json'), headers=headers)
res = requests.put(url, data=data, headers=headers, **kwargs)
log_res(res)
return res
def patch(url, data=None, headers=None, **kwargs):
method = 'PATCH'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=data, json=kwargs.get('json'), headers=headers)
res = requests.patch(url, data=data, headers=headers, **kwargs)
log_res(res)
return res
def delete(url, headers=None, **kwargs):
method = 'DELETE'
APICoverageTracker().endpoint_is_called(url, method)
log_req(method, url, params=kwargs.get('params'), \
data=kwargs.get('data'), json=kwargs.get('json'), headers=headers)
res = requests.delete(url, headers=headers, **kwargs)
log_res(res)
return res
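The functions above are drop-in stand-ins for the corresponding requests calls: each one logs the request and response and counts the call toward endpoint coverage. A usage sketch (the import path is an assumption, since this file's name is not visible in the diff):

from infra.config import BaseUri
from utils import request as req  # hypothetical path for this module
res = req.get(BaseUri.Iguana + '/api/v1/rooms',  # illustrative endpoint
              headers={'Authorization': 'Bearer <token>'})
assert res.status_code == 200
# at the end of a test session, report which documented endpoints were hit
req.APICoverageTracker().print_coverage()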

69
downloader/utils/logger.py Normal file

@@ -0,0 +1,69 @@
import inspect
import traceback
from utils.utils import trace_origin
class bcolors:
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKCYAN = '\033[96m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'
class logger:
def debug(*args, **kwargs):
from infra.config import Settings
if not Settings.EnableDebugMessages:
return
print(bcolors.HEADER, end='[DEBUG] ')
print(*args, **kwargs)
print(bcolors.ENDC, end='')
def error(*args, **kwargs):
print(bcolors.FAIL, end='[ERROR] ')
print(*args, **kwargs)
print(bcolors.ENDC, end='')
def warn(*args, trace_origin_flag=False, stacklevel=0, \
print_stack=False, **kwargs):
print(bcolors.WARNING, end='[WARNING] ')
if trace_origin_flag:
trace_origin(inspect.currentframe(), stacklevel)
if print_stack:
print(traceback.format_exc())
print(*args, **kwargs)
print(bcolors.ENDC, end='')
def verbose(*args, trace_origin_flag=False, stacklevel=0, **kwargs):
from infra.config import Settings
if not Settings.EnableVerboseMessages:
return
if trace_origin_flag:
trace_origin(inspect.currentframe(), stacklevel)
print(bcolors.OKCYAN, end='[VERBOSE] ')
print(*args, **kwargs)
print(bcolors.ENDC, end='')
def log(*args, **kwargs):
print(bcolors.OKGREEN, end='[LOG] ')
print(*args, **kwargs)
print(bcolors.ENDC, end='')
def info(*args, **kwargs):
print(bcolors.OKBLUE, end='[INFO] ')
print(*args, **kwargs)
print(bcolors.ENDC, end='')
class DisableVerbose(object):
def __enter__(self):
from infra.config import Settings
self.verbose_flag = Settings.EnableVerboseMessages
Settings.EnableVerboseMessages = False
def __exit__(self, exc_type, exc_value, traceback):
from infra.config import Settings
Settings.EnableVerboseMessages = self.verbose_flag
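The logger class is used as a namespace rather than instantiated; a usage sketch:

from utils.logger import logger, DisableVerbose
logger.info('collecting repos')   # blue [INFO] prefix
logger.warn('rate limit is low')  # yellow [WARNING] prefix
with DisableVerbose():
    logger.verbose('suppressed while inside this block')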

19
downloader/utils/utils.py Normal file

@@ -0,0 +1,19 @@
def str_to_bool(val: str):
if not val:
return False
val = val.lower()
if val in ('y', 'yes', 't', 'true', 'on', '1'):
return True
elif val in ('', 'n', 'no', 'f', 'false', 'off', '0'):
return False
else:
raise ValueError('invalid truth value %r' % (val,))
def trace_origin(initial_frame, stacklevel=0):
frame = initial_frame.f_back
for _ in range(stacklevel + 1):
frame = frame.f_back
file_name = frame.f_code.co_filename
line_number = frame.f_lineno
func_name = frame.f_code.co_name
    print(f'{file_name}:{line_number}: {func_name}:')
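str_to_bool accepts the usual truthy and falsy spellings and treats empty or missing input as False, for example:

from utils.utils import str_to_bool
str_to_bool('yes')    # True
str_to_bool('off')    # False
str_to_bool(None)     # False: falsy input short-circuits
str_to_bool('maybe')  # raises ValueError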

20
mongo/docker-compose.yml Normal file

@@ -0,0 +1,20 @@
version: '3.6'
services:
mongodb:
image: mongo:4.2.8
container_name: mongo
ports:
- 27017:27017
environment:
- MONGO_INITDB_DATABASE=test
- MONGO_INITDB_ROOT_USERNAME=admin
- MONGO_INITDB_ROOT_PASSWORD=admin
volumes:
- ./docker-entrypoint-initdb.d:/docker-entrypoint-initdb.d:ro
- mongodb:/data/db
- mongoconfig:/data/configdb
volumes:
mongodb:
mongoconfig:
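The database is brought up with docker-compose up -d from the mongo/ directory; MongoDB then listens on localhost:27017 with the root credentials above, and scripts in ./docker-entrypoint-initdb.d run once, when the data volume is first initialized.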


@@ -0,0 +1,13 @@
// Init scripts run against MONGO_INITDB_DATABASE, so switch to the target database first
db = db.getSiblingDB("git");
db.createUser(
{
user: "git_downloader",
pwd: "password",
roles: [
{
role: "readWrite",
db: "git"
}
]
}
);
db.createCollection("repos");
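With this user in place, clients can authenticate with the restricted account instead of the root credentials (a sketch; authSource must name the database the user was created in):

from pymongo import MongoClient
client = MongoClient('mongodb://git_downloader:password@localhost:27017/?authSource=git')
print(client['git']['repos'].estimated_document_count())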