fix

parent d94a244bc4
commit 3b9a576290
@@ -7,15 +7,23 @@ import os
 import requests
 import time
 from datetime import datetime, timedelta
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 from config import Parser
 
 token = Parser.GithubToken
 
 def query(payload, variables=None):
-    r = requests.post(
+    session = requests.Session()
+    session.headers.update({'Authorization': f'bearer {token}'})
+    retry = Retry(connect=3, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+
+    r = session.post(
         'https://api.github.com/graphql',
-        headers={'Authorization': f'bearer {token}'},
         json={"query": payload, "variables": variables or {}}
     )
     r.raise_for_status()
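Note on the hunk above: query() now sends every request through a shared requests.Session with a urllib3 Retry policy mounted via HTTPAdapter, so transient connection failures against the GraphQL endpoint are retried with exponential backoff instead of aborting the scrape. A minimal standalone sketch of the same pattern; the token value and the query string are illustrative, while the endpoint and rateLimit fields are GitHub's actual GraphQL API:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_session(token):
    # One session for all calls: it carries the auth header and the
    # retrying adapter instead of passing them per request.
    session = requests.Session()
    session.headers.update({'Authorization': f'bearer {token}'})
    # connect=3 retries failed connection attempts up to three times;
    # backoff_factor=0.5 spaces the attempts with exponential delays.
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session

session = make_session('ghp_placeholder')  # placeholder token, not a real credential
r = session.post(
    'https://api.github.com/graphql',
    json={"query": "query { rateLimit { remaining resetAt } }"}
)
r.raise_for_status()
print(r.json())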
@@ -80,13 +88,13 @@ def get_count(q):
 
 
 def scrape(q, out_file):
-    path = f'responses/{out_file}'
-    if os.path.exists(path):
-        print('Skipping', path, 'already exists')
-        return
+    #path = f'responses/{out_file}'
+    #if os.path.exists(path):
+    #    print('Skipping', path, 'already exists')
+    #    return
     all_repos = []
     cursor = None
-    print('Creating', path)
+    #print('Creating', path)
     while True:
         r = get_repos(q, cursor, 100)
         search = r['search']
@@ -94,8 +102,8 @@ def scrape(q, out_file):
         cursor = pi['endCursor']
         has_next = pi['hasNextPage']
         total = search['repositoryCount']
-        if total > 2000:
-            raise ValueError(f'Too many results for {q}: {total}')
+        #if total > 2000:
+        #    raise ValueError(f'Too many results for {q}: {total}')
         all_repos += [e['node'] for e in search['edges']]
         print(r['rateLimit'])
         print(len(all_repos), ' / ', total, cursor)
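For context, the loop around these lines pages through search results by cursor: each response exposes endCursor and hasNextPage under pageInfo, and repositoryCount as the overall total. A sketch of that pagination pattern, reusing this file's own get_repos helper; the exit condition on hasNextPage is an assumption, since the loop's tail falls outside this hunk:

def scrape_all(q):
    # Request pages of 100 until pageInfo reports no further page.
    all_repos = []
    cursor = None
    while True:
        r = get_repos(q, cursor, 100)  # helper defined earlier in this file
        search = r['search']
        pi = search['pageInfo']
        cursor = pi['endCursor']
        all_repos += [e['node'] for e in search['edges']]
        if not pi['hasNextPage']:  # assumed exit condition
            break
    return all_repos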
@@ -118,8 +126,8 @@ def scrape(q, out_file):
 
         collection.update_one(filter_dict, {"$set": entity}, upsert=True)
 
-    with open(path, 'w') as out:
-        print(out)
+    #with open(path, 'w') as out:
+    #    print(out)
         #json.dump(all_repos, out)
     time.sleep(4)
 
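With the file output commented out (it only ever printed the file handle, since the json.dump line was already disabled), the pymongo upsert above is the only remaining persistence path; upsert=True keeps re-runs idempotent, updating a repository's document if it exists and inserting it otherwise. A sketch of that call in isolation; the connection string, database, collection, and key names are assumptions, not taken from this diff:

from pymongo import MongoClient

client = MongoClient('mongodb://localhost:27017')  # connection string assumed
collection = client['github']['repos']             # db/collection names assumed

entity = {'nameWithOwner': 'octocat/Hello-World', 'stars': 2500}  # illustrative document
filter_dict = {'nameWithOwner': entity['nameWithOwner']}
# Update the matching document, or insert it if absent, so rescraping
# the same repository never creates duplicates.
collection.update_one(filter_dict, {"$set": entity}, upsert=True)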