fix
parent d94a244bc4
commit 3b9a576290
@@ -7,15 +7,23 @@ import os
 import requests
 import time
 from datetime import datetime, timedelta
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 from config import Parser
 
 token = Parser.GithubToken
 
 def query(payload, variables=None):
-    r = requests.post(
+    session = requests.Session()
+    session.headers.update({'Authorization': f'bearer {token}'})
+    retry = Retry(connect=3, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+
+    r = session.post(
         'https://api.github.com/graphql',
-        headers={'Authorization': f'bearer {token}'},
         json={"query": payload, "variables": variables or {}}
     )
     r.raise_for_status()
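The hunk above swaps a bare requests.post for a Session that carries the auth header and retries failed connections. A minimal sketch of that pattern in isolation, assuming a GITHUB_TOKEN environment variable instead of the repo's config.Parser; the example query string is illustrative, not from the commit:

import os
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def make_session(token):
    # The session sends the Authorization header with every request.
    session = requests.Session()
    session.headers.update({'Authorization': f'bearer {token}'})
    # Retry transient connection failures with exponential backoff.
    retry = Retry(connect=3, backoff_factor=0.5)
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('https://', adapter)
    return session

if __name__ == '__main__':
    session = make_session(os.environ['GITHUB_TOKEN'])  # assumed env var, not from the commit
    r = session.post(
        'https://api.github.com/graphql',
        json={"query": "query { viewer { login } }"},   # illustrative query
    )
    r.raise_for_status()
    print(r.json())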
@@ -80,13 +88,13 @@ def get_count(q):
 
 
 def scrape(q, out_file):
-    path = f'responses/{out_file}'
-    if os.path.exists(path):
-        print('Skipping', path, 'already exists')
-        return
+    #path = f'responses/{out_file}'
+    #if os.path.exists(path):
+    #    print('Skipping', path, 'already exists')
+    #    return
     all_repos = []
     cursor = None
-    print('Creating', path)
+    #print('Creating', path)
     while True:
         r = get_repos(q, cursor, 100)
         search = r['search']
@@ -94,8 +102,8 @@ def scrape(q, out_file):
         cursor = pi['endCursor']
         has_next = pi['hasNextPage']
         total = search['repositoryCount']
-        if total > 2000:
-            raise ValueError(f'Too many results for {q}: {total}')
+        #if total > 2000:
+        #    raise ValueError(f'Too many results for {q}: {total}')
         all_repos += [e['node'] for e in search['edges']]
         print(r['rateLimit'])
         print(len(all_repos), ' / ', total, cursor)
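The loop shown here pages through GraphQL search results using the connection's endCursor and hasNextPage fields. A minimal sketch of that cursor-pagination pattern, assuming a hypothetical fetch_page(cursor) helper standing in for the repo's get_repos, which returns the 'search' object of one response:

def collect_all(fetch_page):
    """Accumulate nodes across pages of a GraphQL search connection."""
    nodes = []
    cursor = None
    while True:
        search = fetch_page(cursor)          # hypothetical helper: fetches one page
        page_info = search['pageInfo']
        nodes += [edge['node'] for edge in search['edges']]
        cursor = page_info['endCursor']      # resume token for the next request
        if not page_info['hasNextPage']:     # stop once the API reports no further pages
            break
    return nodes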
@@ -118,8 +126,8 @@ def scrape(q, out_file):
 
             collection.update_one(filter_dict, {"$set": entity}, upsert=True)
 
-        with open(path, 'w') as out:
-            print(out)
+        #with open(path, 'w') as out:
+            #print(out)
             #json.dump(all_repos, out)
         time.sleep(4)
 
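With the JSON file dump commented out, the MongoDB upsert becomes the only persistence path. A minimal sketch of that upsert pattern with pymongo, assuming a local MongoDB instance and illustrative database, collection, and key names ('github', 'repos', 'nameWithOwner'), none of which are confirmed by the diff:

from pymongo import MongoClient

client = MongoClient('mongodb://localhost:27017')   # assumed local instance
collection = client['github']['repos']              # illustrative database/collection names

def upsert_repo(repo):
    # Insert the repo document if it is new, otherwise overwrite its stored fields.
    filter_dict = {'nameWithOwner': repo['nameWithOwner']}   # assumed unique key
    collection.update_one(filter_dict, {"$set": repo}, upsert=True)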