117 lines
4.4 KiB
Python
117 lines
4.4 KiB
Python
from quart import Quart, Response, request
|
|
import aiohttp, aiofiles
|
|
import aiofiles.os
|
|
from time import time
|
|
from binascii import crc32
|
|
import re
|
|
|
|
# The Quart application object; the proxy route below is registered on it.
# NOTE(review): static_folder=None presumably keeps Quart from registering
# its own static-file route, so the catch-all route sees every path - confirm.
app = Quart(__name__, static_folder=None)
|
|
# Host (including port) the proxy is reached under. fix_url() substitutes it
# for "wikipedia.org"/"wikimedia.org" in rewritten links, and proxy() compares
# the incoming Host header against it.
base_url = 'wikipedia.localhost:5000'
|
|
|
|
def fix_url(text, base=None):
    """Rewrite Wikipedia/Wikimedia sub-domain URLs so they point at the proxy.

    Every occurrence of ``[http(s):]//<labels>.wikipedia.org`` or
    ``[http(s):]//<labels>.wikimedia.org`` in *text* is replaced by the same
    sub-domain labels in front of *base*.  URLs that carried an explicit
    scheme are rewritten to plain ``http://``; protocol-relative URLs
    (``//host``) stay protocol-relative.  The bare domains (no sub-domain)
    are left untouched.

    Args:
        text: The string (HTML, JavaScript, a Location header, ...) to rewrite.
        base: Host (and port) of the proxy.  Defaults to the module-level
            ``base_url``.

    Returns:
        *text* with all matching URLs rewritten.
    """
    if base is None:
        base = base_url

    # Group 1: optional scheme.  Group 2: *all* sub-domain labels including
    # their trailing dots.  The labels are repeated inside a non-capturing
    # group because a repeated capturing group only remembers its last
    # repetition, which would turn "en.m." into just "m.".
    pattern = r'(https:|http:)?//((?:[\w-]+\.)+)(?:wikipedia|wikimedia)\.org'

    def repl(match):
        scheme = 'http://' if match.group(1) else '//'
        return scheme + match.group(2) + base

    return re.sub(pattern, repl, text)
|
|
|
|
# Upstream response headers that are forwarded to the client and cached.
_FORWARDED_HEADERS = frozenset({
    'cache-control', 'age', 'content-language', 'content-type',
    'last-modified', 'date', 'x-content-type-options', 'location',
})
# Sub-domains that are fetched from wikimedia.org instead of wikipedia.org.
_WIKIMEDIA_SUBDOMAINS = frozenset({
    'upload', 'meta', 'login', 'maps', 'foundation', 'developer',
})
# Sub-domains that are answered with an empty 200 instead of being proxied.
_IGNORED_SUBDOMAINS = frozenset({'intake-analytics'})
# Content-type prefixes whose bodies get their links rewritten by fix_url().
_REWRITTEN_TYPES = ('text/html', 'text/javascript', 'application')
# One or more dot-separated host labels.  Anything else (slashes, '@', ...)
# must never reach the upstream URL or the cache path.
_SUBDOMAIN_RE = re.compile(r'[A-Za-z0-9_-]+(?:\.[A-Za-z0-9_-]+)*')


def _resolve_upstream(host):
    """Map the request's Host header to ``(cache_key, upstream_origin)``.

    Returns ``None`` when the host does not belong to the proxy, is
    malformed, or names a sub-domain that is deliberately not proxied.
    """
    if host == base_url:
        return '_main', 'https://wikipedia.org'

    suffix = '.' + base_url
    if not host.endswith(suffix):
        return None

    subdomain = host[:-len(suffix)]
    if not _SUBDOMAIN_RE.fullmatch(subdomain):
        return None

    first_label = subdomain.split('.')[0]
    if first_label in _IGNORED_SUBDOMAINS:
        return None

    domain = 'wikimedia' if first_label in _WIKIMEDIA_SUBDOMAINS else 'wikipedia'
    return subdomain, f'https://{subdomain}.{domain}.org'


def _max_age(cache_control):
    """Return the ``max-age`` (seconds) of a Cache-Control value, or 0."""
    for directive in cache_control.split(','):
        name, _, value = directive.strip().partition('=')
        if name.lower() == 'max-age':
            try:
                return int(value)
            except ValueError:
                return 0
    return 0


def _encode_cache_record(timestamp, status, headers, content):
    """Serialise a response into the on-disk cache format.

    Layout (all integers big-endian): 8-byte timestamp, 2-byte status,
    1-byte header count, then per header a 1-byte length + key and a
    1-byte length + value, followed by the body.

    Returns ``None`` when a field does not fit its length prefix (for
    example a header value longer than 255 bytes); such responses are
    simply not cached.
    """
    try:
        parts = [
            timestamp.to_bytes(8, 'big'),
            status.to_bytes(2, 'big'),
            len(headers).to_bytes(1, 'big'),
        ]
        for name, value in headers.items():
            for field in (name.encode(), value.encode()):
                parts.append(len(field).to_bytes(1, 'big'))
                parts.append(field)
    except OverflowError:
        return None
    parts.append(content)
    return b''.join(parts)


def _decode_cache_record(data):
    """Parse a cache record into ``(timestamp, status, headers, body)``.

    Raises:
        ValueError: The record is truncated or contains undecodable text.
        KeyError: The record has no ``content-length`` header.
    """
    offset = 0

    def take(count):
        nonlocal offset
        chunk = data[offset:offset + count]
        if len(chunk) != count:
            raise ValueError('truncated cache record')
        offset += count
        return chunk

    timestamp = int.from_bytes(take(8), 'big')
    status = int.from_bytes(take(2), 'big')
    headers = {}
    for _ in range(take(1)[0]):
        name = take(take(1)[0]).decode()
        headers[name] = take(take(1)[0]).decode()
    body = take(int(headers['content-length']))
    return timestamp, status, headers, body


async def _read_fresh_entry(cache_file):
    """Return ``(status, headers, body)`` for a still-fresh cache entry.

    Returns ``None`` when the file is missing, unreadable, corrupt, or
    older than the ``max-age`` stored with it.
    """
    try:
        async with aiofiles.open(cache_file, mode='rb') as f:
            data = await f.read()
    except OSError:
        return None

    try:
        timestamp, status, headers, body = _decode_cache_record(data)
    except (KeyError, ValueError):
        return None

    age = int(time() - timestamp)
    if age >= _max_age(headers.get('cache-control', '')):
        return None

    headers['age'] = str(age)
    headers['x-rp-cache'] = 'HIT'
    return status, headers, body


async def _store_entry(cache_path, timestamp, status, headers, content):
    """Write a response to ``cache_path/_file``; failures are non-fatal."""
    record = _encode_cache_record(timestamp, status, headers, content)
    if record is None:
        return
    try:
        await aiofiles.os.makedirs(cache_path, exist_ok=True)
        async with aiofiles.open(cache_path + '/_file', mode='wb') as f:
            await f.write(record)
    except OSError:
        # Caching is best effort: the response is still served.
        return
    print('cache written')


@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
async def proxy(path):
    """Serve *path* from the Wikipedia/Wikimedia site named by the Host header.

    The sub-domain in front of ``base_url`` selects the upstream site.
    Responses are stored under ``cache/<sub-domain>/<path>`` and replayed
    while younger than their ``max-age``.  Links in HTML, JavaScript and
    ``application/*`` bodies and in ``Location`` headers are rewritten with
    ``fix_url`` so that they keep pointing at the proxy.

    Returns:
        An empty 200 response for hosts that are not proxied, ``403`` for
        paths containing ``.`` or ``..`` segments, otherwise the cached or
        freshly fetched upstream response.
    """
    upstream = _resolve_upstream(request.headers.get('Host', ''))
    if upstream is None:
        return '', 200
    cache_key, origin = upstream

    # Only the path becomes part of the cache directory, so only the path
    # (not the query string) needs the traversal check.
    if any(segment in ('..', '.') for segment in path.split('/')):
        return 'Forbidden', 403

    cache_path = 'cache/' + cache_key + '/' + path

    # Forward the query string exactly as received: rebuilding it from the
    # parsed arguments would lose percent-encoding and repeated keys.
    query = request.query_string.decode('utf-8', 'replace')
    if query:
        path += '?' + query
        cache_path += '_v' + str(crc32(path.encode()))

    entry = await _read_fresh_entry(cache_path + '/_file')
    if entry is not None:
        status, headers, content = entry
        return Response(content, status=status, headers=headers)

    async with aiohttp.ClientSession() as session:
        async with session.get(origin + '/' + path, allow_redirects=False) as response:
            status = response.status
            headers = {
                name.lower(): value
                for name, value in response.headers.items()
                if name.lower() in _FORWARDED_HEADERS
            }
            content = await response.content.read()

    if 'location' in headers:
        print(headers['location'])
        print(request.url_root)
        headers['location'] = fix_url(headers['location'])

    if headers.get('content-type', '').startswith(_REWRITTEN_TYPES):
        try:
            content = fix_url(content.decode()).encode()
        except UnicodeDecodeError:
            # Binary payload (e.g. application/pdf): forward it untouched.
            pass

    headers['content-length'] = str(len(content))

    # Upstream does not always send Age; treat a missing or malformed value
    # as a response that was generated just now.
    try:
        upstream_age = int(headers.get('age', 0))
    except ValueError:
        upstream_age = 0

    await _store_entry(
        cache_path, int(time() - upstream_age), status, headers, content,
    )

    return Response(content, status=status, headers=headers)
|
|
|
|
def main():
    """Switch the app into debug mode and start serving it."""
    app.debug = True
    app.run()


if __name__ == "__main__":
    main()
|