Sufficient update

Minecon724 2025-01-17 15:16:51 +01:00
parent 3852e1d425
commit 3b7ecfaa69
Signed by: Minecon724
GPG key ID: 3CCC4D267742C8E8
5 changed files with 49 additions and 16 deletions

.gitignore

@@ -1,2 +1,3 @@
 venv/
 cache/
+__pycache__/

Dockerfile

@@ -0,0 +1,12 @@
+FROM python:3.13-alpine
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+COPY src ./src
+EXPOSE 8000
+# alpine has no useradd; adduser -D creates the user without a password
+RUN adduser -D app
+USER app
+CMD ["hypercorn", "src.__main__:app", "--bind", "0.0.0.0:8000"]

README.md

@@ -1,7 +1,18 @@
-Change `base_url` and run it as you would run a quart app
+Change the `BASE_URL` environment variable to your URL!
 This is a MITM between you and wikipedia, basically it impersonates wikipedia.
 I don't know if that's legal, but I don't think you're going to get sued.
 It's easier to maintain this way. If somebody turns out to have a problem, I will do something about it.
+
+Production:
+```bash
+BASE_URL=wikipedia.localhost:8000 hypercorn src.__main__:app
+```
+Debug:
+```bash
+BASE_URL=wikipedia.localhost:8000 python3 src/
+```
+
+## Privacy
+Nothing is intentionally logged. However, due to the cache, it is possible to tell which pages the instance accessed and when. And leaks happen.

requirements.txt

@@ -1,2 +1,2 @@
-quart
 aiohttp[speedups]
+quart

src/__main__.py

@@ -3,17 +3,22 @@ import aiohttp, aiofiles
 import aiofiles.os
 from time import time
 from binascii import crc32
+from os import getenv
 import re
 
 app = Quart(__name__, static_folder=None)
-base_url = 'wikipedia.localhost:5000'
+base_proto = 'http'
+base_url = getenv('BASE_URL', 'wikipedia.localhost:8000')
+print("Base URL is " + base_proto + "://" + base_url)
 
-def fix_url(text):
+def fix_url(text: str) -> str:
     pattern = r'((https:|http:)?//([\w-]+\.)+)(wikipedia|wikimedia)\.org'
     def repl(match):
         protocol = match.group(2)
         if protocol:
-            return 'http://' + match.group(3) + base_url
+            return base_proto + '://' + match.group(3) + base_url
         else:
             return '//' + match.group(3) + base_url
     return re.sub(pattern, repl, text)
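For context (an illustration, not part of the commit): the regex above rewrites `wikipedia.org`/`wikimedia.org` hosts to subdomains of `base_url`, and because `([\w-]+\.)+` is a repeated group, `match.group(3)` captures only the last subdomain label. A minimal standalone sketch, assuming `BASE_URL` is unset so the default applies:

```python
import re

# Same defaults as in src/__main__.py when BASE_URL is unset
base_proto = 'http'
base_url = 'wikipedia.localhost:8000'
pattern = r'((https:|http:)?//([\w-]+\.)+)(wikipedia|wikimedia)\.org'

def repl(match):
    # group(3) holds only the last subdomain repetition, e.g. 'en.'
    if match.group(2):  # an explicit http:/https: scheme was present
        return base_proto + '://' + match.group(3) + base_url
    return '//' + match.group(3) + base_url  # protocol-relative URL

print(re.sub(pattern, repl, '<a href="https://en.wikipedia.org/wiki/Cat">'))
# -> <a href="http://en.wikipedia.localhost:8000/wiki/Cat">
print(re.sub(pattern, repl, 'src="//upload.wikimedia.org/x.png"'))
# -> src="//upload.wikipedia.localhost:8000/x.png"
```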
@@ -22,7 +27,10 @@ def fix_url(text):
 @app.route('/<path:path>')
 async def proxy(path):
     host = request.headers.get('Host')
-    dep = host.split('.')[0]
+    dep = host[:-len(base_url)]
+    if dep != '':
+        dep = dep[:-1] # remove dot
     if not host.endswith(base_url):
         return '', 200
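For illustration (not part of the commit): the new suffix-stripping keeps multi-label subdomains, which the old `host.split('.')[0]` lost (it returned only `'en'` for `en.m.wikipedia.localhost:8000`). A minimal sketch, assuming the default `base_url`:

```python
# Sketch of the Host-header parsing above, outside the app
base_url = 'wikipedia.localhost:8000'

def extract_dep(host: str) -> str:
    # Strip the base_url suffix; whatever remains is the upstream
    # subdomain ("dep"), e.g. 'en.' for 'en.wikipedia.localhost:8000'
    dep = host[:-len(base_url)]
    if dep != '':
        dep = dep[:-1]  # remove the trailing dot
    return dep

print(extract_dep('en.wikipedia.localhost:8000'))    # -> 'en'
print(extract_dep('en.m.wikipedia.localhost:8000'))  # -> 'en.m'
print(extract_dep('wikipedia.localhost:8000'))       # -> ''
```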
@@ -83,19 +91,21 @@ async def proxy(path):
     headers = {k.lower(): v for k,v in headers if k.lower() in ['cache-control', 'age', 'content-language', 'content-type', 'last-modified', 'date', 'x-content-type-options', 'location']}
     content = await response.content.read()
-    if 'location' in headers:
-        print(headers['location'])
-        print(request.url_root)
+    if 'location' in headers: # if a redirect
         headers['location'] = fix_url(headers['location'])
     if any([headers['content-type'].startswith(t) for t in ['text/html', 'text/javascript', 'application']]):
         try:
             content = fix_url(content.decode()).encode()
         except: # if file is not a text file
             pass
     headers['content-length'] = str(len(content))
     headers['x-rp-cache'] = 'MISS'
     await aiofiles.os.makedirs(cache_path, exist_ok=True)
     async with aiofiles.open(cache_path + '/_file', mode='wb') as f:
-        await f.write(int.to_bytes(int(time()-int(headers['age'])), 8))
+        await f.write(int.to_bytes(int(time() - int(headers['age'])), 8))
         await f.write(int.to_bytes(response.status, 2))
         await f.write(int.to_bytes(len(headers)))
         for k, v in headers.items():
@@ -107,11 +117,10 @@ async def proxy(path):
             await f.write(int.to_bytes(len(value_encoded)))
             await f.write(value_encoded)
         await f.write(content)
-    print('cache written')
     r = Response(content, status=response.status, headers=headers)
     return r
 
 if __name__ == "__main__":
     app.debug = True
-    app.run()
+    app.run(port=8000)
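For reference (an illustration, not part of the repo): the `_file` cache record written above is 8 bytes of big-endian timestamp (`time()` minus the upstream `Age`), 2 bytes of status, 1 byte of header count, length-prefixed header entries, then the raw body. A hypothetical reader for that layout; the key-writing lines fall between the hunks shown, so the assumption that keys are stored like values (1-byte length, then the encoded bytes) is exactly that, an assumption:

```python
import io

def read_cache_record(raw: bytes):
    # Parse one _file record as written by the proxy (Python 3.11+,
    # where int.to_bytes/from_bytes default to big-endian)
    buf = io.BytesIO(raw)
    fetched_at = int.from_bytes(buf.read(8))    # time() - Age at write time
    status = int.from_bytes(buf.read(2))        # HTTP status code
    header_count = int.from_bytes(buf.read(1))  # number of stored headers
    headers = {}
    for _ in range(header_count):
        klen = int.from_bytes(buf.read(1))      # assumed: key stored like value
        key = buf.read(klen).decode()
        vlen = int.from_bytes(buf.read(1))
        headers[key] = buf.read(vlen).decode()
    content = buf.read()                        # the rest is the body
    return fetched_at, status, headers, content
```

The app itself would read this back through aiofiles; `io.BytesIO` just keeps the sketch self-contained. Note the 1-byte length prefixes cap keys and values at 255 bytes each.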