Sufficient update
This commit is contained in:
parent
3852e1d425
commit
3b7ecfaa69
5 changed files with 49 additions and 16 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
venv/
|
||||
cache/
|
||||
__pycache__/
|
12
Dockerfile
Normal file
12
Dockerfile
Normal file
|
@ -0,0 +1,12 @@
|
|||
FROM python:3.13-alpine
|
||||
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY src ./src
|
||||
EXPOSE 8000
|
||||
|
||||
# Alpine images ship BusyBox `adduser` (there is no `useradd`);
# -D creates the unprivileged account without assigning a password.
RUN adduser -D app
|
||||
USER app
|
||||
|
||||
# Hypercorn takes its listen address via --bind (host:port); it has no --host flag.
# Bind on all interfaces at the port declared by EXPOSE above.
CMD ["hypercorn", "src.__main__:app", "--bind", "0.0.0.0:8000"]
|
19
README.md
19
README.md
|
@ -1,7 +1,18 @@
|
|||
Change `base_url` and run it as you would run a quart app
|
||||
Set the `BASE_URL` environment variable to your URL!
|
||||
|
||||
This is a MITM between you and Wikipedia; basically, it impersonates Wikipedia.
|
||||
Production:
|
||||
|
||||
I don't know if that's legal, but I don't think you're going to get sued.
|
||||
```bash
|
||||
BASE_URL=wikipedia.localhost:8000 hypercorn src.__main__:app
|
||||
```
|
||||
|
||||
It's easier to maintain this way. If somebody turns out to have a problem, I will do something about it
|
||||
Debug:
|
||||
|
||||
```bash
|
||||
BASE_URL=wikipedia.localhost:8000 python3 src/
|
||||
```
|
||||
|
||||
|
||||
## Privacy
|
||||
|
||||
Nothing is intentionally logged. However, because of the cache, it's possible to tell which pages were accessed through the instance and when. Leaks can also happen.
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
quart
|
||||
aiohttp[speedups]
|
||||
quart
|
|
@ -3,17 +3,22 @@ import aiohttp, aiofiles
|
|||
import aiofiles.os
|
||||
from time import time
|
||||
from binascii import crc32
|
||||
from os import getenv
|
||||
import re
|
||||
|
||||
# static_folder=None disables Quart's built-in static file handling.
app = Quart(__name__, static_folder=None)

# Scheme and host[:port] under which this proxy is reachable from the outside.
# The README instructs users to set BASE_URL in the environment, so read it
# from there and fall back to the local development address.
base_proto = 'http'
base_url = getenv('BASE_URL', 'wikipedia.localhost:8000')

print("Base URL is " + base_proto + "://" + base_url)
|
||||
|
||||
def fix_url(text: str) -> str:
    """Rewrite Wikipedia/Wikimedia URLs in *text* to point at this proxy.

    Every ``<subdomains>.wikipedia.org`` or ``<subdomains>.wikimedia.org``
    URL has its ``wikipedia.org``/``wikimedia.org`` part replaced by
    ``base_url`` while the complete subdomain chain is kept. URLs with an
    explicit ``http:``/``https:`` scheme are rewritten to ``base_proto``;
    protocol-relative URLs (``//host``) stay protocol-relative.
    """
    # Group 1: optional scheme. Group 2: the WHOLE subdomain chain including
    # its trailing dot. The repetition lives inside the capture group on
    # purpose: a repeated capture group such as ``([\w-]+\.)+`` only keeps
    # its last iteration, which would turn ``en.m.`` into ``m.``.
    pattern = r'(https:|http:)?//((?:[\w-]+\.)+)(?:wikipedia|wikimedia)\.org'

    def repl(match):
        subdomains = match.group(2)
        if match.group(1):
            # Absolute URL: use the scheme the proxy itself is served over.
            return base_proto + '://' + subdomains + base_url
        return '//' + subdomains + base_url

    return re.sub(pattern, repl, text)
|
||||
|
@ -22,7 +27,10 @@ def fix_url(text):
|
|||
@app.route('/<path:path>')
|
||||
async def proxy(path):
|
||||
host = request.headers.get('Host')
|
||||
dep = host.split('.')[0]
|
||||
|
||||
dep = host[:-len(base_url)]
|
||||
if dep != '':
|
||||
dep = dep[:-1] # remove dot
|
||||
|
||||
if not host.endswith(base_url):
|
||||
return '', 200
|
||||
|
@ -83,19 +91,21 @@ async def proxy(path):
|
|||
headers = {k.lower(): v for k,v in headers if k.lower() in ['cache-control', 'age', 'content-language', 'content-type', 'last-modified', 'date', 'x-content-type-options', 'location']}
|
||||
content = await response.content.read()
|
||||
|
||||
if 'location' in headers:
|
||||
print(headers['location'])
|
||||
print(request.url_root)
|
||||
if 'location' in headers: # if a redirect
|
||||
headers['location'] = fix_url(headers['location'])
|
||||
|
||||
if any([headers['content-type'].startswith(t) for t in ['text/html', 'text/javascript', 'application']]):
|
||||
try:
|
||||
content = fix_url(content.decode()).encode()
|
||||
except: # if file is not a text file
|
||||
pass
|
||||
|
||||
headers['content-length'] = str(len(content))
|
||||
headers['x-rp-cache'] = 'MISS'
|
||||
|
||||
await aiofiles.os.makedirs(cache_path, exist_ok=True)
|
||||
|
||||
async with aiofiles.open(cache_path + '/_file', mode='wb') as f:
|
||||
await f.write(int.to_bytes(int(time()-int(headers['age'])), 8))
|
||||
await f.write(int.to_bytes(int(time() - int(headers['age'])), 8))
|
||||
await f.write(int.to_bytes(response.status, 2))
|
||||
await f.write(int.to_bytes(len(headers)))
|
||||
for k, v in headers.items():
|
||||
|
@ -107,11 +117,10 @@ async def proxy(path):
|
|||
await f.write(int.to_bytes(len(value_encoded)))
|
||||
await f.write(value_encoded)
|
||||
await f.write(content)
|
||||
print('cache written')
|
||||
|
||||
r = Response(content, status=response.status, headers=headers)
|
||||
return r
|
||||
|
||||
if __name__ == "__main__":
    # Debug entry point (`python3 src/`): start Quart's development server
    # once, on port 8000, so it matches the port in the default base_url.
    app.debug = True
    app.run(port=8000)
|
Loading…
Add table
Reference in a new issue