import logging
import json
import requests
import base64
import datetime

from synapse.module_api import ModuleApi

from twisted.web import http
from twisted.internet import defer
from twisted.internet.defer import inlineCallbacks
from twisted.web.server import NOT_DONE_YET
from twisted.web.http import OK, NO_CONTENT

# Setting up logging specifically for this module:

# 1. Create a file handler to write logs to a specific file.
file_handler = logging.FileHandler('/var/log/matrix-synapse/redlight.log')
file_handler.setLevel(logging.INFO)

# 2. Define the format for the logs.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

# 3. Initialize the logger for this module and set its level.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# 4. Attach the file handler to the logger.
logger.addHandler(file_handler)

# Prevent this logger's messages from being passed to the root logger or other handlers.
logger.propagate = False


class SourceDataManager:

    def __init__(self, module, config):
        self._module = module
        self._source_repo_url = config.get("redlight_source_repo_url", "")
        self._git_token = config.get("redlight_git_token", "")
        self._source_list_file_path = config.get("redlight_source_list_file_path", "dist/summaries.json")
        self._filtered_tags = config.get("filtered_tags", [])
        self._source_dict = {}
        self._source_dict_last_update = None
        self.update_data()
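
    # Note (assumption, not stated in this code): the configured
    # "redlight_source_repo_url" is expected to already point at the Gitea
    # file-contents API base for the source repository, e.g. something like
    # https://git.example.org/api/v1/repos/<owner>/<repo>, so that the
    # "/contents/<file_path>" suffix built below resolves to Gitea's contents
    # endpoint. The host, owner and repo shown here are illustrative
    # placeholders only.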

    def fetch_file_from_gitea(self, repo_url, token, file_path):
        # Construct the API URL for the file.
        base_url = repo_url.rstrip("/")
        api_url = f"{base_url}/contents/{file_path}?ref=main&access_token={token}"

        # Log the attempt to fetch the file.
        logger.info(f"Attempting to update source list, fetching file from: {api_url}")

        response = requests.get(api_url)

        if response.status_code == 200:
            content_base64 = response.json().get("content")
            if content_base64:
                decoded_content = base64.b64decode(content_base64).decode('utf-8')
                # Log success.
                logger.info(f"Successfully fetched content with length: {len(decoded_content)} characters.")
                return decoded_content
            else:
                error_message = "Content not found in the response!"
                logger.error(error_message)
                raise ValueError(error_message)
        else:
            error_message = f"Failed to fetch file. Response code: {response.status_code}. Content: {response.content.decode('utf-8')}"
            logger.error(error_message)
            response.raise_for_status()
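
    # Illustrative sketch (inferred from the key lookups in update_data below)
    # of one entry in the fetched summaries JSON list; the values are
    # placeholders, not real data:
    #
    #   {
    #       "report_id": "...",
    #       "room": {"room_id_hash": "..."},
    #       "report_info": {"tags": ["..."]}
    #   }
    #
    # Only these keys are relied on here; any other fields are ignored.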

    def update_data(self):
        now = datetime.datetime.now()
        if not self._source_dict_last_update or (now - self._source_dict_last_update).total_seconds() > 3600:
            raw_content = self.fetch_file_from_gitea(self._source_repo_url, self._git_token, self._source_list_file_path)
            content = json.loads(raw_content)

            self._source_dict = {
                report["room"]["room_id_hash"]: report["report_id"]
                for report in content
                if any(tag in self._filtered_tags for tag in report["report_info"]["tags"])
            }

            self._source_dict_last_update = now
            logger.info(f"Source data updated. Number of reports matching the filtered tags: {len(self._source_dict)}")

    def get_data(self):
        self.update_data()
        return self._source_dict


class RedlightServerModule:

    def __init__(self, config: dict, api: ModuleApi):
        self._api = api

        # Register a new web endpoint "/_matrix/loj/v1/abuse_lookup" which will be handled by RedlightServerResource.
        api.register_web_resource(
            "/_matrix/loj/v1/abuse_lookup",
            RedlightServerResource(config, self)
        )

        logger.info("RedlightServerModule initialized.")


class RedlightServerResource:
    # This flag helps Twisted identify this as a final resource and not look for children.
    isLeaf = True

    def __init__(self, config: dict, module):
        self._module = module
        self._data_manager = SourceDataManager(module, config)
        self._source_dict = self._data_manager.get_data()
        self._client_api_tokens = config.get("redlight_client_tokens", [])
        self._filtered_tags = config.get("filtered_tags", [])
        # Logging for debugging purposes.
        logger.debug(f"Filtered room_id_hashes: {list(self._source_dict.keys())}")

    # Handle incoming HTTP requests to the registered endpoint.
    def render(self, request):
        # Extract the HTTP method (GET, PUT, POST, etc.) from the request.
        method = request.method.decode('ascii')
        # Based on the method, try to find the corresponding handler.
        handler = getattr(self, f"on_{method}", None)

        # If a handler is found, process the request with it.
        if handler:
            def _respond(result):
                request.write(result)
                request.finish()

            def _error(failure):
                logger.error(f"Error processing abuse lookup request: {failure}")
                request.setResponseCode(500)
                request.write(json.dumps({"error": "Internal Server Error"}).encode("utf-8"))
                request.finish()

            d = handler(request)
            d.addCallbacks(_respond, _error)
            # Indicates asynchronous processing.
            return NOT_DONE_YET
        else:
            logger.warning(f"Received a request with unsupported method: {method}")
            # If no handler is found for the method, return "Method Not Allowed".
            return self.method_not_allowed(request)
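
    # For reference, a sketch of the wire format this resource handles,
    # reconstructed from the field and status-code handling in on_PUT below
    # (the hash and token values are placeholders):
    #
    #   PUT /_matrix/loj/v1/abuse_lookup
    #   {
    #       "room_id_hash": "<hashed room ID>",
    #       "user_id_hash": "<hashed user ID>",
    #       "api_token": "<one of redlight_client_tokens>"
    #   }
    #
    #   Responses:
    #     200 OK          {"error": null, "report_id": "..."}  (room is in the filtered source list)
    #     204 No Content  empty body                           (no match)
    #     401 Unauthorized for an invalid token, 400 Bad Request for malformed requests.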

    # Handle PUT requests to the endpoint.
    @inlineCallbacks
    def on_PUT(self, request):
        logger.info(f"Processing PUT request from {request.getClientIP()}.")
        try:
            # Read and decode the request body.
            body = yield request.content.read()
            content = body.decode("utf-8")
            logger.info(f"Received abuse lookup request: {content}")

            # Extract specific data points from the request content.
            data = json.loads(content)
            room_id_hash = data["room_id_hash"]
            user_id_hash = data["user_id_hash"]
            api_token = data["api_token"]

            # Check if the provided API token is valid.
            if api_token not in self._client_api_tokens:
                logger.warning(f"Invalid API token provided by {request.getClientIP()}.")
                request.setResponseCode(401)
                defer.returnValue(json.dumps({"error": "Unauthorized"}).encode("utf-8"))
                return

            # Update and fetch the source_dict when required.
            source_dict = self._data_manager.get_data()

            # Check for abuse based on the room_id_hash and the filtered source list.
            is_abuse = room_id_hash in source_dict

            # Respond based on whether the request is identified as abusive or not.
            if is_abuse:
                report_id = source_dict[room_id_hash]
                logger.warning(f"Abuse detected from {request.getClientIP()}, user_id_hash: {user_id_hash} report_id: {report_id}.")
                logger.debug(f"room_id_hash: {room_id_hash}.")
                request.setResponseCode(http.OK)
                defer.returnValue(json.dumps({
                    "error": None,
                    "report_id": report_id,
                }).encode("utf-8"))
            else:
                logger.info(f"No abuse detected for request from {request.getClientIP()}.")
                request.setResponseCode(http.NO_CONTENT)
                defer.returnValue(b"")

        except Exception as e:
            logger.error(f"Error processing abuse lookup PUT request from {request.getClientIP()}: {e}")
            request.setResponseCode(400)
            defer.returnValue(json.dumps({"error": "Bad Request"}).encode("utf-8"))

    # Handle GET requests (by disallowing them).
    def on_GET(self, request):
        return self.method_not_allowed(request)

    # Handle POST requests (by disallowing them).
    def on_POST(self, request):
        return self.method_not_allowed(request)

    # General method to respond with "Method Not Allowed" for disallowed or unrecognized HTTP methods.
    def method_not_allowed(self, request):
        logger.warning(f"Method Not Allowed: {request.method.decode('ascii')} from {request.getClientIP()}.")
        request.setResponseCode(405)
        return json.dumps({"error": "Method Not Allowed"}).encode("utf-8")


# Function to parse the configuration for this module.
def parse_config(config: dict) -> dict:
    return config


# Factory function to create and return an instance of the RedlightServerModule.
def create_module(api: ModuleApi, config: dict) -> RedlightServerModule:
    return RedlightServerModule(config, api)
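

# Illustrative homeserver.yaml snippet for enabling this module. The dotted
# module path and every value below are placeholders chosen for the example;
# only the option names are taken from the config.get(...) calls above.
#
#   modules:
#     - module: redlight_server.RedlightServerModule
#       config:
#         redlight_source_repo_url: "https://git.example.org/api/v1/repos/<owner>/<repo>"
#         redlight_git_token: "<gitea access token>"
#         redlight_source_list_file_path: "dist/summaries.json"
#         filtered_tags: ["<tag to filter on>"]
#         redlight_client_tokens: ["<client token>"]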