Source code for webfinger

"""Handles requests for WebFinger endpoints.

* https://webfinger.net/
* https://tools.ietf.org/html/rfc7033
"""
from datetime import timedelta
import logging
import re
from urllib.parse import urljoin, urlparse

from flask import render_template, request
from granary import as2
from webutil import flask_util, util
from webutil.flask_util import error, flash, Found
from webutil.util import json_dumps, json_loads
from requests import RequestException

import activitypub
import common
from common import CACHE_CONTROL, CACHE_CONTROL_VARY_ACCEPT
import domains
from domains import (
    LOCAL_DOMAINS,
    PRIMARY_DOMAIN,
    PROTOCOL_DOMAINS,
    subdomain_wrap,
    SUPERDOMAIN,
)
from flask_app import app
import memcache
import models
from protocol import Protocol
import web
from web import Web

SUBSCRIBE_LINK_REL = 'http://ostatus.org/schema/1.0/subscribe'

logger = logging.getLogger(__name__)


class Webfinger(flask_util.XrdOrJrd):
    """Serves a user's WebFinger profile.

    Supports both JRD and XRD; defaults to JRD.

    https://tools.ietf.org/html/rfc7033#section-4
    """
    # memoized with a one hour expiration, keyed on the full request URL and
    # the Accept header
    @memcache.memoize(expire=timedelta(hours=1),
                      key=lambda self: (request.url, request.headers.get('Accept')))
    @flask_util.headers(CACHE_CONTROL_VARY_ACCEPT)
    def dispatch_request(self):
        """Delegates to the base class; the decorators add caching and headers."""
        return super().dispatch_request()

    def template_prefix(self):
        """Returns the template name prefix for this view."""
        return 'webfinger_user'

    def template_vars(self):
        """Generates the WebFinger data for the ``resource`` query parameter.

        Resolves the resource to a protocol and user, checks that the user is
        enabled for ActivityPub, and builds the subject, aliases, and links.

        Problems (unsupported resource, unknown protocol, missing or disabled
        user, missing handle) are reported via :func:`error`, which is
        expected to abort the request.

        Returns:
          dict: WebFinger data with ``subject``, ``aliases``, and ``links``

        Raises:
          Found: to redirect web users to their AP subdomain when the request
            arrived on a different, non-local subdomain
        """
        # logger.debug(f'Headers: {list(request.headers.items())}')

        resource = flask_util.get_required_param('resource').strip()
        resource = resource.removeprefix(domains.host_url())

        # handle Bridgy Fed actor URLs, eg https://fed.brid.gy/snarfed.org
        host = util.domain_from_link(domains.host_url())
        if resource in ('', '/', f'acct:{host}', f'acct:@{host}'):
            error('Expected other domain, not *.brid.gy')

        proto = None
        try:
            username, server = util.parse_acct_uri(resource)
            user_id = server
            proto = Protocol.for_bridgy_subdomain(user_id, fed='web')
            if proto:
                # acct: address on one of our protocol subdomains; the user
                # part is the id
                user_id = username
        except ValueError:
            # not an acct: URI; fall back to the URL's host, or the raw
            # resource if it doesn't parse as a URL with a host
            user_id = username = server = urlparse(resource).netloc or resource

        if user_id == PRIMARY_DOMAIN or user_id in PROTOCOL_DOMAINS:
            proto = Web
        elif not proto:
            proto = Protocol.for_request(fed='web')
            if not proto:
                error(f"Couldn't determine protocol for {resource}")

        logger.debug(f'Protocol {proto.LABEL}, user id {user_id}')

        try:
            user = models.load_user(user_id, proto=proto)
        except RuntimeError:
            error(f'No {proto.LABEL} user found for {user_id}', status=404)

        if (not user.is_enabled(activitypub.ActivityPub)
                or (proto == Web
                    and username not in (user.key.id(), user.username()))):
            error(f'No {proto.LABEL} user found for {user_id}', status=404)

        ap_handle = user.handle_as('activitypub')
        if not ap_handle:
            error(f'{proto.LABEL} user {user_id} has no handle', status=404)

        # backward compatibility for initial Web users whose AP actor ids are on
        # fed.brid.gy, not web.brid.gy
        subdomain = request.host.split('.')[0]
        if (user.LABEL == 'web'
                and subdomain not in (LOCAL_DOMAINS + (user.ap_subdomain,))):
            url = urljoin(f'https://{user.ap_subdomain}{SUPERDOMAIN}/',
                          request.full_path)
            raise Found(location=url)

        actor = user.obj.as1 if user.obj and user.obj.as1 else {}
        logger.debug(f'Generating WebFinger data for {user.key.id()}')
        actor_id = user.id_as(activitypub.ActivityPub)
        logger.debug(f'AS1 actor: {actor_id}')
        urls = util.dedupe_urls(util.get_list(actor, 'urls') +
                                util.get_list(actor, 'url') +
                                [user.web_url()])
        logger.debug(f'URLs: {urls}')
        canonical_url = urls[0]

        # generate webfinger content
        data = util.trim_nulls({
            'subject': 'acct:' + ap_handle.lstrip('@'),
            'aliases': urls,
            'links':
            [{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            } for url in urls if util.is_web(url)] +

            [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': url,
            } for url in util.get_urls(actor, 'image')] +

            [{
                'rel': 'canonical_uri',
                'type': 'text/html',
                'href': canonical_url,
            },

            # ActivityPub
            #
            # include two self links, one for each AP content type, since some
            # fediverse servers (eg Pleroma, Akkoma) are unnecessarily picky
            # about which one they use from XRD vs JRD.
            # https://github.com/snarfed/bridgy-fed/issues/995
            {
                'rel': 'self',
                'type': as2.CONTENT_TYPE_LD_PROFILE,
                'href': actor_id,
            }, {
                'rel': 'self',
                'type': as2.CONTENT_TYPE,
                'href': actor_id,
            }, {
                # AP reads this and sharedInbox from the AS2 actor, not
                # webfinger, so strictly speaking, it's probably not needed here.
                'rel': 'inbox',
                'type': as2.CONTENT_TYPE_LD_PROFILE,
                'href': actor_id + '/inbox',
            }, {
                # https://www.w3.org/TR/activitypub/#sharedInbox
                'rel': 'sharedInbox',
                'type': as2.CONTENT_TYPE_LD_PROFILE,
                'href': subdomain_wrap(proto, '/ap/sharedInbox'),
            },

            # remote follow
            # https://socialhub.activitypub.rocks/t/what-is-the-current-spec-for-remote-follow/2020/11?u=snarfed
            # https://github.com/snarfed/bridgy-fed/issues/60#issuecomment-1325589750
            {
                'rel': 'http://ostatus.org/schema/1.0/subscribe',
                # always use fed.brid.gy for UI pages, not protocol subdomain
                # TODO: switch to:
                # 'template': domains.host_url(user.user_page_path('?url={uri}')),
                # the problem is that user_page_path() uses handle_or_id, which uses
                # custom username instead of domain, which may not be unique
                'template': f'https://{PRIMARY_DOMAIN}' +
                            user.user_page_path('?url={uri}'),
            }]
        })

        # logger.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
        return data
class HostMeta(flask_util.XrdOrJrd):
    """Serves the ``/.well-known/host-meta`` document.

    Available as both JRD and XRD; XRD is the default.

    https://tools.ietf.org/html/rfc6415#section-3
    """
    DEFAULT_TYPE = flask_util.XrdOrJrd.XRD

    @flask_util.headers(CACHE_CONTROL)
    def dispatch_request(self, **kwargs):
        """Delegates to the base class, adding the Cache-Control header."""
        response = super().dispatch_request(**kwargs)
        return response

    def template_vars(self):
        """Returns the template variables: this host's base URI."""
        host_uri = domains.host_url()
        return {'host_uri': host_uri}

    def template_prefix(self):
        """Returns the template name prefix for this view."""
        return 'host-meta'
@app.get('/.well-known/host-meta.xrds')
@flask_util.headers(CACHE_CONTROL)
def host_meta_xrds():
    """Serves the ``/.well-known/host-meta.xrds`` XRDS-Simple document.

    Returns:
      (str, dict) tuple: rendered template body and response headers
    """
    body = render_template('host-meta.xrds', host_uri=domains.host_url())
    headers = {'Content-Type': 'application/xrds+xml'}
    return body, headers
def fetch(addr):
    """Fetches and returns an address's WebFinger data.

    On failure, flashes a message and returns None.

    TODO: switch to raising exceptions instead of flashing messages and
    returning None

    Args:
      addr (str): a Webfinger-compatible address, eg ``@x@y``, ``acct:x@y``, or
        ``https://x/y``

    Returns:
      dict: fetched WebFinger data, or None on error
    """
    # function-scope import so that this block doesn't depend on a change to
    # the module's import list
    from urllib.parse import quote

    # normalize to bare user@domain. drop a leading acct: scheme first;
    # otherwise acct:x@y would be turned into acct:acct:x@y below.
    addr = addr.strip().removeprefix('acct:').strip('@')
    split = addr.split('@')
    if len(split) == 2:
        addr_domain = split[1]
        resource = f'acct:{addr}'
    elif util.is_web(addr):
        addr_domain = util.domain_from_link(addr, minimize=False)
        resource = addr
    else:
        flash('Enter a fediverse address in @user@domain.social format')
        return None

    # percent-encode the resource so that characters like & = # + inside it
    # can't be misread as part of the query string. : @ / ? are legal in a
    # query component, so common acct: and https: resources are sent unchanged.
    # https://www.rfc-editor.org/rfc/rfc7033#section-4.1
    encoded = quote(resource, safe='/:@?')

    try:
        resp = util.requests_get(
            f'https://{addr_domain}/.well-known/webfinger?resource={encoded}')
    except RequestException as e:
        flash(f"Couldn't fetch {addr}: {e}")
        return None
    except BaseException as e:
        # only swallow connection failures; anything else propagates
        if util.is_connection_failure(e):
            flash(f"Couldn't connect to {addr_domain}")
            return None
        raise

    if not resp.ok:
        flash(f'WebFinger on {addr_domain} returned HTTP {resp.status_code}')
        return None

    try:
        data = resp.json()
    except ValueError as e:
        logger.warning(f'Got {e}', exc_info=True)
        flash(f'WebFinger on {addr_domain} returned non-JSON')
        return None

    logger.debug(f'Got WebFinger for {addr}')
    return data
def fetch_actor_url(addr):
    """Fetches and returns a WebFinger address's ActivityPub actor URL.

    On failure, flashes a message and returns None.

    The WebFinger document comes from a remote server, so malformed content
    (non-object document, null or non-list-of-objects ``links``, null or
    non-string ``type``) is skipped instead of raising.

    Args:
      addr (str): a Webfinger-compatible address, eg ``@x@y``, ``acct:x@y``, or
        ``https://x/y``

    Returns:
      str: ActivityPub actor URL, or None on error or not found
    """
    data = fetch(addr)
    if not isinstance(data, dict):
        return None

    for link in data.get('links') or []:
        if not isinstance(link, dict):
            continue

        media_type = link.get('type')
        if not isinstance(media_type, str):
            continue

        # drop parameters, eg '; profile=...' or '; charset=...'
        media_type = media_type.split(';')[0].strip()
        if link.get('rel') == 'self' and media_type in as2.CONTENT_TYPES:
            return link.get('href')

    return None
# Register the class-based views. HostMeta is registered twice, under
# distinct endpoint names, so that it serves both the bare path and the
# .json path.
app.add_url_rule('/.well-known/webfinger',
                 view_func=Webfinger.as_view('webfinger'))
app.add_url_rule('/.well-known/host-meta',
                 view_func=HostMeta.as_view('hostmeta'))
app.add_url_rule('/.well-known/host-meta.json',
                 view_func=HostMeta.as_view('hostmeta-json'))