diff --git a/api/config/settings/common.py b/api/config/settings/common.py
index 23d90cb6d..8af05491f 100644
--- a/api/config/settings/common.py
+++ b/api/config/settings/common.py
@@ -580,6 +580,11 @@ CELERY_BROKER_URL = env(
CELERY_TASK_DEFAULT_RATE_LIMIT = 1
CELERY_TASK_TIME_LIMIT = 300
CELERY_BEAT_SCHEDULE = {
+ "audio.fetch_rss_feeds": {
+ "task": "audio.fetch_rss_feeds",
+ "schedule": crontab(minute="0", hour="*"),
+ "options": {"expires": 60 * 60},
+ },
"common.prune_unattached_attachments": {
"task": "common.prune_unattached_attachments",
"schedule": crontab(minute="0", hour="*"),
@@ -976,3 +981,11 @@ MIN_DELAY_BETWEEN_DOWNLOADS_COUNT = env.int(
MARKDOWN_EXTENSIONS = env.list("MARKDOWN_EXTENSIONS", default=["nl2br", "extra"])
LINKIFIER_SUPPORTED_TLDS = ["audio"] + env.list("LINKINFIER_SUPPORTED_TLDS", default=[])
+EXTERNAL_MEDIA_PROXY_ENABLED = env.bool("EXTERNAL_MEDIA_PROXY_ENABLED", default=True)
+
+# By default, only users who subscribe to a podcast RSS feed have access to its content.
+# Set this to "instance" or "everyone" to change that.
+PODCASTS_THIRD_PARTY_VISIBILITY = env("PODCASTS_THIRD_PARTY_VISIBILITY", default="me")
+PODCASTS_RSS_FEED_REFRESH_DELAY = env.int(
+ "PODCASTS_RSS_FEED_REFRESH_DELAY", default=60 * 60 * 24
+)
diff --git a/api/funkwhale_api/audio/categories.py b/api/funkwhale_api/audio/categories.py
index 56a748a53..a026425fc 100644
--- a/api/funkwhale_api/audio/categories.py
+++ b/api/funkwhale_api/audio/categories.py
@@ -109,3 +109,5 @@ ITUNES_CATEGORIES = {
"TV Reviews",
],
}
+
+# Flat list built from every item of every value list in ITUNES_CATEGORIES
+ITUNES_SUBCATEGORIES = [s for p in ITUNES_CATEGORIES.values() for s in p]
diff --git a/api/funkwhale_api/audio/factories.py b/api/funkwhale_api/audio/factories.py
index 6a7c56745..7e2a4bfae 100644
--- a/api/funkwhale_api/audio/factories.py
+++ b/api/funkwhale_api/audio/factories.py
@@ -1,6 +1,9 @@
+import uuid
+
import factory
from funkwhale_api.factories import registry, NoUpdateOnCreate
+from funkwhale_api.federation import actors
from funkwhale_api.federation import factories as federation_factories
from funkwhale_api.music import factories as music_factories
@@ -11,6 +14,10 @@ def set_actor(o):
return models.generate_actor(str(o.uuid))
+def get_rss_channel_name():
+    """Return a new random username of the form ``rssfeed-<uuid4>``."""
+    random_suffix = uuid.uuid4()
+    return "rssfeed-" + str(random_suffix)
+
+
@registry.register
class ChannelFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
uuid = factory.Faker("uuid4")
@@ -32,10 +39,20 @@ class ChannelFactory(NoUpdateOnCreate, factory.django.DjangoModelFactory):
model = "audio.Channel"
class Params:
+ external = factory.Trait(
+ attributed_to=factory.LazyFunction(actors.get_service_actor),
+ library__privacy_level="me",
+ actor=factory.SubFactory(
+ federation_factories.ActorFactory,
+ local=True,
+ preferred_username=factory.LazyFunction(get_rss_channel_name),
+ ),
+ )
local = factory.Trait(
attributed_to=factory.SubFactory(
federation_factories.ActorFactory, local=True
),
+ library__privacy_level="everyone",
artist__local=True,
)
diff --git a/api/funkwhale_api/audio/models.py b/api/funkwhale_api/audio/models.py
index bdf700f78..38e023c4a 100644
--- a/api/funkwhale_api/audio/models.py
+++ b/api/funkwhale_api/audio/models.py
@@ -19,6 +19,19 @@ def empty_dict():
return {}
+class ChannelQuerySet(models.QuerySet):
+    """Queryset helpers for channels."""
+
+    def external_rss(self, include=True):
+        """Keep (or, with ``include=False``, drop) channels backed by an external RSS feed.
+
+        A channel matches when it is attributed to the service actor and the
+        username of its own actor starts with ``rssfeed-``.
+        """
+        # function-level import, presumably to avoid a circular import -- TODO confirm
+        from funkwhale_api.federation import actors
+
+        is_external = models.Q(
+            attributed_to=actors.get_service_actor(),
+            actor__preferred_username__startswith="rssfeed-",
+        )
+        select = self.filter if include else self.exclude
+        return select(is_external)
+
+
class Channel(models.Model):
uuid = models.UUIDField(default=uuid.uuid4, unique=True)
artist = models.OneToOneField(
@@ -45,6 +58,8 @@ class Channel(models.Model):
default=empty_dict, max_length=50000, encoder=DjangoJSONEncoder, blank=True
)
+ objects = ChannelQuerySet.as_manager()
+
def get_absolute_url(self):
suffix = self.uuid
if self.actor.is_local:
@@ -54,7 +69,9 @@ class Channel(models.Model):
return federation_utils.full_url("/channels/{}".format(suffix))
def get_rss_url(self):
- if not self.artist.is_local:
+ if not self.artist.is_local or self.actor.preferred_username.startswith(
+ "rssfeed-"
+ ):
return self.rss_url
return federation_utils.full_url(
@@ -81,5 +98,6 @@ def generate_actor(username, **kwargs):
@receiver(post_delete, sender=Channel)
def delete_channel_related_objs(instance, **kwargs):
instance.library.delete()
- instance.actor.delete()
+ if instance.actor != instance.attributed_to:
+ instance.actor.delete()
instance.artist.delete()
diff --git a/api/funkwhale_api/audio/renderers.py b/api/funkwhale_api/audio/renderers.py
index 0a8e71d6b..c5c49ecce 100644
--- a/api/funkwhale_api/audio/renderers.py
+++ b/api/funkwhale_api/audio/renderers.py
@@ -21,12 +21,16 @@ class PodcastRSSRenderer(renderers.JSONRenderer):
}
final.update(data)
tree = dict_to_xml_tree("rss", final)
- return b'\n' + ET.tostring(
- tree, encoding="utf-8"
- )
+ return render_xml(tree)
class PodcastRSSContentNegociation(negotiation.DefaultContentNegotiation):
def select_renderer(self, request, renderers, format_suffix=None):
return (PodcastRSSRenderer(), PodcastRSSRenderer.media_type)
+
+
+def render_xml(tree):
+    """Serialize ``tree`` to UTF-8 encoded bytes, preceded by the fixed byte prefix."""
+    serialized = ET.tostring(tree, encoding="utf-8")
+    return b'\n' + serialized
diff --git a/api/funkwhale_api/audio/serializers.py b/api/funkwhale_api/audio/serializers.py
index 4a093ad8b..2f6b76442 100644
--- a/api/funkwhale_api/audio/serializers.py
+++ b/api/funkwhale_api/audio/serializers.py
@@ -1,17 +1,32 @@
+import datetime
+import logging
+import time
+import uuid
+
from django.conf import settings
from django.db import transaction
+from django.db.models import Q
+from django.utils import timezone
+
+import feedparser
+import requests
+import pytz
from rest_framework import serializers
from django.templatetags.static import static
+from django.urls import reverse
from funkwhale_api.common import serializers as common_serializers
from funkwhale_api.common import utils as common_utils
from funkwhale_api.common import locales
from funkwhale_api.common import preferences
+from funkwhale_api.common import session
+from funkwhale_api.federation import actors
from funkwhale_api.federation import models as federation_models
from funkwhale_api.federation import serializers as federation_serializers
from funkwhale_api.federation import utils as federation_utils
+from funkwhale_api.moderation import mrf
from funkwhale_api.music import models as music_models
from funkwhale_api.music import serializers as music_serializers
from funkwhale_api.tags import models as tags_models
@@ -22,6 +37,9 @@ from . import categories
from . import models
+logger = logging.getLogger(__name__)
+
+
class ChannelMetadataSerializer(serializers.Serializer):
itunes_category = serializers.ChoiceField(
choices=categories.ITUNES_CATEGORIES, required=True
@@ -218,7 +236,7 @@ class ChannelUpdateSerializer(serializers.Serializer):
class ChannelSerializer(serializers.ModelSerializer):
artist = serializers.SerializerMethodField()
- actor = federation_serializers.APIActorSerializer()
+ actor = serializers.SerializerMethodField()
attributed_to = federation_serializers.APIActorSerializer()
rss_url = serializers.CharField(source="get_rss_url")
@@ -246,6 +264,11 @@ class ChannelSerializer(serializers.ModelSerializer):
def get_subscriptions_count(self, obj):
return obj.actor.received_follows.exclude(approved=False).count()
+    def get_actor(self, obj):
+        """Serialize the channel actor; channels attributed to the service actor get None."""
+        owned_by_service = obj.attributed_to == actors.get_service_actor()
+        if not owned_by_service:
+            return federation_serializers.APIActorSerializer(obj.actor).data
+        return None
+
class SubscriptionSerializer(serializers.Serializer):
approved = serializers.BooleanField(read_only=True)
@@ -259,11 +282,475 @@ class SubscriptionSerializer(serializers.Serializer):
return data
+class RssSubscribeSerializer(serializers.Serializer):
+    """Input payload made of a single required ``url`` field."""
+
+    url = serializers.URLField()
+
+
+class FeedFetchException(Exception):
+    """Raised when a feed cannot be downloaded or its content is rejected."""
+
+
+class BlockedFeedException(FeedFetchException):
+    """Raised when a feed URL is dropped by the MRF check."""
+
+
+def retrieve_feed(url):
+    """Download the RSS feed located at ``url`` and return the HTTP response.
+
+    Raises:
+        FeedFetchException: on an HTTP error status, a timeout, a connection
+            error or any other failure during the request. The original
+            exception is chained as the cause.
+    """
+    try:
+        logger.info("Fetching RSS feed at %s", url)
+        response = session.get_session().get(url)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as e:
+        # a requests.Response is falsy for 4xx/5xx statuses, so compare with
+        # None explicitly, otherwise the status code is never reported
+        if e.response is not None:
+            raise FeedFetchException(
+                "Error while fetching feed: HTTP {}".format(e.response.status_code)
+            ) from e
+        raise FeedFetchException("Error while fetching feed: unknown error") from e
+    except requests.exceptions.Timeout as e:
+        raise FeedFetchException("Error while fetching feed: timeout") from e
+    except requests.exceptions.ConnectionError as e:
+        raise FeedFetchException(
+            "Error while fetching feed: connection error"
+        ) from e
+    except Exception as e:
+        # also covers the remaining requests.RequestException subclasses
+        raise FeedFetchException("Error while fetching feed: {}".format(e)) from e
+
+    return response
+
+
+@transaction.atomic
+def get_channel_from_rss_url(url):
+    """Fetch the feed at ``url`` and create or update the matching channel.
+
+    Returns:
+        A ``(channel, uploads)`` tuple, where ``uploads`` holds one upload per
+        valid feed entry. Invalid entries are skipped.
+
+    Raises:
+        BlockedFeedException: if ``url``, or the atom/public link announced by
+            the feed, is dropped by the MRF check.
+        FeedFetchException: if the download fails or the feed data does not
+            validate.
+    """
+    # first, check if the url is blocked
+    is_valid, _ = mrf.inbox.apply({"id": url})
+    if not is_valid:
+        logger.warning("Feed fetch for url %s dropped by MRF", url)
+        raise BlockedFeedException("This feed or domain is blocked")
+
+    # retrieve the XML payload at the given URL
+    response = retrieve_feed(url)
+
+    parsed_feed = feedparser.parse(response.text)
+    serializer = RssFeedSerializer(data=parsed_feed["feed"])
+    if not serializer.is_valid():
+        raise FeedFetchException("Invalid xml content: {}".format(serializer.errors))
+
+    # second mrf check with validated data
+    urls_to_check = set()
+    atom_link = serializer.validated_data.get("atom_link")
+
+    if atom_link and atom_link != url:
+        urls_to_check.add(atom_link)
+
+    if serializer.validated_data["link"] != url:
+        urls_to_check.add(serializer.validated_data["link"])
+
+    for u in urls_to_check:
+        is_valid, _ = mrf.inbox.apply({"id": u})
+        if not is_valid:
+            logger.warning("Feed fetch for url %s dropped by MRF", u)
+            raise BlockedFeedException("This feed or domain is blocked")
+
+    # now, we're clear, we can save the data
+    channel = serializer.save(rss_url=url)
+
+    entries = parsed_feed.entries or []
+    uploads = []
+    track_defaults = {}
+    existing_uploads = list(
+        channel.library.uploads.all().select_related(
+            "track__description", "track__attachment_cover"
+        )
+    )
+    # attribute access on a feedparser dict raises AttributeError for missing
+    # keys, so use .get() for optional data such as the feed rights
+    feed_rights = parsed_feed.feed.get("rights")
+    if feed_rights:
+        track_defaults["copyright"] = feed_rights
+    for entry in entries:
+        # entries without an id are rejected by the serializer below, they
+        # must not make the whole import fail here
+        logger.debug("Importing feed item %s", entry.get("id"))
+        s = RssFeedItemSerializer(data=entry)
+        if not s.is_valid():
+            logger.debug("Skipping invalid RSS feed item %s", entry)
+            continue
+        uploads.append(
+            s.save(channel, existing_uploads=existing_uploads, **track_defaults)
+        )
+
+    common_utils.on_commit(
+        music_models.TrackActor.create_entries,
+        library=channel.library,
+        delete_existing=True,
+    )
+
+    return channel, uploads
+
+
# RSS related stuff
# https://github.com/simplepie/simplepie-ng/wiki/Spec:-iTunes-Podcast-RSS
# is extremely useful
+class RssFeedSerializer(serializers.Serializer):
+    """Validate the channel-level data of a parsed RSS feed and persist it.
+
+    ``save`` creates or updates the artist, actor, library and channel rows
+    that represent the external feed.
+    """
+
+    title = serializers.CharField()
+    link = serializers.URLField()
+    language = serializers.CharField(required=False, allow_blank=True)
+    rights = serializers.CharField(required=False, allow_blank=True)
+    itunes_explicit = serializers.BooleanField(required=False, allow_null=True)
+    tags = serializers.ListField(required=False)
+    atom_link = serializers.DictField(required=False)
+    summary_detail = serializers.DictField(required=False)
+    author_detail = serializers.DictField(required=False)
+    image = serializers.DictField(required=False)
+
+    def validate_atom_link(self, v):
+        """Return the href of a ``rel=self`` RSS link, or None for any other link."""
+        if (
+            v.get("rel", "self") == "self"
+            and v.get("type", "application/rss+xml") == "application/rss+xml"
+        ):
+            # a link without href is handled like a missing link instead of
+            # failing with a KeyError
+            return v.get("href")
+        return None
+
+    def validate_summary_detail(self, v):
+        """Convert the summary into a content_type/text dict, or None when empty."""
+        content = v.get("value")
+        if not content:
+            return None
+        return {
+            "content_type": v.get("type", "text/plain"),
+            "text": content,
+        }
+
+    def validate_image(self, v):
+        """Convert the image into a url/mimetype dict, or None when it has no href."""
+        url = v.get("href")
+        if not url:
+            return None
+        return {
+            "url": url,
+            # fall back to jpeg when the extension is not recognized
+            "mimetype": common_utils.get_mimetype_from_ext(url) or "image/jpeg",
+        }
+
+    def validate_tags(self, v):
+        """Split itunes tag rows into ``parent``, ``child`` and free ``tags`` keys."""
+        data = {}
+        tag_serializer = tags_serializers.TagNameField()
+        for row in v:
+            if row.get("scheme") != "http://www.itunes.com/":
+                continue
+            term = row.get("term")
+            if not term:
+                # nothing usable in this row
+                continue
+            is_category = term in categories.ITUNES_CATEGORIES
+            is_subcategory = term in categories.ITUNES_SUBCATEGORIES
+            if "parent" not in data and is_category:
+                data["parent"] = term
+            elif "child" not in data and is_subcategory:
+                data["child"] = term
+            elif not is_category and not is_subcategory:
+                # accumulate over all rows: resetting the list for each row
+                # would only keep the tags of the last one
+                tags = data.setdefault("tags", [])
+                for tag in term.split(" "):
+                    try:
+                        tags.append(tag_serializer.to_internal_value(tag))
+                    except Exception:
+                        # best effort: tag names that do not validate are ignored
+                        continue
+
+        return data
+
+    @transaction.atomic
+    def save(self, rss_url):
+        """Create or update the channel described by the validated feed data.
+
+        ``rss_url`` is the URL the feed was fetched from. The validated atom
+        link, when present, is stored as the channel RSS URL instead.
+        Returns the channel.
+        """
+        validated_data = self.validated_data
+        # because there may be redirections from the original feed URL
+        real_rss_url = validated_data.get("atom_link", rss_url) or rss_url
+        service_actor = actors.get_service_actor()
+        author = validated_data.get("author_detail", {})
+        # named so that it does not shadow the ``categories`` module
+        feed_categories = validated_data.get("tags", {})
+        metadata = {
+            "explicit": validated_data.get("itunes_explicit", False),
+            "copyright": validated_data.get("rights"),
+            "owner_name": author.get("name"),
+            "owner_email": author.get("email"),
+            "itunes_category": feed_categories.get("parent"),
+            "itunes_subcategory": feed_categories.get("child"),
+            "language": validated_data.get("language"),
+        }
+        public_url = validated_data["link"]
+        existing = (
+            models.Channel.objects.external_rss()
+            .filter(
+                Q(rss_url=real_rss_url) | Q(rss_url=rss_url) | Q(actor__url=public_url)
+            )
+            .first()
+        )
+        channel_defaults = {
+            "rss_url": real_rss_url,
+            "metadata": metadata,
+        }
+        if existing:
+            artist_kwargs = {"channel": existing}
+            actor_kwargs = {"channel": existing}
+            actor_defaults = {"url": public_url}
+        else:
+            # pk=None never matches, which forces update_or_create to create
+            artist_kwargs = {"pk": None}
+            actor_kwargs = {"pk": None}
+            preferred_username = "rssfeed-{}".format(uuid.uuid4())
+            actor_defaults = {
+                "preferred_username": preferred_username,
+                "type": "Application",
+                "domain": service_actor.domain,
+                "url": public_url,
+                "fid": federation_utils.full_url(
+                    reverse(
+                        "federation:actors-detail",
+                        kwargs={"preferred_username": preferred_username},
+                    )
+                ),
+            }
+            channel_defaults["attributed_to"] = service_actor
+
+        actor_defaults["last_fetch_date"] = timezone.now()
+
+        # create/update the artist profile
+        artist, created = music_models.Artist.objects.update_or_create(
+            **artist_kwargs,
+            defaults={
+                "attributed_to": service_actor,
+                "name": validated_data["title"],
+                "content_category": "podcast",
+            },
+        )
+
+        cover = validated_data.get("image")
+        if cover:
+            common_utils.attach_file(artist, "attachment_cover", cover)
+
+        tags = feed_categories.get("tags", [])
+        if tags:
+            tags_models.set_tags(artist, *tags)
+
+        summary = validated_data.get("summary_detail")
+        if summary:
+            common_utils.attach_content(artist, "description", summary)
+
+        if created:
+            channel_defaults["artist"] = artist
+
+        # create/update the actor
+        actor, created = federation_models.Actor.objects.update_or_create(
+            **actor_kwargs, defaults=actor_defaults
+        )
+        if created:
+            channel_defaults["actor"] = actor
+
+        # create the library
+        if not existing:
+            channel_defaults["library"] = music_models.Library.objects.create(
+                actor=service_actor,
+                privacy_level=settings.PODCASTS_THIRD_PARTY_VISIBILITY,
+                name=actor_defaults["preferred_username"],
+            )
+
+        # create/update the channel
+        channel, created = models.Channel.objects.update_or_create(
+            pk=existing.pk if existing else None, defaults=channel_defaults,
+        )
+        return channel
+
+
+class ItunesDurationField(serializers.CharField):
+    """Duration given as a number of seconds, ``MM:SS`` or ``HH:MM:SS``."""
+
+    def to_internal_value(self, v):
+        """Return the duration in seconds, or raise ValidationError if unparsable."""
+        # plain number of seconds
+        try:
+            return int(v)
+        except (ValueError, TypeError):
+            pass
+
+        chunks = v.split(":")
+        try:
+            numbers = [int(chunk) for chunk in chunks]
+        except (ValueError, TypeError):
+            raise serializers.ValidationError("Invalid duration {}".format(v))
+
+        if len(numbers) not in (2, 3):
+            raise serializers.ValidationError("Invalid duration {}".format(v))
+
+        # left-pad with zero hours when only minutes and seconds were given
+        hours, minutes, seconds = [0] * (3 - len(numbers)) + numbers
+        return (hours * 3600) + (minutes * 60) + seconds
+
+
+class DummyField(serializers.Field):
+    """Field whose ``to_internal_value`` returns the input value unchanged."""
+
+    def to_internal_value(self, v):
+        return v
+
+
+def get_cached_upload(uploads, expected_track_uuid):
+    """Return the first upload whose track has the given uuid, or None."""
+    matching = (
+        candidate
+        for candidate in uploads
+        if candidate.track.uuid == expected_track_uuid
+    )
+    return next(matching, None)
+
+
+class RssFeedItemSerializer(serializers.Serializer):
+    """Validate one entry of a parsed RSS feed and persist it as a track and upload."""
+
+    id = serializers.CharField()
+    title = serializers.CharField()
+    rights = serializers.CharField(required=False, allow_blank=True)
+    itunes_season = serializers.IntegerField(required=False)
+    itunes_episode = serializers.IntegerField(required=False)
+    itunes_duration = ItunesDurationField()
+    links = serializers.ListField()
+    tags = serializers.ListField(required=False)
+    summary_detail = serializers.DictField(required=False)
+    published_parsed = DummyField(required=False)
+    image = serializers.DictField(required=False)
+
+    def validate_summary_detail(self, v):
+        """Convert the summary into a content_type/text dict, or None when empty."""
+        content = v.get("value")
+        if not content:
+            return None
+        return {
+            "content_type": v.get("type", "text/plain"),
+            "text": content,
+        }
+
+    def validate_image(self, v):
+        """Convert the image into a url/mimetype dict, or None when it has no href."""
+        url = v.get("href")
+        if not url:
+            return None
+        return {
+            "url": url,
+            # fall back to jpeg when the extension is not recognized
+            "mimetype": common_utils.get_mimetype_from_ext(url) or "image/jpeg",
+        }
+
+    def validate_links(self, v):
+        """Extract the audio enclosure as ``{"audio": {mimetype, size, source}}``.
+
+        Raises ValidationError when an audio enclosure has an invalid length
+        or when no usable audio enclosure is found.
+        """
+        data = {}
+        for row in v:
+            if not row.get("type", "").startswith("audio/"):
+                continue
+            if row.get("rel") != "enclosure":
+                continue
+            if not row.get("href"):
+                # an enclosure without a location cannot be used, skip it
+                # rather than failing with a KeyError below
+                continue
+            try:
+                size = int(row.get("length"))
+            except (TypeError, ValueError):
+                raise serializers.ValidationError("Invalid size")
+
+            data["audio"] = {
+                "mimetype": row["type"],
+                "size": size,
+                "source": row["href"],
+            }
+
+        if not data:
+            raise serializers.ValidationError("No valid audio enclosure found")
+
+        return data
+
+    def validate_tags(self, v):
+        """Collect the valid tag names found in itunes tag rows under a ``tags`` key."""
+        data = {}
+        tag_serializer = tags_serializers.TagNameField()
+        for row in v:
+            if row.get("scheme") != "http://www.itunes.com/":
+                continue
+            term = row.get("term")
+            if not term:
+                # nothing usable in this row
+                continue
+            # accumulate over all rows: resetting the list for each row would
+            # only keep the tags of the last one
+            tags = data.setdefault("tags", [])
+            for tag in term.split(" "):
+                try:
+                    tags.append(tag_serializer.to_internal_value(tag))
+                except Exception:
+                    # best effort: tag names that do not validate are ignored
+                    continue
+
+        return data
+
+    @transaction.atomic
+    def save(self, channel, existing_uploads=None, **track_defaults):
+        """Create or update the track and upload matching this feed entry.
+
+        Args:
+            channel: channel the entry belongs to.
+            existing_uploads: optional list of already loaded uploads, searched
+                first to avoid a database lookup.
+            **track_defaults: extra track values; entry-specific values take
+                precedence over them.
+
+        Returns:
+            The created or updated upload.
+        """
+        # only needed for the publication date conversion below
+        import calendar
+
+        validated_data = self.validated_data
+        existing_uploads = existing_uploads or []
+        item_categories = validated_data.get("tags", {})
+        # deterministic uuid, so the same entry always maps to the same track
+        expected_uuid = uuid.uuid3(
+            uuid.NAMESPACE_URL, "rss://{}-{}".format(channel.pk, validated_data["id"])
+        )
+        existing_upload = get_cached_upload(existing_uploads, expected_uuid)
+        if existing_upload:
+            existing_track = existing_upload.track
+        else:
+            existing_track = (
+                music_models.Track.objects.filter(
+                    uuid=expected_uuid, artist__channel=channel
+                )
+                .select_related("description", "attachment_cover")
+                .first()
+            )
+            if existing_track:
+                existing_upload = existing_track.uploads.filter(
+                    library=channel.library
+                ).first()
+
+        track_defaults.update(
+            {
+                "disc_number": validated_data.get("itunes_season", 1),
+                "position": validated_data.get("itunes_episode", 1),
+                "title": validated_data["title"],
+                "artist": channel.artist,
+            }
+        )
+        if "rights" in validated_data:
+            # same key as the feed-level default set by
+            # get_channel_from_rss_url, so the item value overrides it
+            track_defaults["copyright"] = validated_data["rights"]
+
+        if "published_parsed" in validated_data:
+            # feedparser returns UTC time tuples: convert with timegm so the
+            # result does not depend on the timezone of the process
+            track_defaults["creation_date"] = datetime.datetime.fromtimestamp(
+                calendar.timegm(validated_data["published_parsed"]), tz=pytz.utc
+            )
+
+        upload_defaults = {
+            "source": validated_data["links"]["audio"]["source"],
+            "size": validated_data["links"]["audio"]["size"],
+            "mimetype": validated_data["links"]["audio"]["mimetype"],
+            "duration": validated_data["itunes_duration"],
+            "import_status": "finished",
+            "library": channel.library,
+        }
+        if existing_track:
+            track_kwargs = {"pk": existing_track.pk}
+            upload_kwargs = {"track": existing_track}
+        else:
+            # pk=None never matches, which forces update_or_create to create
+            track_kwargs = {"pk": None}
+            track_defaults["uuid"] = expected_uuid
+            upload_kwargs = {"pk": None}
+
+        if existing_upload and existing_upload.source != upload_defaults["source"]:
+            # delete existing upload, the url to the audio file has changed
+            existing_upload.delete()
+
+        # create/update the track
+        track, created = music_models.Track.objects.update_or_create(
+            **track_kwargs, defaults=track_defaults,
+        )
+        # optimisation for reducing SQL queries, because we cannot use select_related with
+        # update or create, so we restore the cache by hand
+        if existing_track:
+            for field in ["attachment_cover", "description"]:
+                cached_id_value = getattr(existing_track, "{}_id".format(field))
+                new_id_value = getattr(track, "{}_id".format(field))
+                if new_id_value and cached_id_value == new_id_value:
+                    setattr(track, field, getattr(existing_track, field))
+
+        cover = validated_data.get("image")
+        if cover:
+            common_utils.attach_file(track, "attachment_cover", cover)
+
+        tags = item_categories.get("tags", [])
+        if tags:
+            tags_models.set_tags(track, *tags)
+
+        summary = validated_data.get("summary_detail")
+        if summary:
+            common_utils.attach_content(track, "description", summary)
+
+        if created:
+            upload_defaults["track"] = track
+
+        # create/update the upload
+        upload, created = music_models.Upload.objects.update_or_create(
+            **upload_kwargs, defaults=upload_defaults
+        )
+
+        return upload
+
+
def rss_date(dt):
return dt.strftime("%a, %d %b %Y %H:%M:%S %z")
@@ -344,7 +831,12 @@ def rss_serialize_channel(channel):
"href": channel.get_rss_url(),
"rel": "self",
"type": "application/rss+xml",
- }
+ },
+ {
+ "href": channel.actor.fid,
+ "rel": "alternate",
+ "type": "application/activity+json",
+ },
],
}
if language:
diff --git a/api/funkwhale_api/audio/tasks.py b/api/funkwhale_api/audio/tasks.py
new file mode 100644
index 000000000..96e216338
--- /dev/null
+++ b/api/funkwhale_api/audio/tasks.py
@@ -0,0 +1,51 @@
+import datetime
+import logging
+
+from django.conf import settings
+from django.db import transaction
+from django.utils import timezone
+
+from funkwhale_api.taskapp import celery
+
+from . import models
+from . import serializers
+
+logger = logging.getLogger(__name__)
+
+
+@celery.app.task(name="audio.fetch_rss_feeds")
+def fetch_rss_feeds():
+    """Queue a refresh task for each external RSS channel that is due.
+
+    A channel is due when the last fetch date of its actor is at least
+    ``settings.PODCASTS_RSS_FEED_REFRESH_DELAY`` seconds old.
+    """
+    refresh_delay = datetime.timedelta(
+        seconds=settings.PODCASTS_RSS_FEED_REFRESH_DELAY
+    )
+    limit = timezone.now() - refresh_delay
+    due_channels = models.Channel.objects.external_rss().filter(
+        actor__last_fetch_date__lte=limit
+    )
+    rss_urls = list(due_channels.values_list("rss_url", flat=True))
+
+    logger.info("Refreshing %s rss feeds…", len(rss_urls))
+    for rss_url in rss_urls:
+        fetch_rss_feed.delay(rss_url=rss_url)
+
+
+@celery.app.task(name="audio.fetch_rss_feed")
+@transaction.atomic
+def fetch_rss_feed(rss_url):
+    """Refresh the external RSS channel registered for ``rss_url``.
+
+    Logs a warning and returns when no external channel uses this URL. When
+    the feed turns out to be blocked, the channel is deleted. Any other
+    exception raised during the refresh propagates to the caller.
+    """
+    channel = (
+        models.Channel.objects.external_rss()
+        .filter(rss_url=rss_url)
+        .order_by("id")
+        .first()
+    )
+    if not channel:
+        # Logger.warn is a deprecated alias of Logger.warning
+        logger.warning("Cannot refresh non external feed")
+        return
+
+    try:
+        serializers.get_channel_from_rss_url(rss_url)
+    except serializers.BlockedFeedException:
+        # channel was blocked since last fetch, let's delete it
+        logger.info("Deleting blocked channel linked to %s", rss_url)
+        channel.delete()
diff --git a/api/funkwhale_api/audio/views.py b/api/funkwhale_api/audio/views.py
index 974797c35..eb6b9d001 100644
--- a/api/funkwhale_api/audio/views.py
+++ b/api/funkwhale_api/audio/views.py
@@ -8,12 +8,12 @@ from rest_framework import viewsets
from django import http
from django.db import transaction
from django.db.models import Count, Prefetch, Q
-from django.db.utils import IntegrityError
from funkwhale_api.common import locales
from funkwhale_api.common import permissions
from funkwhale_api.common import preferences
from funkwhale_api.common.mixins import MultipleLookupDetailMixin
+from funkwhale_api.federation import actors
from funkwhale_api.federation import models as federation_models
from funkwhale_api.federation import routes
from funkwhale_api.federation import utils as federation_utils
@@ -100,17 +100,19 @@ class ChannelViewSet(
)
def subscribe(self, request, *args, **kwargs):
object = self.get_object()
- subscription = federation_models.Follow(
- target=object.actor, approved=True, actor=request.user.actor,
- )
+ subscription = federation_models.Follow(actor=request.user.actor)
subscription.fid = subscription.get_federation_id()
- try:
- subscription.save()
- except IntegrityError:
- # there's already a subscription for this actor/channel
- subscription = object.actor.received_follows.filter(
- actor=request.user.actor
- ).get()
+ subscription, created = SubscriptionsViewSet.queryset.get_or_create(
+ target=object.actor,
+ actor=request.user.actor,
+ defaults={
+ "approved": True,
+ "fid": subscription.fid,
+ "uuid": subscription.uuid,
+ },
+ )
+ # prefetch stuff
+ subscription = SubscriptionsViewSet.queryset.get(pk=subscription.pk)
data = serializers.SubscriptionSerializer(subscription).data
return response.Response(data, status=201)
@@ -135,6 +137,10 @@ class ChannelViewSet(
if not object.attributed_to.is_local:
return response.Response({"detail": "Not found"}, status=404)
+ if object.attributed_to == actors.get_service_actor():
+ # external feed, we redirect to the canonical one
+ return http.HttpResponseRedirect(object.rss_url)
+
uploads = (
object.library.uploads.playable_by(None)
.prefetch_related(
@@ -170,6 +176,49 @@ class ChannelViewSet(
}
return response.Response(data)
+    @decorators.action(
+        methods=["post"],
+        detail=False,
+        url_path="rss-subscribe",
+        url_name="rss_subscribe",
+    )
+    @transaction.atomic
+    def rss_subscribe(self, request, *args, **kwargs):
+        """Subscribe the requesting user to the channel matching a feed URL.
+
+        The channel is looked up by RSS URL and, when missing, created from
+        the remote feed. Responds with 400 on an invalid payload or a feed
+        fetch failure, and with 201 and the serialized subscription otherwise.
+        """
+        payload = serializers.RssSubscribeSerializer(data=request.data)
+        if not payload.is_valid():
+            return response.Response(payload.errors, status=400)
+
+        feed_url = payload.validated_data["url"]
+        channel = (
+            models.Channel.objects.filter(rss_url=feed_url).order_by("id").first()
+        )
+        if not channel:
+            # try to retrieve the channel via its URL and create it
+            try:
+                channel, _ = serializers.get_channel_from_rss_url(feed_url)
+            except serializers.FeedFetchException as e:
+                return response.Response({"detail": str(e)}, status=400)
+
+        user_actor = request.user.actor
+        # unsaved instance, only used to compute the fid/uuid creation defaults
+        draft = federation_models.Follow(actor=user_actor)
+        draft.fid = draft.get_federation_id()
+        follow, _ = SubscriptionsViewSet.queryset.get_or_create(
+            target=channel.actor,
+            actor=user_actor,
+            defaults={"approved": True, "fid": draft.fid, "uuid": draft.uuid},
+        )
+        # prefetch stuff: reload the row through the viewset queryset
+        follow = SubscriptionsViewSet.queryset.get(pk=follow.pk)
+
+        serialized = serializers.SubscriptionSerializer(follow).data
+        return response.Response(serialized, status=201)
+
def get_serializer_context(self):
context = super().get_serializer_context()
context["subscriptions_count"] = self.action in [
diff --git a/api/funkwhale_api/common/utils.py b/api/funkwhale_api/common/utils.py
index 34b1dc006..49719e168 100644
--- a/api/funkwhale_api/common/utils.py
+++ b/api/funkwhale_api/common/utils.py
@@ -310,13 +310,21 @@ def render_plain_text(html):
return bleach.clean(html, tags=[], strip=True)
+def same_content(old, text=None, content_type=None):
+    """Tell whether ``old`` already has the given text and content type."""
+    if old.text != text:
+        return False
+    return old.content_type == content_type
+
+
@transaction.atomic
def attach_content(obj, field, content_data):
from . import models
+ content_data = content_data or {}
existing = getattr(obj, "{}_id".format(field))
if existing:
+ if same_content(getattr(obj, field), **content_data):
+ # optimization to avoid a delete/save if possible
+ return getattr(obj, field)
getattr(obj, field).delete()
setattr(obj, field, None)
@@ -376,3 +384,15 @@ def attach_file(obj, field, file_data, fetch=False):
setattr(obj, field, attachment)
obj.save(update_fields=[field])
return attachment
+
+
+def get_mimetype_from_ext(path):
+    """Return the image mimetype matching the extension of ``path``, or None.
+
+    The extension is whatever follows the last dot, compared case-insensitively.
+    """
+    extension = path.lower().rsplit(".", 1)[-1]
+    if extension in ("jpeg", "jpg"):
+        return "image/jpeg"
+    if extension in ("png", "gif"):
+        return "image/{}".format(extension)
+    return None
diff --git a/api/funkwhale_api/common/views.py b/api/funkwhale_api/common/views.py
index 05cb025c3..1766ba127 100644
--- a/api/funkwhale_api/common/views.py
+++ b/api/funkwhale_api/common/views.py
@@ -163,6 +163,10 @@ class AttachmentViewSet(
@transaction.atomic
def proxy(self, request, *args, **kwargs):
instance = self.get_object()
+ if not settings.EXTERNAL_MEDIA_PROXY_ENABLED:
+ r = response.Response(status=302)
+ r["Location"] = instance.url
+ return r
size = request.GET.get("next", "original").lower()
if size not in ["original", "medium_square_crop"]:
diff --git a/api/funkwhale_api/federation/actors.py b/api/funkwhale_api/federation/actors.py
index 39161a9cb..187bd8c95 100644
--- a/api/funkwhale_api/federation/actors.py
+++ b/api/funkwhale_api/federation/actors.py
@@ -42,21 +42,32 @@ def get_actor(fid, skip_cache=False):
return serializer.save(last_fetch_date=timezone.now())
-def get_service_actor():
+_CACHE = {}
+
+
+def get_service_actor(cache=True):
+ if cache and "service_actor" in _CACHE:
+ return _CACHE["service_actor"]
+
name, domain = (
settings.FEDERATION_SERVICE_ACTOR_USERNAME,
settings.FEDERATION_HOSTNAME,
)
try:
- return models.Actor.objects.select_related().get(
+ actor = models.Actor.objects.select_related().get(
preferred_username=name, domain__name=domain
)
except models.Actor.DoesNotExist:
pass
+ else:
+ _CACHE["service_actor"] = actor
+ return actor
args = users_models.get_actor_data(name)
private, public = keys.get_key_pair()
args["private_key"] = private.decode("utf-8")
args["public_key"] = public.decode("utf-8")
args["type"] = "Service"
- return models.Actor.objects.create(**args)
+ actor = models.Actor.objects.create(**args)
+ _CACHE["service_actor"] = actor
+ return actor
diff --git a/api/funkwhale_api/federation/tasks.py b/api/funkwhale_api/federation/tasks.py
index 8cd0c0439..04457e1fc 100644
--- a/api/funkwhale_api/federation/tasks.py
+++ b/api/funkwhale_api/federation/tasks.py
@@ -311,6 +311,7 @@ def fetch(fetch_obj):
auth = signing.get_auth(actor.private_key, actor.private_key_id)
else:
auth = None
+    auth = None  # NOTE(review): overrides the auth chosen by the if/else just above, so the signed branch never takes effect -- looks like a debugging leftover, confirm before merging
try:
if url.startswith("webfinger://"):
# we first grab the correpsonding webfinger representation
diff --git a/api/funkwhale_api/federation/views.py b/api/funkwhale_api/federation/views.py
index 93977dcd2..7a16fbed4 100644
--- a/api/funkwhale_api/federation/views.py
+++ b/api/funkwhale_api/federation/views.py
@@ -13,7 +13,16 @@ from funkwhale_api.moderation import models as moderation_models
from funkwhale_api.music import models as music_models
from funkwhale_api.music import utils as music_utils
-from . import activity, authentication, models, renderers, serializers, utils, webfinger
+from . import (
+ actors,
+ activity,
+ authentication,
+ models,
+ renderers,
+ serializers,
+ utils,
+ webfinger,
+)
def redirect_to_html(public_url):
@@ -61,6 +70,10 @@ class ActorViewSet(FederationMixin, mixins.RetrieveModelMixin, viewsets.GenericV
queryset = models.Actor.objects.local().select_related("user")
serializer_class = serializers.ActorSerializer
+    def get_queryset(self):
+        """Exclude actors whose channel is attributed to the service actor."""
+        service_actor = actors.get_service_actor()
+        return super().get_queryset().exclude(channel__attributed_to=service_actor)
+
def retrieve(self, request, *args, **kwargs):
instance = self.get_object()
if utils.should_redirect_ap_to_html(request.headers.get("accept")):
diff --git a/api/funkwhale_api/music/admin.py b/api/funkwhale_api/music/admin.py
index 584653ab9..56712746d 100644
--- a/api/funkwhale_api/music/admin.py
+++ b/api/funkwhale_api/music/admin.py
@@ -23,6 +23,13 @@ class TrackAdmin(admin.ModelAdmin):
list_select_related = ["album__artist", "artist"]
+@admin.register(models.TrackActor)
+class TrackActorAdmin(admin.ModelAdmin):
+    """Admin listing of TrackActor rows, searchable by actor username and track title."""
+
+    list_display = ["actor", "track", "upload", "internal"]
+    # tracks are written and read through ``title`` elsewhere in this change,
+    # so search on that field rather than ``track__name``
+    search_fields = ["actor__preferred_username", "track__title"]
+    list_select_related = ["actor", "track"]
+
+
@admin.register(models.ImportBatch)
class ImportBatchAdmin(admin.ModelAdmin):
list_display = ["submitted_by", "creation_date", "import_request", "status"]
diff --git a/api/funkwhale_api/music/models.py b/api/funkwhale_api/music/models.py
index e0adfe86b..c7ae71ca8 100644
--- a/api/funkwhale_api/music/models.py
+++ b/api/funkwhale_api/music/models.py
@@ -786,9 +786,13 @@ class Upload(models.Model):
with remote_response as r:
remote_response.raise_for_status()
extension = utils.get_ext_from_type(self.mimetype)
- title = " - ".join(
- [self.track.title, self.track.album.title, self.track.artist.name]
- )
+ title_parts = []
+ title_parts.append(self.track.title)
+ if self.track.album:
+ title_parts.append(self.track.album.title)
+ title_parts.append(self.track.artist.name)
+
+ title = " - ".join(title_parts)
filename = "{}.{}".format(title, extension)
tmp_file = tempfile.TemporaryFile()
for chunk in r.iter_content(chunk_size=512):
@@ -1126,7 +1130,7 @@ class LibraryQuerySet(models.QuerySet):
)
def viewable_by(self, actor):
- from funkwhale_api.federation.models import LibraryFollow
+ from funkwhale_api.federation.models import LibraryFollow, Follow
if actor is None:
return self.filter(privacy_level="everyone")
@@ -1136,11 +1140,17 @@ class LibraryQuerySet(models.QuerySet):
followed_libraries = LibraryFollow.objects.filter(
actor=actor, approved=True
).values_list("target", flat=True)
+ followed_channels_libraries = (
+ Follow.objects.exclude(target__channel=None)
+ .filter(actor=actor, approved=True,)
+ .values_list("target__channel__library", flat=True)
+ )
return self.filter(
me_query
| instance_query
| models.Q(privacy_level="everyone")
| models.Q(pk__in=followed_libraries)
+ | models.Q(pk__in=followed_channels_libraries)
)
@@ -1174,7 +1184,7 @@ class Library(federation_models.FederationMixin):
return "/library/{}".format(self.uuid)
def save(self, **kwargs):
- if not self.pk and not self.fid and self.actor.get_user():
+ if not self.pk and not self.fid and self.actor.is_local:
self.fid = self.get_federation_id()
self.followers_url = self.fid + "/followers"
@@ -1266,7 +1276,11 @@ class TrackActor(models.Model):
).values_list("id", "track")
objs = []
if library.privacy_level == "me":
- follow_queryset = library.received_follows.filter(approved=True).exclude(
+ if library.get_channel():
+ follow_queryset = library.channel.actor.received_follows
+ else:
+ follow_queryset = library.received_follows
+ follow_queryset = follow_queryset.filter(approved=True).exclude(
actor__user__isnull=True
)
if actor_ids:
diff --git a/api/requirements/base.txt b/api/requirements/base.txt
index 9053bdd3a..e2a21df29 100644
--- a/api/requirements/base.txt
+++ b/api/requirements/base.txt
@@ -79,3 +79,4 @@ click>=7,<8
service_identity==18.1.0
markdown>=3.2,<4
bleach>=3,<4
+feedparser==6.0.0b3
diff --git a/api/setup.cfg b/api/setup.cfg
index 581396c37..2b8f8e825 100644
--- a/api/setup.cfg
+++ b/api/setup.cfg
@@ -31,3 +31,4 @@ env =
FUNKWHALE_SPA_HTML_ROOT=http://noop/
PROXY_MEDIA=true
MUSIC_USE_DENORMALIZATION=true
+ EXTERNAL_MEDIA_PROXY_ENABLED=true
diff --git a/api/tests/audio/test_serializers.py b/api/tests/audio/test_serializers.py
index 7f2bc77a6..add106888 100644
--- a/api/tests/audio/test_serializers.py
+++ b/api/tests/audio/test_serializers.py
@@ -1,5 +1,7 @@
import datetime
+import uuid
+import feedparser
import pytest
import pytz
@@ -8,6 +10,7 @@ from django.templatetags.static import static
from funkwhale_api.audio import serializers
from funkwhale_api.common import serializers as common_serializers
from funkwhale_api.common import utils as common_utils
+from funkwhale_api.federation import actors
from funkwhale_api.federation import serializers as federation_serializers
from funkwhale_api.federation import utils as federation_utils
from funkwhale_api.music import serializers as music_serializers
@@ -232,6 +235,28 @@ def test_channel_serializer_representation(factories, to_api_date):
assert serializers.ChannelSerializer(channel).data == expected
+def test_channel_serializer_external_representation(factories, to_api_date):  # checks ChannelSerializer output for a channel built with external=True
+ content = factories["common.Content"]()
+ channel = factories["audio.Channel"](artist__description=content, external=True)
+
+ expected = {
+ "artist": music_serializers.serialize_artist_simple(channel.artist),
+ "uuid": str(channel.uuid),
+ "creation_date": to_api_date(channel.creation_date),
+ "actor": None,  # the expected payload carries no actor for this channel
+ "attributed_to": federation_serializers.APIActorSerializer(
+ channel.attributed_to
+ ).data,
+ "metadata": {},
+ "rss_url": channel.get_rss_url(),
+ }
+ expected["artist"]["description"] = common_serializers.ContentSerializer(  # overwrite the description with the ContentSerializer rendering of content
+ content
+ ).data
+
+ assert serializers.ChannelSerializer(channel).data == expected  # the whole serialized dict must match exactly
+
+
def test_channel_serializer_representation_subscriptions_count(factories, to_api_date):
channel = factories["audio.Channel"]()
factories["federation.Follow"](target=channel.actor)
@@ -351,7 +376,12 @@ def test_rss_channel_serializer(factories):
"href": channel.get_rss_url(),
"rel": "self",
"type": "application/rss+xml",
- }
+ },
+ {
+ "href": channel.actor.fid,
+ "rel": "alternate",
+ "type": "application/activity+json",
+ },
],
}
@@ -446,3 +476,440 @@ def test_channel_metadata_serializer_validation():
payload.pop("unknown_key")
assert serializer.validated_data == payload
+
+
+def test_rss_feed_serializer_create(db, now):
+ rss_url = "http://example.rss/"
+
+ xml_payload = """
+
Html content
" + assert upload.track.description.content_type == "text/html" + + +def test_rss_feed_item_serializer_update(factories): + rss_url = "http://example.rss/" + channel = factories["audio.Channel"](rss_url=rss_url, external=True) + expected_uuid = uuid.uuid3( + uuid.NAMESPACE_URL, + "rss://{}-16f66fff-41ae-4a1c-9101-2746218c4f32".format(channel.pk), + ) + upload = factories["music.Upload"]( + track__uuid=expected_uuid, + source="https://file.domain/audio.mp3", + library=channel.library, + track__artist=channel.artist, + ) + track = upload.track + + xml_payload = """ +Html content
" + assert upload.track.description.content_type == "text/html" + + +def test_get_channel_from_rss_url(db, r_mock, mocker): + rss_url = "http://example.rss/" + xml_payload = """ +