Merge branch '195-bitrate-size' into 'develop'

Resolve "Store file bitrate and size"

Closes #195

See merge request funkwhale/funkwhale!196
This commit is contained in:
Eliot Berriot 2018-05-16 16:58:34 +00:00
commit ceccaa1387
25 changed files with 439 additions and 16 deletions

View File

@ -10,3 +10,4 @@ PYTHONDONTWRITEBYTECODE=true
WEBPACK_DEVSERVER_PORT=8080
MUSIC_DIRECTORY_PATH=/music
BROWSABLE_API_ENABLED=True
CACHEOPS_ENABLED=False

View File

@ -233,6 +233,9 @@ class AudioMetadataFactory(factory.Factory):
release = factory.LazyAttribute(
lambda o: 'https://musicbrainz.org/release/{}'.format(uuid.uuid4())
)
bitrate = 42
length = 43
size = 44
class Meta:
model = dict

View File

@ -216,3 +216,6 @@ class LibraryTrack(models.Model):
for chunk in r.iter_content(chunk_size=512):
tmp_file.write(chunk)
self.audio_file.save(filename, tmp_file)
def get_metadata(self, key):
    """Look up ``key`` in this library track's ``metadata`` mapping.

    The lookup goes through ``metadata.get``, so the result is whatever
    that call returns for the key (``None`` for a missing key when
    ``metadata`` is a plain dict).
    """
    metadata = self.metadata
    return metadata.get(key)

View File

@ -688,6 +688,12 @@ class AudioMetadataSerializer(serializers.Serializer):
artist = ArtistMetadataSerializer()
release = ReleaseMetadataSerializer()
recording = RecordingMetadataSerializer()
bitrate = serializers.IntegerField(
required=False, allow_null=True, min_value=0)
size = serializers.IntegerField(
required=False, allow_null=True, min_value=0)
length = serializers.IntegerField(
required=False, allow_null=True, min_value=0)
class AudioSerializer(serializers.Serializer):
@ -760,6 +766,9 @@ class AudioSerializer(serializers.Serializer):
'musicbrainz_id': str(track.mbid) if track.mbid else None,
'title': track.title,
},
'bitrate': instance.bitrate,
'size': instance.size,
'length': instance.duration,
},
'url': {
'href': utils.full_url(instance.path),

View File

@ -74,6 +74,8 @@ class TrackFileAdmin(admin.ModelAdmin):
'source',
'duration',
'mimetype',
'size',
'bitrate'
]
list_select_related = [
'track'

View File

@ -54,6 +54,10 @@ class TrackFileFactory(factory.django.DjangoModelFactory):
audio_file = factory.django.FileField(
from_path=os.path.join(SAMPLES_PATH, 'test.ogg'))
bitrate = None
size = None
duration = None
class Meta:
model = 'music.TrackFile'

View File

@ -2,6 +2,7 @@ import cacheops
import os
from django.db import transaction
from django.db.models import Q
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
@ -24,6 +25,8 @@ class Command(BaseCommand):
if options['dry_run']:
self.stdout.write('Dry-run on, will not commit anything')
self.fix_mimetypes(**options)
self.fix_file_data(**options)
self.fix_file_size(**options)
cacheops.invalidate_model(models.TrackFile)
@transaction.atomic
@ -43,3 +46,60 @@ class Command(BaseCommand):
if not dry_run:
self.stdout.write('[mimetypes] commiting...')
qs.update(mimetype=mimetype)
def fix_file_data(self, dry_run, **kwargs):
    """Fill in bitrate and duration on track files missing either value.

    :param dry_run: when true, only report how many entries would be
        processed and return without touching the database.
    :param kwargs: remaining command options, ignored here.

    Errors on individual files are written to stderr and do not stop
    the loop, so one unreadable file cannot abort the whole run.
    """
    self.stdout.write('Fixing missing bitrate or length...')
    matching = models.TrackFile.objects.filter(
        Q(bitrate__isnull=True) | Q(duration__isnull=True))
    total = matching.count()
    self.stdout.write(
        '[bitrate/length] {} entries found with missing values'.format(
            total))
    if dry_run:
        return
    for i, tf in enumerate(matching.only('audio_file')):
        self.stdout.write(
            '[bitrate/length] {}/{} fixing file #{}'.format(
                i+1, total, tf.pk
            ))
        try:
            audio_file = tf.get_audio_file()
            if not audio_file:
                self.stderr.write('[bitrate/length] no file found')
                continue
            # read through the context-managed handle so the file is
            # closed as soon as the data has been extracted
            with audio_file as f:
                data = utils.get_audio_file_data(f)
            if not data:
                # no usable data: report it explicitly instead of
                # failing later with an obscure subscript error
                self.stderr.write(
                    '[bitrate/length] no audio data found in file '
                    '#{}'.format(tf.pk))
                continue
            tf.bitrate = data['bitrate']
            # duration is an integer column; TrackFile.set_audio_data
            # applies the same cast
            tf.duration = int(data['length'])
            tf.save(update_fields=['duration', 'bitrate'])
        except Exception as e:
            self.stderr.write(
                '[bitrate/length] error with file #{}: {}'.format(
                    tf.pk, str(e)
                )
            )
def fix_file_size(self, dry_run, **kwargs):
    """Fill in the size of track files whose ``size`` is still null.

    :param dry_run: when true, only report how many entries would be
        processed and return without touching the database.
    :param kwargs: remaining command options, ignored here.

    Errors on individual files are written to stderr and do not stop
    the loop.
    """
    self.stdout.write('Fixing missing size...')
    matching = models.TrackFile.objects.filter(size__isnull=True)
    total = matching.count()
    self.stdout.write(
        '[size] {} entries found with missing values'.format(total))
    if dry_run:
        return
    for i, tf in enumerate(matching.only('size')):
        self.stdout.write(
            '[size] {}/{} fixing file #{}'.format(
                i+1, total, tf.pk
            ))
        try:
            size = tf.get_file_size()
            if size is None:
                # nothing to measure: report it (as fix_file_data does)
                # rather than silently writing NULL over NULL
                self.stderr.write('[size] no file found')
                continue
            tf.size = size
            tf.save(update_fields=['size'])
        except Exception as e:
            self.stderr.write(
                '[size] error with file #{}: {}'.format(
                    tf.pk, str(e)
                )
            )

View File

@ -0,0 +1,29 @@
# Generated by Django 2.0.3 on 2018-05-15 18:08
from django.db import migrations, models
import taggit.managers
# Schema migration for the music app: adds nullable integer ``bitrate`` and
# ``size`` fields to the ``trackfile`` model and alters ``track.tags``.
class Migration(migrations.Migration):
# Must run after the migration that added ``accessed_date`` to trackfile.
dependencies = [
('music', '0026_trackfile_accessed_date'),
]
operations = [
# New nullable columns: existing rows keep NULL until values are computed.
migrations.AddField(
model_name='trackfile',
name='bitrate',
field=models.IntegerField(blank=True, null=True),
),
migrations.AddField(
model_name='trackfile',
name='size',
field=models.IntegerField(blank=True, null=True),
),
# Field definition for ``track.tags`` as emitted by the migration generator.
migrations.AlterField(
model_name='track',
name='tags',
field=taggit.managers.TaggableManager(blank=True, help_text='A comma-separated list of tags.', through='taggit.TaggedItem', to='taggit.Tag', verbose_name='Tags'),
),
]

View File

@ -429,6 +429,8 @@ class TrackFile(models.Model):
modification_date = models.DateTimeField(auto_now=True)
accessed_date = models.DateTimeField(null=True, blank=True)
duration = models.IntegerField(null=True, blank=True)
size = models.IntegerField(null=True, blank=True)
bitrate = models.IntegerField(null=True, blank=True)
acoustid_track_id = models.UUIDField(null=True, blank=True)
mimetype = models.CharField(null=True, blank=True, max_length=200)
@ -477,6 +479,41 @@ class TrackFile(models.Model):
return
return os.path.splitext(self.audio_file.name)[-1].replace('.', '', 1)
def get_file_size(self):
    """Return the size in bytes of the audio backing this track file.

    Candidates are tried in order: the uploaded ``audio_file``, a local
    ``file://`` source (in-place import), then the audio file of the
    linked library track.

    :returns: the size, or ``None`` when no candidate is available.
    :raises OSError: when a ``file://`` source points to a missing path.
    """
    if self.audio_file:
        return self.audio_file.size
    # tolerate an unset source so the library-track fallback is still
    # reached instead of crashing on ``None.startswith``
    source = self.source or ''
    if source.startswith('file://'):
        return os.path.getsize(source.replace('file://', '', 1))
    if self.library_track and self.library_track.audio_file:
        return self.library_track.audio_file.size
    return None
def get_audio_file(self):
    """Open and return the audio backing this track file.

    Candidates are tried in order: the uploaded ``audio_file``, a local
    ``file://`` source (opened in binary mode), then the audio file of
    the linked library track.

    :returns: the opened file, or ``None`` when no candidate is
        available. The caller is responsible for closing it.
    :raises OSError: when a ``file://`` source cannot be opened.
    """
    if self.audio_file:
        return self.audio_file.open()
    # tolerate an unset source so the library-track fallback is still
    # reached instead of crashing on ``None.startswith``
    source = self.source or ''
    if source.startswith('file://'):
        return open(source.replace('file://', '', 1), 'rb')
    if self.library_track and self.library_track.audio_file:
        return self.library_track.audio_file.open()
    return None
def set_audio_data(self):
    """Set ``duration``, ``bitrate`` and ``size`` on this instance.

    When an audio file can be opened, the values are read from it (and
    left untouched if no data can be extracted). Otherwise they are
    copied from the linked library track's metadata, if there is one.
    Nothing is saved here; persisting is left to the caller.
    """
    handle = self.get_audio_file()
    if not handle:
        # no local audio: fall back to the library track metadata
        remote = self.library_track
        if remote:
            self.duration = remote.get_metadata('length')
            self.size = remote.get_metadata('size')
            self.bitrate = remote.get_metadata('bitrate')
        return

    with handle as stream:
        info = utils.get_audio_file_data(stream)
        if info:
            self.duration = int(info['length'])
            self.bitrate = info['bitrate']
            self.size = self.get_file_size()
def save(self, **kwargs):
if not self.mimetype and self.audio_file:
self.mimetype = utils.guess_mimetype(self.audio_file)

View File

@ -27,6 +27,7 @@ class SimpleArtistSerializer(serializers.ModelSerializer):
class ArtistSerializer(serializers.ModelSerializer):
tags = TagSerializer(many=True, read_only=True)
class Meta:
model = models.Artist
fields = ('id', 'mbid', 'name', 'tags', 'creation_date')
@ -40,11 +41,21 @@ class TrackFileSerializer(serializers.ModelSerializer):
fields = (
'id',
'path',
'duration',
'source',
'filename',
'mimetype',
'track')
'track',
'duration',
'mimetype',
'bitrate',
'size',
)
read_only_fields = [
'duration',
'mimetype',
'bitrate',
'size',
]
def get_path(self, o):
url = o.path

View File

@ -134,6 +134,7 @@ def _do_import(import_job, replace=False, use_acoustid=True):
# in place import, we set mimetype from extension
path, ext = os.path.splitext(import_job.source)
track_file.mimetype = music_utils.get_type_from_ext(ext)
track_file.set_audio_data()
track_file.save()
import_job.status = 'finished'
import_job.track_file = track_file

View File

@ -1,5 +1,6 @@
import magic
import mimetypes
import mutagen
import re
from django.db.models import Q
@ -66,7 +67,7 @@ def compute_status(jobs):
AUDIO_EXTENSIONS_AND_MIMETYPE = [
('ogg', 'audio/ogg'),
('mp3', 'audio/mpeg'),
('flac', 'audio/flac'),
('flac', 'audio/x-flac'),
]
EXTENSION_TO_MIMETYPE = {ext: mt for ext, mt in AUDIO_EXTENSIONS_AND_MIMETYPE}
@ -82,3 +83,14 @@ def get_type_from_ext(extension):
# we remove leading dot
extension = extension[1:]
return EXTENSION_TO_MIMETYPE.get(extension)
def get_audio_file_data(f):
    """Read bitrate and length from an audio file through mutagen.

    :param f: value handed to ``mutagen.File``; callers in this code
        base pass a file opened in binary mode.
    :returns: a dict with ``bitrate`` and ``length`` keys taken from
        mutagen's stream info, or ``None`` when mutagen cannot identify
        the file.
    """
    data = mutagen.File(f)
    # Compare against None explicitly: mutagen file objects behave like
    # a mapping of their tags, so a valid file without tags is falsy.
    if data is None:
        return None
    return {
        'bitrate': data.info.bitrate,
        'length': data.info.length,
    }

View File

@ -268,6 +268,10 @@ def handle_serve(track_file):
qs = LibraryTrack.objects.select_for_update()
library_track = qs.get(pk=library_track.pk)
library_track.download_audio()
track_file.library_track = library_track
track_file.set_audio_data()
track_file.save(update_fields=['bitrate', 'duration', 'size'])
audio_file = library_track.audio_file
file_path = get_file_path(audio_file)
mt = library_track.audio_mimetype
@ -296,7 +300,11 @@ def handle_serve(track_file):
class TrackFileViewSet(viewsets.ReadOnlyModelViewSet):
queryset = (models.TrackFile.objects.all().order_by('-id'))
queryset = (
models.TrackFile.objects.all()
.select_related('track__artist', 'track__album')
.order_by('-id')
)
serializer_class = serializers.TrackFileSerializer
authentication_classes = rest_settings.api_settings.DEFAULT_AUTHENTICATION_CLASSES + [
SignatureAuthentication

View File

@ -81,6 +81,10 @@ def get_track_data(album, track, tf):
'artistId': album.artist.pk,
'type': 'music',
}
if tf.bitrate:
data['bitrate'] = int(tf.bitrate/1000)
if tf.size:
data['size'] = tf.size
if album.release_date:
data['year'] = album.release_date.year
return data
@ -211,5 +215,9 @@ def get_music_directory_data(artist):
'parent': artist.id,
'type': 'music',
}
if tf.bitrate:
td['bitrate'] = int(tf.bitrate/1000)
if tf.size:
td['size'] = tf.size
data['child'].append(td)
return data

View File

@ -533,7 +533,12 @@ def test_activity_pub_audio_serializer_to_library_track_no_duplicate(
def test_activity_pub_audio_serializer_to_ap(factories):
tf = factories['music.TrackFile'](mimetype='audio/mp3')
tf = factories['music.TrackFile'](
mimetype='audio/mp3',
bitrate=42,
duration=43,
size=44,
)
library = actors.SYSTEM_ACTORS['library'].get_actor_instance()
expected = {
'@context': serializers.AP_CONTEXT,
@ -555,6 +560,9 @@ def test_activity_pub_audio_serializer_to_ap(factories):
'musicbrainz_id': tf.track.mbid,
'title': tf.track.title,
},
'size': tf.size,
'length': tf.duration,
'bitrate': tf.bitrate,
},
'url': {
'href': utils.full_url(tf.path),
@ -599,6 +607,9 @@ def test_activity_pub_audio_serializer_to_ap_no_mbid(factories):
'title': tf.track.title,
'musicbrainz_id': None,
},
'size': None,
'length': None,
'bitrate': None,
},
'url': {
'href': utils.full_url(tf.path),

View File

@ -0,0 +1,45 @@
from funkwhale_api.music.management.commands import fix_track_files
def test_fix_track_files_bitrate_length(factories, mocker):
    """``fix_file_data`` only touches files lacking bitrate/duration."""
    complete = factories['music.TrackFile'](bitrate=1, duration=2)
    incomplete = factories['music.TrackFile'](bitrate=None, duration=None)
    mocker.patch(
        'funkwhale_api.music.utils.get_audio_file_data',
        return_value={'bitrate': 42, 'length': 43})

    command = fix_track_files.Command()
    command.fix_file_data(dry_run=False)

    for track_file in (complete, incomplete):
        track_file.refresh_from_db()

    # existing values are left untouched
    assert (complete.bitrate, complete.duration) == (1, 2)
    # missing values come from the (mocked) audio data
    assert (incomplete.bitrate, incomplete.duration) == (42, 43)
def test_fix_track_files_size(factories, mocker):
    """``fix_file_size`` only fills in sizes that are missing."""
    sized = factories['music.TrackFile'](size=1)
    unsized = factories['music.TrackFile'](size=None)
    mocker.patch(
        'funkwhale_api.music.models.TrackFile.get_file_size',
        return_value=2)

    command = fix_track_files.Command()
    command.fix_file_size(dry_run=False)

    for track_file in (sized, unsized):
        track_file.refresh_from_db()

    # the existing value is left untouched
    assert sized.size == 1
    # the missing value comes from the (mocked) file size
    assert unsized.size == 2

View File

@ -1,4 +1,5 @@
import json
import os
import pytest
from django.urls import reverse
@ -7,6 +8,8 @@ from funkwhale_api.federation import actors
from funkwhale_api.federation import serializers as federation_serializers
from funkwhale_api.music import tasks
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
def test_create_import_can_bind_to_request(
artists, albums, mocker, factories, superuser_api_client):
@ -40,11 +43,20 @@ def test_create_import_can_bind_to_request(
assert batch.import_request == request
def test_import_job_from_federation_no_musicbrainz(factories):
def test_import_job_from_federation_no_musicbrainz(factories, mocker):
mocker.patch(
'funkwhale_api.music.utils.get_audio_file_data',
return_value={'bitrate': 24, 'length': 666})
mocker.patch(
'funkwhale_api.music.models.TrackFile.get_file_size',
return_value=42)
lt = factories['federation.LibraryTrack'](
artist_name='Hello',
album_title='World',
title='Ping',
metadata__length=42,
metadata__bitrate=43,
metadata__size=44,
)
job = factories['music.ImportJob'](
federation=True,
@ -56,6 +68,9 @@ def test_import_job_from_federation_no_musicbrainz(factories):
tf = job.track_file
assert tf.mimetype == lt.audio_mimetype
assert tf.duration == 42
assert tf.bitrate == 43
assert tf.size == 44
assert tf.library_track == job.library_track
assert tf.track.title == 'Ping'
assert tf.track.artist.name == 'Hello'
@ -234,13 +249,13 @@ def test_import_batch_notifies_followers(
def test__do_import_in_place_mbid(factories, tmpfile):
path = '/test.ogg'
path = os.path.join(DATA_DIR, 'test.ogg')
job = factories['music.ImportJob'](
in_place=True, source='file:///test.ogg')
in_place=True, source='file://{}'.format(path))
track = factories['music.Track'](mbid=job.mbid)
tf = tasks._do_import(job, use_acoustid=False)
assert bool(tf.audio_file) is False
assert tf.source == 'file:///test.ogg'
assert tf.source == 'file://{}'.format(path)
assert tf.mimetype == 'audio/ogg'

View File

@ -85,3 +85,28 @@ def test_track_file_file_name(factories):
tf = factories['music.TrackFile'](audio_file__from_path=path)
assert tf.filename == tf.track.full_name + '.mp3'
def test_track_get_file_size(factories):
    """The size of an uploaded audio file is read from the file itself."""
    sample = os.path.join(DATA_DIR, 'test.mp3')
    track_file = factories['music.TrackFile'](audio_file__from_path=sample)

    assert track_file.get_file_size() == 297745
def test_track_get_file_size_federation(factories):
    """A federated track file reports its library track's audio size."""
    track_file = factories['music.TrackFile'](
        federation=True, library_track__with_audio_file=True)
    measured = track_file.get_file_size()

    assert measured == track_file.library_track.audio_file.size
def test_track_get_file_size_in_place(factories):
    """An in-place import is measured through its ``file://`` source."""
    sample = os.path.join(DATA_DIR, 'test.mp3')
    source = 'file://{}'.format(sample)
    track_file = factories['music.TrackFile'](in_place=True, source=source)

    assert track_file.get_file_size() == 297745

View File

@ -62,6 +62,9 @@ def test_import_job_can_run_with_file_and_acoustid(
'score': 0.860825}],
'status': 'ok'
}
mocker.patch(
'funkwhale_api.music.utils.get_audio_file_data',
return_value={'bitrate': 42, 'length': 43})
mocker.patch(
'funkwhale_api.musicbrainz.api.artists.get',
return_value=artists['get']['adhesive_wombat'])
@ -82,7 +85,9 @@ def test_import_job_can_run_with_file_and_acoustid(
with open(path, 'rb') as f:
assert track_file.audio_file.read() == f.read()
assert track_file.duration == 268
assert track_file.bitrate == 42
assert track_file.duration == 43
assert track_file.size == os.path.getsize(path)
# audio file is deleted from import job once persisted to audio file
assert not job.audio_file
assert job.status == 'finished'

View File

@ -1,5 +1,10 @@
import os
import pytest
from funkwhale_api.music import utils
DATA_DIR = os.path.dirname(os.path.abspath(__file__))
def test_guess_mimetype_try_using_extension(factories, mocker):
mocker.patch(
@ -17,3 +22,16 @@ def test_guess_mimetype_try_using_extension_if_fail(factories, mocker):
audio_file__filename='test.mp3')
assert utils.guess_mimetype(f.audio_file) == 'audio/mpeg'
@pytest.mark.parametrize('name, expected', [
    ('sample.flac', {'bitrate': 1608000, 'length': 0.001}),
    ('test.mp3', {'bitrate': 8000, 'length': 267.70285714285717}),
    ('test.ogg', {'bitrate': 128000, 'length': 229.18304166666667}),
])
def test_get_audio_file_data(name, expected):
    """Each sample file yields the expected bitrate and length."""
    sample = os.path.join(DATA_DIR, name)
    with open(sample, 'rb') as audio:
        extracted = utils.get_audio_file_data(audio)

    assert extracted == expected

View File

@ -77,7 +77,8 @@ def test_get_album_serializer(factories):
artist = factories['music.Artist']()
album = factories['music.Album'](artist=artist)
track = factories['music.Track'](album=album)
tf = factories['music.TrackFile'](track=track)
tf = factories['music.TrackFile'](
track=track, bitrate=42000, duration=43, size=44)
expected = {
'id': album.pk,
@ -98,7 +99,9 @@ def test_get_album_serializer(factories):
'year': track.album.release_date.year,
'contentType': tf.mimetype,
'suffix': tf.extension or '',
'duration': tf.duration or 0,
'bitrate': 42,
'duration': 43,
'size': 44,
'created': track.creation_date,
'albumId': album.pk,
'artistId': artist.pk,
@ -177,7 +180,8 @@ def test_playlist_detail_serializer(factories):
def test_directory_serializer_artist(factories):
track = factories['music.Track']()
tf = factories['music.TrackFile'](track=track)
tf = factories['music.TrackFile'](
track=track, bitrate=42000, duration=43, size=44)
album = track.album
artist = track.artist
@ -195,7 +199,9 @@ def test_directory_serializer_artist(factories):
'year': track.album.release_date.year,
'contentType': tf.mimetype,
'suffix': tf.extension or '',
'duration': tf.duration or 0,
'bitrate': 42,
'duration': 43,
'size': 44,
'created': track.creation_date,
'albumId': album.pk,
'artistId': artist.pk,

View File

@ -0,0 +1,42 @@
Store file length, size and bitrate (#195)
Storage of bitrate, size and length in database
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Starting with this release, when importing files, Funkwhale will store
additional information about audio files:
- Bitrate
- Size (in bytes)
- Duration
This change is not retroactive, meaning already imported files will lack this
information. The interface and API should work as before in such cases; however,
we offer a command to deal with legacy files and populate the missing values.
On docker setups:
.. code-block:: shell
docker-compose run --rm api python manage.py fix_track_files
On non-docker setups:
.. code-block:: shell
# from your activated virtualenv
python manage.py fix_track_files
.. note::
The execution time for this command is proportional to the number of
audio files stored on your instance. This is because we need to read the
files from disk to fetch the data. You can run it in the background
while Funkwhale is up.
It's also safe to interrupt this command and rerun it at a later point, or run
it multiple times.
Use the --dry-run flag to check how many files would be impacted.

View File

@ -6,6 +6,6 @@ export default {
formatsMap: {
'audio/ogg': 'ogg',
'audio/mpeg': 'mp3',
'audio/flac': 'flac'
'audio/x-flac': 'flac'
}
}

View File

@ -44,6 +44,46 @@
</a>
</div>
</div>
<!-- Duration, size and bitrate of the track's first file; each row falls back to "N/A" when the value is missing -->
<div v-if="file" class="ui vertical stripe center aligned segment">
  <h2 class="ui header">{{ $t('Track information') }}</h2>
  <table class="ui very basic collapsing celled center aligned table">
    <tbody>
      <tr>
        <td>
          {{ $t('Duration') }}
        </td>
        <td v-if="file.duration">
          {{ time.parse(file.duration) }}
        </td>
        <td v-else>
          {{ $t('N/A') }}
        </td>
      </tr>
      <tr>
        <td>
          {{ $t('Size') }}
        </td>
        <td v-if="file.size">
          {{ file.size | humanSize }}
        </td>
        <td v-else>
          {{ $t('N/A') }}
        </td>
      </tr>
      <tr>
        <td>
          {{ $t('Bitrate') }}
        </td>
        <!-- bitrate is a bits-per-second value (as reported by mutagen), so it must not go through the byte-oriented humanSize filter -->
        <td v-if="file.bitrate">
          {{ Math.round(file.bitrate / 1000) }} kbit/s
        </td>
        <td v-else>
          {{ $t('N/A') }}
        </td>
      </tr>
    </tbody>
  </table>
</div>
<div class="ui vertical stripe center aligned segment">
<h2><i18next path="Lyrics"/></h2>
<div v-if="isLoadingLyrics" class="ui vertical segment">
@ -64,6 +104,8 @@
</template>
<script>
import time from '@/utils/time'
import axios from 'axios'
import url from '@/utils/url'
import logger from '@/logging'
@ -83,6 +125,7 @@ export default {
},
data () {
return {
time,
isLoadingTrack: true,
isLoadingLyrics: true,
track: null,
@ -134,6 +177,9 @@ export default {
return u
}
},
// First entry of the current track's `files` array, exposed to the
// template as `file` (undefined when the array is empty).
file () {
return this.track.files[0]
},
lyricsSearchUrl () {
let base = 'http://lyrics.wikia.com/wiki/Special:Search?query='
let query = this.track.artist.name + ' ' + this.track.title
@ -159,5 +205,8 @@ export default {
<!-- Add "scoped" attribute to limit CSS to this component only -->
<style scoped lang="scss">
.table.center.aligned {
margin-left: auto;
margin-right: auto;
}
</style>

View File

@ -47,4 +47,23 @@ export function capitalize (str) {
Vue.filter('capitalize', capitalize)
/**
 * Format a byte count as a short human-readable string.
 *
 * @param {number} bytes - Amount to format.
 * @param {boolean} [si=true] - Use decimal units (kB, MB, ... with a
 *   1000 step) when true, binary units (KiB, MiB, ... with a 1024 step)
 *   when false.
 * @returns {string} The raw value followed by " B" below one unit step,
 *   otherwise the scaled value with one decimal and its unit.
 */
export function humanSize (bytes, si = true) {
  const thresh = si ? 1000 : 1024
  if (Math.abs(bytes) < thresh) {
    return bytes + ' B'
  }
  const units = si
    ? ['kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
    : ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
  // scale a copy so the argument itself is never reassigned
  let value = bytes
  let u = -1
  do {
    value /= thresh
    ++u
  } while (Math.abs(value) >= thresh && u < units.length - 1)
  return value.toFixed(1) + ' ' + units[u]
}
Vue.filter('humanSize', humanSize)
export default {}