Scrobble Exporter

Despite using last.fm for almost a decade, I’d never used it for anything other than scrobbling, so when the upgrade disabled this functionality I began looking for alternatives. The obvious choice was Libre.fm, which I’m happily using at the moment. Unlike Last.FM, getting at my scrobbles isn’t quite as easy (where I was using IFFFT to copy scrobbles to a Google spreadsheet), but the guys behind Libre.fm released a Python export script for Last/Libre.fm which fits the bill admirably.
Below is a subtly adapted version that I’ll use to analyse my listening habits. The script will output a text file delimited by tabs with the following information:
date
artist name
track title
album title
I don’t think there is an easy way to shoe-horn genre into the script (it could possibly be done by comparing the album mbid on MusicBrainz)
#!/usr/bin/env python
#-*- coding: utf-8 -*-

#copied from https://github.com/encukou/lastscrape-gui.
#tweaked to remove the MusicBrainz gubbins from the output, and format date
#instead of using the script as an importer, the intent is to simply export
#scrobble information with a view to ue it for lifelogging
###########################################################################
##example usage: ~/Desktop/lastexport.py -u jasmine_elm -s libre.fm
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
Script for exporting tracks through audioscrobbler API.
Usage: lastexport.py -u USER [-o OUTFILE] [-p STARTPAGE] [-s SERVER]
"""

import urllib2, urllib, sys, time, re
import xml.etree.ElementTree as ET
from optparse import OptionParser


__version__ = '0.0.4'

def get_options(parser):
    """ Define command line options."""
    parser.add_option("-u", "--user", dest="username", default=None,
                      help="User name.")
    parser.add_option("-o", "--outfile", dest="outfile", default="exported_tracks.txt",
                      help="Output file, default is exported_tracks.txt")
    parser.add_option("-p", "--page", dest="startpage", type="int", default="1",
                      help="Page to start fetching tracks from, default is 1")
    parser.add_option("-s", "--server", dest="server", default="last.fm",
                      help="Server to fetch track info from, default is last.fm")
    parser.add_option("-t", "--type", dest="infotype", default="scrobbles",
                      help="Type of information to export, scrobbles|loved|banned, default is scrobbles")
    options, args = parser.parse_args()

    if not options.username:
        sys.exit("User name not specified, see --help")

    if options.infotype == "loved":
        infotype = "lovedtracks"
    elif options.infotype == "banned":
        infotype = "bannedtracks"
    else:
        infotype = "recenttracks"

    return options.username, options.outfile, options.startpage, options.server, infotype

def connect_server(server, username, startpage, sleep_func=time.sleep, tracktype='recenttracks'):
    """ Connect to server and get a XML page."""
    if server == "libre.fm":
        baseurl = 'http://alpha.libre.fm/2.0/?'
        urlvars = dict(method='user.get%s' % tracktype,
                    api_key=('lastexport.py-%s' % __version__).ljust(32, '-'),
                    user=username,
                    page=startpage,
                    limit=200)

    elif server == "last.fm":
        baseurl = 'http://ws.audioscrobbler.com/2.0/?'
        urlvars = dict(method='user.get%s' % tracktype,
                    api_key='e38cc7822bd7476fe4083e36ee69748e',
                    user=username,
                    page=startpage,
                    limit=50)
    else:
        if server[:7] != 'http://':
            server = 'http://%s' % server
        baseurl = server + '/2.0/?'
        urlvars = dict(method='user.get%s' % tracktype,
                    api_key=('lastexport.py-%s' % __version__).ljust(32, '-'),
                    user=username,
                    page=startpage,
                    limit=200)

    url = baseurl + urllib.urlencode(urlvars)
    for interval in (1, 5, 10, 62):
        try:
            f = urllib2.urlopen(url)
            break
        except Exception, e:
            last_exc = e
            print "Exception occured, retrying in %ds: %s" % (interval, e)
            sleep_func(interval)
    else:
        print "Failed to open page %s" % urlvars['page']
        raise last_exc

    response = f.read()
    f.close()

    #bad hack to fix bad xml
    response = re.sub('\xef\xbf\xbe', '', response)
    # Unbelievably, some people have ASCII control characters
    # in their scrobbles: I ran across a \x04 (end of transmission).
    # Remove all of those except \n and \t
    response = re.sub('[\0-\x08\x0b-\x1f]', '', response)
    return response

def get_pageinfo(response, tracktype='recenttracks'):
    """Check how many pages of tracks the user have."""
    xmlpage = ET.fromstring(response)
    totalpages = xmlpage.find(tracktype).attrib.get('totalPages')
    return int(totalpages)

def get_tracklist(response):
    """Read XML page and get a list of tracks and their info."""
    xmlpage = ET.fromstring(response)
    tracklist = xmlpage.getiterator('track')
    return tracklist

def parse_track(trackelement):
    """Extract info from every track entry and output to list."""
    if trackelement.find('artist').getchildren():
        #artist info is nested in loved/banned tracks xml
        artistname = trackelement.find('artist').find('name').text

    else:
        artistname = trackelement.find('artist').text


    if trackelement.find('album') is None:
        #no album info for loved/banned tracks
        albumname = ''
        albummbid = ''
    else:
        albumname = trackelement.find('album').text


    trackname = trackelement.find('name').text

    date = time.strftime('%Y-%m-%d %H:%M', time.localtime(float(trackelement.find('date').get('uts'))))

    output = [date, artistname, trackname, albumname]

    for i, v in enumerate(output):
        if v is None:
            output[i] = ''

    return output

def write_tracks(tracks, outfileobj):
    """Write tracks to an open file"""
    for fields in tracks:
        outfileobj.write(("\t".join(fields) + "\n").encode('utf-8'))

def get_tracks(server, username, startpage=1, sleep_func=time.sleep, tracktype='recenttracks'):
    page = startpage
    response = connect_server(server, username, page, sleep_func, tracktype)
    totalpages = get_pageinfo(response, tracktype)

    if startpage > totalpages:
        raise ValueError("First page (%s) is higher than total pages (%s)." % (startpage, totalpages))

    while page <= totalpages:
        #Skip connect if on first page, already have that one stored.

        if page > startpage:
            response =  connect_server(server, username, page, sleep_func, tracktype)

        tracklist = get_tracklist(response)

        tracks = []
        for trackelement in tracklist:
            # do not export the currently playing track.
            if not trackelement.attrib.has_key("nowplaying") or not trackelement.attrib["nowplaying"]:
                tracks.append(parse_track(trackelement))

        yield page, totalpages, tracks

        page += 1
        sleep_func(.5)

def main(server, username, startpage, outfile, infotype='recenttracks'):
    trackdict = dict()
    page = startpage  # for case of exception
    totalpages = -1  # ditto
    n = 0
    try:
        for page, totalpages, tracks in get_tracks(server, username, startpage, tracktype=infotype):
            print "Got page %s of %s.." % (page, totalpages)
            for track in tracks:
                if infotype == 'recenttracks':
                    trackdict.setdefault(track[0], track)
                else:
                    #Can not use timestamp as key for loved/banned tracks as it's not unique
                    n += 1
                    trackdict.setdefault(n, track)
    except ValueError, e:
        exit(e)
    except Exception:
        raise
    finally:
        with open(outfile, 'a') as outfileobj:
            tracks = sorted(trackdict.values(), reverse=True)
            write_tracks(tracks, outfileobj)
            print "Wrote page %s-%s of %s to file %s" % (startpage, page, totalpages, outfile)

if __name__ == "__main__":
    parser = OptionParser()
    username, outfile, startpage, server, infotype = get_options(parser)
    main(server, username, startpage, outfile, infotype)