bkane56
bkane56

Reputation: 1679

Python unicode to ASCII conversion in write to file

I am writing a script that walks through a music library and writes a .txt file containing the album name and release date, followed by the numbered tracks. It works perfectly until one of the imported tags (which are unicode) contains an en dash (–, i.e. u'\u2013'). Then I get:

  File "C:/Users/Brian/Python files/CDinfoRF2.py", line 51, in music_album_info
    mfile.write(header)
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2013' in position 18: ordinal not in range(128).

The code:

#!/usr/bin/env python
"""This script takes a path argument to the root directory of the music files (mp3 here) and
   writes various information about the disc to a text file which is named by the artist"""

__author__ = 'Brian Kane'

import io
import os
from os.path import *
import string
from mutagen.mp3 import MP3
from mutagen.easyid3 import EasyID3
import unicodedata

def music_album_info(path):
    # Walk the directory tree rooted at `path` and, for every file whose
    # extension is 'mp3', read its ID3 tags through mutagen's EasyID3 and
    # append a line "<tab><track number>. <title>" to a text file named
    # "<artist>.txt". The file name carries no directory component, so it is
    # opened relative to the current working directory.
    #
    # A "Title: <album>        Released <date>" header is written before the
    # first mp3 of each directory visited by os.walk; `count` is the flag
    # that tracks whether the header for the current directory was written.
    #
    # This is Python 2 code (note the `print` statement below).
    #
    # NOTE(review): `mfile` is only closed once, after the whole walk. Each
    # directory that contains an mp3 rebinds `mfile` to a newly opened file
    # without explicitly closing the previous one, and if no mp3 is found at
    # all `mfile` is never assigned, so the final close() fails.

    count = 0                                                           # 0 = header not yet written for this directory

    for root, dirs, files in os.walk(path):                             # walks through the data tree to get files
        for name in files:
            path_name = root +'\\' + name                               # Windows-style separator is hard-coded
            extension = os.path.splitext(name)[1][1:].strip().lower()   # gets the file extension, without the dot
            if extension == 'mp3':
                # NOTE(review): a new EasyID3 object is constructed for every
                # tag lookup below; `mp3info` further down is one more.
                artist = EasyID3(path_name)['artist'][0]                # gets unicode artist name
                # print artist
                track_num = EasyID3(path_name)['tracknumber']           # gets unicode track number (kept as a list, indexed later)
                album = EasyID3(path_name)['album'][0]
                # print album
                # print type(album)
                # NOTE(review): the value returned by encode() is not stored,
                # so `album` is unchanged by this statement.
                album.encode('utf-8')
                # length = EasyID3(path_name)['length'][0]
                print album
                date = EasyID3(path_name)['date'][0]
                # print date
                track_name = EasyID3(path_name)['title']                # kept as a list, indexed later
                # print track_name
                header = '\n' + 'Title:  ' + album + '        Released ' + date + '\n\n'
                # if EasyID3(path_name)['discnumber'] != []:
                #     disc = EasyID3(path_name)['discnumber']
                #     header = '\n' + 'Title:  ' + album + 'Disc: ' + disc + '        Released ' + date + '\n\n'
                file_name = artist + '.txt'                             # used to name file to be written to = artist
                mp3info = EasyID3(path_name)                            # only referenced by the commented-out debugging code


                # print mp3info.items()
                # print 'header in',header
                if count < 1:                                           # used to not write header over each track
                    # print 'header = ', header
                    # Append mode: an existing <artist>.txt is extended, not
                    # overwritten. The file is opened without an explicit
                    # encoding; writing a unicode header that contains a
                    # non-ASCII character such as u'\u2013' is what raises
                    # the reported UnicodeEncodeError ('ascii' codec).
                    mfile = open(file_name, 'a')
                    mfile.write(header)
                    count += 1

                # Always true here: this test is nested inside the identical
                # `extension == 'mp3'` check above.
                if extension == 'mp3':                                  # avoid album art and errors which occur

                    track_info = '\t' +  track_num[0] + '. ' + track_name[0] + '\n'
                    # if EasyID3(path_name)'length' in mp3info.items():
                    #     length = EasyID3(path_name)['length'][0]
                    #     print length
                    #     track_info = '\t' +  track_num[0] + '. ' + track_name[0] + '   ' + length[0] + '\n'
                    # print 'track_info = ',track_info
                    mfile.write(track_info)
        count = 0                                                       # reset after each directory so the next one gets its own header

    mfile.close()                                                       # closes only the most recently opened file

# Root directory to scan; hard-coded to a test location for now.
path = r'C:\test'
# Interactive alternative:
# path = raw_input('Enter the path to the music file:\n>')

music_album_info(path)

Upvotes: 1

Views: 1254

Answers (1)

Martijn Pieters
Martijn Pieters

Reputation: 1121734

You discarded the UTF-8 encoding of the album:

album.encode('utf-8')

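Strings are immutable; you essentially discarded the bytestring return value. Note that in Python 2, concatenating that bytestring with the other (still unicode) tag values, such as the date, implicitly decodes it as ASCII again, so those values need to be encoded too. Store the result: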

album = album.encode('utf-8')

You could instead use the io.open() function to open a file object that automatically encodes all Unicode written to it to UTF-8:

with io.open(file_name, 'a', encoding='utf-8') as mfile:
    mfile.write(header)

You don't need to encode the album at all in that case.

Upvotes: 6

Related Questions