2012-04-08, 23:45
Hi,
I am currently using the cu.lyrics script with lyricsmode and I am very happy with it; however, it sometimes fails to find the lyrics even though I can find them on the website.
I slightly modified the scraper code so that it falls back to the lyricsmode search box if the direct URL guessing doesn't work.
This often happens when the song or artist is not written exactly the way lyricsmode stores it in its database (the cranberries vs cranberries) or because it contains some special characters (k's choice).
The modifications I applied are shown at the end of this post (I couldn't find a way to attach a file). Would it be possible to commit them to the cu.lyrics code?
Thanks in advance,
Yann
I am currently using the cu.lyrics script with lyricsmode and I am very happy with it; however, it sometimes fails to find the lyrics even though I can find them on the website.
I slightly modified the scraper code so that it falls back to the lyricsmode search box if the direct URL guessing doesn't work.
This often happens when the song or artist is not written exactly the way lyricsmode stores it in its database (the cranberries vs cranberries) or because it contains some special characters (k's choice).
The modifications I applied are shown at the end of this post (I couldn't find a way to attach a file). Would it be possible to commit them to the cu.lyrics code?
Thanks in advance,
Yann
Code:
diff -ur script.cu.lyrics.orig/resources/lib/scrapers/lyricsmode/lyricsScraper.py script.cu.lyrics/resources/lib/scrapers/lyricsmode/lyricsScraper.py
--- script.cu.lyrics.orig/resources/lib/scrapers/lyricsmode/lyricsScraper.py 2012-04-01 20:13:54.106691515 +0200
+++ script.cu.lyrics/resources/lib/scrapers/lyricsmode/lyricsScraper.py 2012-04-08 23:33:00.699950122 +0200
@@ -139,6 +139,8 @@
self.clean_lyrics_regex = re.compile( "<.+?>" )
self.normalize_lyrics_regex = re.compile( "&#[x]*(?P<name>[0-9]+);*" )
self.clean_br_regex = re.compile( "<br[ /]*>[\s]*", re.IGNORECASE )
+ self.search_results_regex = re.compile("<a href=\"[^\"]+\">([^<]+)</a></td>[^<]+<td><a href=\"([^\"]+)\" class=\"b\">[^<]+</a></td>", re.IGNORECASE)
+ self.next_results_regex = re.compile("<A href=\"([^\"]+)\" class=\"pages\">next .</A>", re.IGNORECASE)
def get_lyrics_start(self, *args):
lyricThread = threading.Thread(target=self.get_lyrics_thread, args=args)
@@ -151,8 +154,36 @@
l.song = song
try: # below is borowed from XBMC Lyrics
url = "http://www.lyricsmode.com/lyrics/%s/%s/%s.html" % (song.artist.lower()[:1],song.artist.lower().replace(" ","_"), song.title.lower().replace(" ","_"), )
- print "Search url: %s" % (url)
- song_search = urllib.urlopen(url).read()
+
+ while True:
+ print "Search url: %s" % (url)
+ song_search = urllib.urlopen(url).read()
+ if song_search.find("<div id='songlyrics_h' class='dn'>") >= 0:
+ break
+
+ # Let's try to use the research box if we didn't yet
+ if not 'search' in url:
+ url = "http://www.lyricsmode.com/search.php?what=songs&s=" + urllib.quote_plus(song.title.lower())
+ else:
+ # the search gave several results, let's try to find our song
+ url = ""
+ start = song_search.find('<!--output-->')
+ end = song_search.find('<!--/output-->', start)
+ results = self.search_results_regex.findall(song_search, start, end)
+
+ for result in results:
+ if result[0].lower() in song.artist.lower():
+ url = "http://www.lyricsmode.com" + result[1]
+ break
+
+ if not url:
+ # Is there a next page of results ?
+ match = self.next_results_regex.search(song_search[end:])
+ if match:
+ url = "http://www.lyricsmode.com/search.php" + match.group(1)
+ else:
+ return None, "No lyrics found"
+
lyr = song_search.split("<div id='songlyrics_h' class='dn'>")[1].split('<!-- /SONG LYRICS -->')[0]
lyr = self.clean_br_regex.sub( "\n", lyr ).strip()
lyr = self.clean_lyrics_regex.sub( "", lyr ).strip()
diff -ur script.cu.lyrics.orig/resources/lib/song.py script.cu.lyrics/resources/lib/song.py
--- script.cu.lyrics.orig/resources/lib/song.py 2012-04-01 20:13:54.158691515 +0200
+++ script.cu.lyrics/resources/lib/song.py 2012-04-08 16:56:32.617536591 +0200
@@ -30,7 +30,9 @@
def current():
song = Song()
song.title = xbmc.getInfoLabel( "MusicPlayer.Title" )
+ song.title = utilities.deAccent(song.title)
song.artist = xbmc.getInfoLabel( "MusicPlayer.Artist")
+ song.artist = utilities.deAccent(song.artist)
print "Current Song: %s:%s" % (song.artist, song.title)