2007-07-24, 22:10
The TV.com HTML format changed slightly, so I have fixed the TV.com scraper to return a proper title instead of an empty string. Here is the XML for anyone who is interested.
Code:
<?xml version="1.0" encoding="UTF-8"?>
<scraper name="TV.com" content="tvshows" thumb="tvcom.png">
<!-- CreateSearchUrl: builds the TV.com program-search URL. Capture \1 of buffer $$1
     (presumably the search term supplied by the caller) is placed in the qs= parameter,
     and the resulting <url> element is written to buffer 3. -->
<CreateSearchUrl dest="3">
  <!-- The output template is markup carried inside an attribute value, so its angle
       brackets are entity-escaped; a raw "<" in an attribute is not well-formed XML. -->
  <RegExp input="$$1"
          output="&lt;url&gt;http://www.tv.com/search.php?stype=program&amp;qs=\1&amp;tag=tv_shows&lt;/url&gt;"
          dest="3">
    <!-- Empty pattern. NOTE(review): presumably the engine treats this as
         "capture the whole input as \1"; confirm against the scraper engine docs. -->
    <expression></expression>
  </RegExp>
</CreateSearchUrl>
<!-- GetSearchResults: turns every "Show:" link on the search page ($$1) into an <entity>
     in buffer 4, then wraps buffer 4 in <results> and writes it to buffer 3.
     Each entity carries the link text as title (\2), the numeric show id (\1) and three
     URLs for that id: summary, cast and episode listings.
     Output templates and patterns are kept on one line each: a line break inside an
     attribute value is read back as a space (which would land inside the URLs), and a
     line break inside expression text becomes part of the pattern. -->
<GetSearchResults dest="3">
  <RegExp input="$$4" output="&lt;results&gt;\1&lt;/results&gt;" dest="3">
    <RegExp input="$$1"
            output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;url&gt;http://www.tv.com/show/\1/summary.html&lt;/url&gt;&lt;url&gt;http://www.tv.com/show/\1/cast.html&lt;/url&gt;&lt;url&gt;http://www.tv.com/show/\1/episode_listings.html&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;"
            dest="4">
      <!-- \1 = digits after /show/ in the href, \2 = link text. -->
      <expression repeat="yes">Show: &lt;a class="f-bold f-C30" href="http://www\.tv\.com/[^/]*/show/([0-9]+)/[^"]*"&gt;([^&lt;]*)&lt;/a&gt;</expression>
    </RegExp>
    <!-- Empty pattern with noclean="1". NOTE(review): presumably passes buffer 4 through
         without the engine's clean-up step; confirm. -->
    <expression noclean="1"></expression>
  </RegExp>
</GetSearchResults>
<!-- GetDetails: collects the show's metadata tags in buffer 5 and finally wraps buffer 5
     in <details>, written to buffer 7.
     Inputs used: $$1, $$2 (actors), $$3 (season list) and $$4 (inserted after /show/ in
     the episode-guide URLs). NOTE(review): these presumably correspond to the summary,
     cast and episode_listings pages and the show id from GetSearchResults; confirm.
     All markup inside output attributes and expression text is entity-escaped so the file
     is well-formed, and every pattern sits on a single line because a line break inside
     expression text would become part of the regex. -->
<GetDetails dest="7">
  <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="7">
    <!-- Title: contents of an h1 element. Written with dest="5" (no "+"), unlike the
         dest="5+" entries that follow. -->
    <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
      <expression>&lt;h1&gt;([^&gt;]*)&lt;/h1&gt;</expression>
    </RegExp>
    <!-- Genres: one <genre> per match of the text between ;genre"> and the closing </a>. -->
    <RegExp input="$$1" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
      <expression repeat="yes">;genre"&gt;([^&gt;]*)&lt;/a&gt;</expression>
    </RegExp>
    <!-- Plot, first variant: the quoted string that follows 'Close Full Summary [-]' is
         captured into buffer 8; a second pass over buffer 8 keeps the text between
         backslashes and drops the backslashes themselves. -->
    <RegExp input="$$8" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
      <RegExp input="$$1" output="\1" dest="8">
        <expression>'Close Full Summary \[-\]', '([^+]*)', '</expression>
      </RegExp>
      <RegExp input="$$8" output="\1" dest="8">
        <expression repeat="yes">([^\\]*)\\?</expression>
      </RegExp>
      <expression noclean="1"></expression>
    </RegExp>
    <!-- Plot, second variant: text inside the summary_fold div goes to buffer 7, a second
         pass keeps only group 1 (the text ahead of a closing div), and the result is
         appended as another <plot>.
         The middle character class used to read [^a-zA-z]; the A-z range also spans
         [ \ ] ^ _ and the backtick, so it is now [^a-zA-Z] like the classes around it. -->
    <RegExp input="$$1" output="\1" dest="7">
      <expression noclean="1">id="summary_fold" class="mt-10"&gt;[^a-zA-Z&lt;]*(.*)&lt;/div&gt;[^a-zA-Z]*&lt;/div&gt;[^a-zA-Z]*&lt;div class="divider"</expression>
    </RegExp>
    <RegExp input="$$7" output="\1" dest="7">
      <expression trim="1">([^{]*)(&lt;/div&gt;.*)</expression>
    </RegExp>
    <RegExp input="$$7" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
      <expression></expression>
    </RegExp>
    <!-- Runtime is not being used here
    <RegExp input="$$1" output="&lt;runtime&gt;\1 min&lt;/runtime&gt;" dest="5+">
      <expression>\(([0-9]*) min.\)</expression>
    </RegExp> -->
    <!-- Rating: number inside the span that follows "Score:". -->
    <RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;" dest="5+">
      <expression>Score:&lt;/span&gt;[^&lt;]*&lt;span[^&gt;]*&gt;([0-9\.]*)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Votes: digits and commas ahead of " votes" in an f-11 span. -->
    <RegExp input="$$1" output="&lt;votes&gt;\1&lt;/votes&gt;" dest="5+">
      <expression>&lt;span class="f-11"&gt;([0-9,]*) votes&lt;/span&gt;</expression>
    </RegExp>
    <!-- Actors: read from $$2. \1 = link text of a ?tag=stars link, \2 = text after "Role: ". -->
    <RegExp input="$$2" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\2&lt;/role&gt;&lt;/actor&gt;" dest="5+">
      <expression repeat="yes">\?tag=stars[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;&lt;br /&gt;[^&lt;]*&lt;span class="f-bold"&gt;Role: ([^&lt;]*)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Thumbnail: a program content-header .jpg URL on image.com.com. -->
    <RegExp input="$$1" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="5+">
      <expression>(http://image\.com\.com/tv/images/content_headers/program/[0-9]*\.jpg)</expression>
    </RegExp>
    <!-- Status: text of the f-333 span after "Status", skipping leading non-letters. -->
    <RegExp input="$$1" output="&lt;status&gt;\1&lt;/status&gt;" dest="5+">
      <expression trim="1">Status[^&lt;]*&lt;span class="f-333"&gt;[^a-zA-Z]*([^&lt;]*)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Premiere date: text of the f-333 span after "Premiered:". -->
    <RegExp input="$$1" output="&lt;premiered&gt;\1&lt;/premiered&gt;" dest="5+">
      <expression trim="1">Premiered:[^&lt;]*&lt;span class="f-333"&gt;([^&lt;]*)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Episode guide: season listing URLs built from $$3 into buffer 8, then wrapped in
         <episodeguide>. The first pass emits the season=1 URL when "| ... Next Season"
         text is present; the second emits one URL per "Season N" option of the season
         dropdown. Both passes use dest="8" without "+".
         NOTE(review): buffer 8 was also used for the plot text above; confirm what the
         engine leaves in it when neither pattern matches. -->
    <RegExp input="$$8" output="&lt;episodeguide&gt;\1&lt;/episodeguide&gt;" dest="5+">
      <RegExp input="$$3" output="&lt;url&gt;http://www.tv.com/show/$$4/episode_listings.html?season=1&lt;/url&gt;" dest="8">
        <expression>\|[^&lt;]+Next Season</expression>
      </RegExp>
      <RegExp input="$$3" output="&lt;url&gt;http://www.tv.com/show/$$4/episode_listings.html?season=\1&lt;/url&gt;" dest="8">
        <expression repeat="yes">&lt;option[^&gt;]*tag=season_dropdown[^&gt;]*&gt;Season ([0-9]+)&lt;/option&gt;</expression>
      </RegExp>
      <expression noclean="1"></expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetDetails>
<!-- GetEpisodeList: reads one season listing page ($$1). The season number from the
     "Season N" heading goes to buffer 6, each episode row becomes an <episode> in
     buffer 5, and buffer 5 is wrapped in <episodeguide> and written to buffer 3.
     Markup in attributes and patterns is entity-escaped for well-formedness; the output
     template and the row pattern are each on one line so that no stray whitespace ends
     up inside the <url> tag or the regex. -->
<GetEpisodeList dest="3">
  <RegExp input="$$5" output="&lt;episodeguide&gt;\1&lt;/episodeguide&gt;" dest="3">
    <!-- Season number, reused below as $$6 inside each <season> tag. -->
    <RegExp input="$$1" output="\1" dest="6">
      <expression>&lt;span class="f-18 f-bold"&gt;Season ([0-9]+)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Per row: \1 = number in the nowrap cell (emitted as epnum), \2 = episode id from
         the summary link, \3 = link text (emitted as title). -->
    <RegExp input="$$1"
            output="&lt;episode&gt;&lt;title&gt;\3&lt;/title&gt;&lt;id&gt;\2&lt;/id&gt;&lt;url&gt;http://www.tv.com/episode/\2/summary.html&lt;/url&gt;&lt;epnum&gt;\1&lt;/epnum&gt;&lt;season&gt;$$6&lt;/season&gt;&lt;/episode&gt;"
            dest="5">
      <expression repeat="yes">nowrap="nowrap"&gt;[^0-9]*([0-9]+)[^&lt;]*&lt;/td&gt;[^&lt;]*&lt;td class="f-bold"&gt;[^&lt;]*&lt;a[^&gt;]*/episode/([0-9]*)/summary\.html\?tag=ep_list[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;</expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetEpisodeList>
<!-- GetEpisodeDetails: extracts one episode's metadata from its page ($$1) into buffer 5,
     then wraps buffer 5 in <details> and writes it to buffer 3.
     Every "<", ">" and "&" that belongs to an output template or a pattern is
     entity-escaped; raw "<" and "&" are not allowed in attribute values or element text. -->
<GetEpisodeDetails dest="3">
  <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
    <!-- Main column of the page, stored in buffer 6.
         NOTE(review): nothing in this function reads $$6 afterwards; confirm whether
         this step is still needed. -->
    <RegExp input="$$1" output="\1" dest="6">
      <expression>&lt;div id="main-col"&gt;(.*)&lt;div class="ta-r mt-10 f-bold"&gt;</expression>
    </RegExp>
    <!-- Title: text of the f-FF9 span after "Episode: ". Written with dest="5" (no "+"). -->
    <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5">
      <expression>Episode: &lt;span class="f-FF9"&gt;([^&lt;]*)&lt;/span&gt;&lt;br /&gt;</expression>
    </RegExp>
    <!-- Plot: text free of "=" between a "div>" and the ta-r mt-10 f-bold div. -->
    <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
      <expression>div&gt;([^=]*)&lt;div class="ta-r mt-10 f-bold"&gt;</expression>
    </RegExp>
    <!-- Rating: number inside the large score span. -->
    <RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;" dest="5+">
      <expression>&lt;span class="f-28 f-bold mt-10 mb-10 f-FF9 db lh-18"&gt;([0-9.]+)&lt;/span&gt;</expression>
    </RegExp>
    <!-- Votes: digits and commas ahead of " votes" in an f-11 span. -->
    <RegExp input="$$1" output="&lt;votes&gt;\1&lt;/votes&gt;" dest="5+">
      <expression>&lt;span class="f-11"&gt;([0-9,]+) votes&lt;/span&gt;</expression>
    </RegExp>
    <!-- Air date: everything after "First Aired: " up to the next ampersand. -->
    <RegExp input="$$1" output="&lt;aired&gt;\1&lt;/aired&gt;" dest="5+">
      <expression>First Aired: ([^&amp;]*)</expression>
    </RegExp>
    <!-- Actors: \1 = link text, \2 = text in the parentheses that follow the link. -->
    <RegExp input="$$1" output="&lt;actor&gt;&lt;name&gt;\1&lt;/name&gt;&lt;role&gt;\2&lt;/role&gt;&lt;/actor&gt;" dest="5+">
      <expression repeat="yes">"&gt;([^&lt;]*)&lt;/a&gt; \(([^&lt;]*)\)[^&lt;]*&lt;</expression>
    </RegExp>
    <!-- Director: text of the first link in the table cell after "Director:". -->
    <RegExp input="$$1" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
      <expression>Director:[^&lt;]*&lt;/td&gt;[^&lt;]*&lt;td&gt;[^&lt;]*&lt;a[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;</expression>
    </RegExp>
    <!-- Writer: same layout as the director row, emitted as <credits>. -->
    <RegExp input="$$1" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
      <expression>Writer:[^&lt;]*&lt;/td&gt;[^&lt;]*&lt;td&gt;[^&lt;]*&lt;a[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt;</expression>
    </RegExp>
    <!-- Thumbnail: the image:url(...) argument of the "video-thumb mb-5" element. -->
    <RegExp input="$$1" output="&lt;thumb&gt;\1&lt;/thumb&gt;" dest="5+">
      <expression>"video-thumb mb-5"[^&gt;]*image:url\(([^)]*)\);"</expression>
    </RegExp>
    <!-- Production code: text after "Prod Code: " up to the closing span. -->
    <RegExp input="$$1" output="&lt;code&gt;\1&lt;/code&gt;" dest="5+">
      <expression>Prod Code: ([^&lt;]*)&lt;/span&gt;</expression>
    </RegExp>
    <expression noclean="1"></expression>
  </RegExp>
</GetEpisodeDetails>
</scraper>