@@ -198,7 +198,17 @@ def parse(source: str | bytes) -> FastFeedParserDict:
198198 channel = child
199199 break
200200 if channel is None :
201- raise ValueError ("Invalid RSS feed: missing channel element" )
201+ # Fallback: Check if this is a malformed RSS with Atom-style elements
202+ # This handles feeds like seancdavis.com that declare RSS but use Atom structure
203+ has_atom_elements = any (
204+ child .tag in ['entry' , 'title' , 'subtitle' , 'updated' , 'id' , 'author' , 'link' ]
205+ for child in root
206+ )
207+ if has_atom_elements :
208+ # Treat the RSS root as the channel for malformed feeds
209+ channel = root
210+ else :
211+ raise ValueError ("Invalid RSS feed: missing channel element" )
202212 # Find items with or without namespace
203213 items = channel .findall ("item" )
204214 if not items :
@@ -214,6 +224,20 @@ def parse(source: str | bytes) -> FastFeedParserDict:
214224 # Try recursive search for deeply nested items (minified feeds)
215225 if not items :
216226 items = channel .xpath (".//item" ) or channel .xpath (".//*[local-name()='item']" )
227+
228+ # Fallback for malformed RSS: look for Atom-style <entry> elements
229+ if not items :
230+ items = channel .findall ("entry" )
231+ if not items :
232+ # Try to find entries with any namespace
233+ for child in channel :
234+ if child .tag .endswith ("}entry" ) or child .tag == "entry" :
235+ if not items :
236+ items = []
237+ items .append (child )
238+ # Still nothing: collect direct children tagged "entry", with or without a namespace
239+ if not items :
240+ items = [child for child in channel if child .tag .endswith ("}entry" ) or child .tag == "entry" ]
217241 elif root .tag .endswith ("}feed" ):
218242 # Detect Atom namespace dynamically
219243 if "{http://www.w3.org/2005/Atom}" in root .tag :
@@ -834,7 +858,18 @@ def wrapper(
834858 else :
835859 result = _get_element_value (root , atom_css ) or _get_element_value (root , rdf_css )
836860
837- return result
861+ if result :
862+ return result
863+
864+ # Try unnamespaced Atom fields for malformed RSS feeds like seancdavis.com
865+ # Extract the local name from the namespaced atom_css
866+ if atom_css .startswith ("{" ) and "}" in atom_css :
867+ unnamespaced_atom = atom_css .split ("}" , 1 )[1 ]
868+ result = _get_element_value (root , unnamespaced_atom )
869+ if result :
870+ return result
871+
872+ return None
838873
839874 elif feed_type == "atom" :
840875
@@ -920,6 +955,17 @@ def _parse_date(date_str: str) -> Optional[str]:
920955 if not date_str :
921956 return None
922957
958+ # Fix invalid leap year dates (Feb 29 in non-leap years)
959+ # This handles feeds with incorrect dates like "2023-02-29"
960+ import re
961+ if re .match (r'(\d{4})-02-29' , date_str ):
962+ year_match = re .match (r'(\d{4})-02-29' , date_str )
963+ if year_match :
964+ year = int (year_match .group (1 ))
965+ if not ((year % 4 == 0 and year % 100 != 0 ) or (year % 400 == 0 )):
966+ # Not a leap year, change Feb 29 to Feb 28
967+ date_str = date_str .replace (f'{ year } -02-29' , f'{ year } -02-28' )
968+
923969 # Try dateutil.parser first
924970 try :
925971 dt = dateutil_parser .parse (date_str , tzinfos = custom_tzinfos , ignoretz = False )
0 commit comments