3030import difflib
3131import collections
3232import distutils .sysconfig
33+ import fnmatch
3334import io
3435import os
3536import re
ATOMS = frozenset([tokenize.NAME, tokenize.NUMBER, tokenize.STRING])

# Matches a whole line of the form "except <names> as <name>:".
EXCEPT_REGEX = re.compile(r'^\s*except [\s,()\w]+ as \w+:$')

# Matches a shebang line whose last word is a Python interpreter name:
# "python", "python2"/"python3", or a dotted version such as "python2.7" or
# "python3.10". Only trailing whitespace may follow the interpreter name.
PYTHON_SHEBANG_REGEX = re.compile(
    r'^#!.*\bpython(?:[23](?:\.\d+)?)?\b\s*$')

# Upper bound on how much of a file is read when checking for a shebang.
MAX_PYTHON_FILE_DETECTION_BYTES = 1024
5255
5356try :
5457 unicode
@@ -549,21 +552,25 @@ def fix_file(filename, args, standard_out):
549552 standard_out .write ('' .join (diff ))
550553
551554
def open_with_encoding(filename, encoding, mode='r',
                       limit_byte_check=-1):
    """Return opened file with a specific encoding.

    If ``encoding`` is empty or None, it is determined by calling
    detect_encoding(), and ``limit_byte_check`` is forwarded to that call.
    ``mode`` is passed to io.open() unchanged.

    """
    if not encoding:
        encoding = detect_encoding(filename,
                                   limit_byte_check=limit_byte_check)

    # newline='' turns off newline translation so that the file's original
    # line endings are preserved.
    return io.open(filename, mode=mode, encoding=encoding, newline='')
556563
557564
558- def detect_encoding (filename ):
565+ def detect_encoding (filename , limit_byte_check = - 1 ):
559566 """Return file encoding."""
560567 try :
561568 with open (filename , 'rb' ) as input_file :
562569 encoding = _detect_encoding (input_file .readline )
563570
564571 # Check for correctness of encoding.
565572 with open_with_encoding (filename , encoding ) as input_file :
566- input_file .read ()
573+ input_file .read (limit_byte_check )
567574
568575 return encoding
569576 except (LookupError , SyntaxError , UnicodeDecodeError ):
@@ -600,6 +607,69 @@ def get_diff_text(old, new, filename):
600607 return text
601608
602609
def _split_comma_separated(string):
    """Return a set of strings.

    ``string`` is split on commas; each piece is stripped of surrounding
    whitespace and empty pieces are dropped.

    """
    stripped = (text.strip() for text in string.split(','))
    return set(text for text in stripped if text)
613+
614+
def is_python_file(filename):
    """Return True if filename is Python file.

    A name ending in '.py' is accepted without opening the file. Otherwise
    up to MAX_PYTHON_FILE_DETECTION_BYTES are read from the file and its
    first line must match PYTHON_SHEBANG_REGEX. An empty file, or one that
    raises IOError when opened or read, is not a Python file.

    """
    if filename.endswith('.py'):
        return True

    try:
        with open_with_encoding(
                filename,
                None,
                limit_byte_check=MAX_PYTHON_FILE_DETECTION_BYTES) as f:
            text = f.read(MAX_PYTHON_FILE_DETECTION_BYTES)
    except IOError:
        return False

    if not text:
        return False

    # splitlines() on a non-empty string always yields at least one element,
    # so indexing the first line cannot fail here.
    first_line = text.splitlines()[0]
    return bool(PYTHON_SHEBANG_REGEX.match(first_line))
636+
637+
def match_file(filename, exclude):
    """Return True if file is okay for modifying/recursing.

    ``filename`` is rejected when its base name starts with '.', or when
    either its base name or the full ``filename`` matches any glob in
    ``exclude``. Otherwise it is accepted if it is a directory or if
    is_python_file() says it is a Python file.

    """
    base_name = os.path.basename(filename)

    if base_name.startswith('.'):
        return False

    if any(fnmatch.fnmatch(base_name, pattern) or
           fnmatch.fnmatch(filename, pattern)
           for pattern in exclude):
        return False

    # Directories are accepted without inspecting their contents; only
    # non-directories are checked for being Python files.
    return os.path.isdir(filename) or is_python_file(filename)
655+
656+
def find_files(filenames, recursive, exclude):
    """Yield filenames.

    Names are processed in order. When ``recursive`` is true and a name is a
    directory, the directory is walked: files accepted by match_file() are
    queued after the names already pending, and subdirectories rejected by
    match_file() are not descended into. Every other name is yielded as is.

    ``filenames`` is copied into a private queue, so the caller's sequence is
    left unmodified.

    """
    pending = collections.deque(filenames)
    while pending:
        name = pending.popleft()
        if recursive and os.path.isdir(name):
            for root, directories, children in os.walk(name):
                for child in children:
                    path = os.path.join(root, child)
                    if match_file(path, exclude):
                        pending.append(path)
                # Assign in place so that os.walk() skips the pruned
                # directories.
                directories[:] = [d for d in directories
                                  if match_file(os.path.join(root, d),
                                                exclude)]
        else:
            yield name
671+
672+
603673def _main (argv , standard_out , standard_error ):
604674 """Return exit status.
605675
@@ -630,6 +700,9 @@ def _main(argv, standard_out, standard_error):
630700 parser .add_argument ('--version' , action = 'version' ,
631701 version = '%(prog)s ' + __version__ )
632702 parser .add_argument ('files' , nargs = '+' , help = 'files to format' )
703+ parser .add_argument ('--exclude' , metavar = 'globs' ,
704+ help = 'exclude file/directory names that match these '
705+ 'comma-separated globs' )
633706
634707 args = parser .parse_args (argv [1 :])
635708
@@ -638,21 +711,17 @@ def _main(argv, standard_out, standard_error):
638711 file = standard_error )
639712 return 1
640713
714+ if args .exclude :
715+ args .exclude = _split_comma_separated (args .exclude )
716+ else :
717+ args .exclude = set ([])
718+
641719 filenames = list (set (args .files ))
642- while filenames :
643- name = filenames .pop (0 )
644- if args .recursive and os .path .isdir (name ):
645- for root , directories , children in os .walk (unicode (name )):
646- filenames += [os .path .join (root , f ) for f in children
647- if f .endswith ('.py' ) and
648- not f .startswith ('.' )]
649- directories [:] = [d for d in directories
650- if not d .startswith ('.' )]
651- else :
652- try :
653- fix_file (name , args = args , standard_out = standard_out )
654- except IOError as exception :
655- print (unicode (exception ), file = standard_error )
720+ for name in find_files (filenames , args .recursive , args .exclude ):
721+ try :
722+ fix_file (name , args = args , standard_out = standard_out )
723+ except IOError as exception :
724+ print (unicode (exception ), file = standard_error )
656725
657726
658727def main ():
0 commit comments