python - ver - Filtrado de directorios y archivos os.walk()
rutas de archivos en python (8)
¿Por qué fnmatch?
import os
# Directory names to skip while listing. The original snippet left this as a
# placeholder ("...."), so fill in the names you want excluded.
excludes = []

# Walk the tree, printing every file whose name ends in "doc" or "odt" and
# every directory name that is not listed in ``excludes``.
for ROOT, DIR, FILES in os.walk("/path"):
    for filename in FILES:
        # str.endswith accepts a tuple, so one call covers both suffixes.
        if filename.endswith(('doc', 'odt')):
            print(filename)
    for directory in DIR:
        if directory not in excludes:
            print(directory)
no exhaustivamente probado
Estoy buscando una forma de incluir/excluir patrones de archivos y de excluir directorios en una llamada a os.walk().
Esto es lo que estoy haciendo ahora:
import fnmatch
import os
# Glob patterns that a file path must match to be kept.
includes = ['*.doc', '*.odt']
# Paths (or glob patterns) of directories and files to leave out.
excludes = ['/home/paulo-freitas/Documents']
def _filter(paths):
matches = []
for path in paths:
append = None
for include in includes:
if os.path.isdir(path):
append = True
break
if fnmatch.fnmatch(path, include):
append = True
break
for exclude in excludes:
if os.path.isdir(path) and path == exclude:
append = False
break
if fnmatch.fnmatch(path, exclude):
append = False
break
if append:
matches.append(path)
return matches
for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Assign through the slice so the very list objects handed out by
    # os.walk are modified; replacing ``dirs`` in place is what prunes the
    # directories that os.walk will descend into next.
    dirs[:] = _filter([os.path.join(root, d) for d in dirs])
    files[:] = _filter([os.path.join(root, f) for f in files])

    for filename in files:
        # Entries are already joined with ``root`` above; joining again would
        # duplicate the prefix whenever ``root`` is a relative path.
        print(filename)
La pregunta es: ¿hay una mejor manera de hacer esto? ¿Cómo?
Aquí hay una manera de hacerlo
import fnmatch
import os
# Directories to skip: any walked path containing one of these strings is
# ignored together with everything below it.
excludes = ['/home/paulo-freitas/Documents']
matches = []

for path, dirs, files in os.walk(os.getcwd()):
    # The original used ``continue`` inside a for/else, which only continued
    # the inner loop, so the else branch always ran and nothing was excluded.
    if any(eachpath in path for eachpath in excludes):
        # Every descendant path contains the same substring, so pruning the
        # subtree gives the same result without walking it.
        dirs[:] = []
        continue

    matches.extend(
        os.path.abspath(os.path.join(path, filename))
        for filename in files
        if fnmatch.fnmatch(filename, '*.doc') or fnmatch.fnmatch(filename, '*.odt')
    )

print(matches)
De docs.python.org :
os.walk(top[, topdown=True[, onerror=None[, followlinks=False]]])
Cuando topdown es True, el código que llama puede modificar la lista dirnames en el lugar (in-place) ... esto se puede utilizar para podar la búsqueda ...
for root, dirs, files in os.walk('/home/paulo-freitas', topdown=True):
    # Excludes could also be done with fnmatch.filter and a complementary
    # set, but that is more annoying to read.
    # Slice assignment edits the list os.walk is iterating from, so the
    # removed directories are never descended into.
    dirs[:] = [d for d in dirs if d not in excludes]

    for pat in includes:
        for f in fnmatch.filter(files, pat):
            print(os.path.join(root, f))
Debo señalar que el código anterior supone que excludes contiene nombres de directorio (patrones), no rutas completas. Habría que ajustar la comprensión de lista para filtrar con `if os.path.join(root, d) not in excludes` para que coincida con el caso del OP.
Esta solución usa fnmatch.translate para convertir patrones glob en expresiones regulares (supone que los includes solo se usan para archivos):
import fnmatch
import os
import os.path
import re
includes = ['*.doc', '*.odt']  # for files only
excludes = ['/home/paulo-freitas/Documents']  # for dirs and files

# Transform the glob patterns into regular expressions, joined into a single
# alternation per list.
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# r'$.' can never match, so an empty exclude list excludes nothing.
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'

for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Exclude dirs. Slice assignment keeps os.walk's own list in sync so the
    # excluded directories are not visited.
    dirs[:] = [os.path.join(root, d) for d in dirs]
    dirs[:] = [d for d in dirs if not re.match(excludes, d)]

    # Exclude/include files, working on full paths.
    files = [os.path.join(root, f) for f in files]
    files = [
        f for f in files
        if not re.match(excludes, f) and re.match(includes, f)
    ]

    for fname in files:
        print(fname)
Este es un ejemplo de exclusión de directorios y archivos con os.walk():
import os
import shutil

# Substrings that cause a directory (matched against its path) or a file
# (matched against its name) to be skipped.
ignoreDirPatterns = [".git"]
ignoreFilePatterns = [".php"]


def copyTree(src, dest, onerror=None):
    """Copy the tree under *src* into *dest*, skipping ignored entries.

    A directory is skipped, with everything below it, when its path contains
    any string from ``ignoreDirPatterns``; a file is skipped when its name
    contains any string from ``ignoreFilePatterns``. Destination directories
    are only created when they receive at least one file.

    Args:
        src: root of the tree to copy.
        dest: directory that receives the copy.
        onerror: optional callable taking an ``OSError``; it is passed to
            ``os.walk`` and is also called when a destination directory
            cannot be created.

    Requires Python >= 3.2 because of ``os.makedirs(..., exist_ok=True)``.
    """
    src = os.path.abspath(src)
    # Length of "<src><sep>", used to cut each root down to a relative path.
    src_prefix = len(src) + len(os.path.sep)

    for root, dirs, files in os.walk(src, onerror=onerror):
        if any(pattern in root for pattern in ignoreDirPatterns):
            continue

        # Prune ignored sub-directories so they are not walked at all; their
        # paths would contain the pattern and be skipped anyway.
        dirs[:] = [
            d for d in dirs
            if not any(pattern in d for pattern in ignoreDirPatterns)
        ]

        wanted = [
            name for name in files
            if not any(pattern in name for pattern in ignoreFilePatterns)
        ]
        if not wanted:
            continue

        dirpath = os.path.join(dest, root[src_prefix:])
        try:
            # Once per directory rather than once per file.
            os.makedirs(dirpath, exist_ok=True)
        except OSError as e:
            if onerror is not None:
                onerror(e)
            # Without the target directory, copying would fail or create a
            # stray file named like the directory, so skip these files.
            continue

        for name in wanted:
            shutil.copy(os.path.join(root, name), dirpath)
Requiere Python >= 3.2 debido al parámetro exist_ok de makedirs.
Los métodos anteriores no me funcionaron.
Entonces, esto es lo que surgió con una expansión de mi respuesta original a otra pregunta .
Lo que funcionó para mí fue:
if (not (str(root) + '/').startswith(tuple(exclude_foldr))):
que construye la ruta como cadena y excluye las carpetas enumeradas en mi tupla.
Esto me dio el resultado exacto que estaba buscando.
Mi objetivo para esto era mantener mi Mac organizado.
Puedo buscar en cualquier carpeta por su ruta, localizar y mover tipos de archivo específicos, ignorar subcarpetas y, de forma preventiva, preguntar al usuario si quiere mover los archivos.
NOTA: la pregunta se hace solo una vez por ejecución y NO una vez por archivo.
De manera predeterminada, la respuesta es NO cuando se presiona Enter en el indicador [y/N]; en ese caso solo se mostrará una lista de los archivos candidatos a mover.
Este es solo un fragmento de mi GitHub. Visítelo para ver el guion total.
SUGERENCIA: Lea el script a continuación, ya que agregué información por línea sobre lo que había hecho.
#!/usr/bin/env python3
# =============================================================================
# Created On : MAC OSX High Sierra 10.13.6 (17G65)
# Created On : Python 3.7.0
# Created By : Jeromie Kirchoff
# =============================================================================
"""THE MODULE HAS BEEN BUILD FOR KEEPING YOUR FILES ORGANIZED."""
# =============================================================================
from os import walk
from os import path
from shutil import move
import getpass
import click
mac_username = getpass.getuser()

# File name suffixes that should be collected.
includes_file_extensn = [".jpg", ".gif", ".png", ".jpeg"]

# path.dirname() on a string ending in "/" simply drops that trailing slash.
search_dir = path.dirname('/Users/' + mac_username + '/Documents/')
target_foldr = path.dirname('/Users/' + mac_username + '/Pictures/Archive/')

# Folders that must never be searched (the target folder among them).
exclude_foldr = {
    target_foldr,
    path.dirname('/Users/' + mac_username + '/Documents/GitHub/'),
    path.dirname('/Users/' + mac_username + '/Documents/Random/'),
    path.dirname('/Users/' + mac_username + '/Documents/Stupid_Folder/'),
}

# Asked once per run, not once per file. Pressing Enter selects the default
# (no), in which case the files are only listed.
question_moving = bool(click.confirm("Would you like to move files?",
                                     default=False))
def organize_files():
    """List, and optionally move, the matching files found under search_dir.

    Walks ``search_dir`` and, for every file whose name ends with one of
    ``includes_file_extensn`` and that does not live inside one of the
    ``exclude_foldr`` folders, prints its path. When ``question_moving`` is
    True the file is also moved into ``target_foldr``.
    """
    wanted_suffixes = tuple(includes_file_extensn)
    # Compare against "<folder>/" so that excluding ".../GitHub" does not
    # also exclude a sibling such as ".../GitHub_old".
    excluded_prefixes = tuple(
        str(folder).rstrip('/') + '/' for folder in exclude_foldr
    )

    # topdown=True is required so that pruning ``dirs`` takes effect.
    for root, dirs, files in walk(search_dir, topdown=True):
        if (str(root) + '/').startswith(excluded_prefixes):
            # Everything below an excluded folder shares its prefix, so the
            # whole subtree can be dropped.
            dirs[:] = []
            continue

        for file in files:
            # Only handle the file types we are looking for.
            if not file.endswith(wanted_suffixes):
                continue

            # normpath collapses the double "//" that plain string
            # concatenation can produce.
            filetomove = path.normpath(str(root) + '/' + str(file))
            movingfileto = path.normpath(str(target_foldr) + '/' + str(file))

            # Answering "no" at the prompt stops here: files are only listed.
            print('Files To Move: ' + str(filetomove))

            if question_moving is True:
                print('Moving File: ' + str(filetomove) +
                      "\n To:" + str(movingfileto))
                move(filetomove, movingfileto)
# Run the organizer only when executed as a script, not when imported.
if __name__ == '__main__':
    organize_files()
Ejemplo de ejecutar mi script desde la terminal:
$ python3 organize_files.py
Exclude list: {''/Users/jkirchoff/Pictures/Archive'', ''/Users/jkirchoff/Documents/Stupid_Folder'', ''/Users/jkirchoff/Documents/Random'', ''/Users/jkirchoff/Documents/GitHub''}
Files found will be moved to this folder:/Users/jkirchoff/Pictures/Archive
Would you like to move files?
No? This will just list the files.
Yes? This will Move your files to the target folder.
[y/N]:
Ejemplo de listado de archivos:
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
Files To Move: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
...etc
Ejemplo de mover archivos:
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/1.custom-award-768x512.jpg
To: /Users/jkirchoff/Pictures/Archive/1.custom-award-768x512.jpg
Moving File: /Users/jkirchoff/Documents/Archive/JayWork/10351458_318162838331056_9023492155204267542_n.jpg
To: /Users/jkirchoff/Pictures/Archive/10351458_318162838331056_9023492155204267542_n.jpg
...
dirtools es perfecto para su caso de uso:
from dirtools import Dir
# NOTE(review): presumably lists the files under "." while honoring the
# patterns in .gitignore -- confirm against the dirtools documentation.
print(Dir('.', exclude_file='.gitignore').files())
import os
# Glob patterns of the file names to look for.
includes = ['*.doc', '*.odt']
# Directory paths; only the first entry is used by the search loop below.
excludes = ['/home/paulo-freitas/Documents']
def file_search(path, exe):
    """Print the full path of every file under *path* that matches *exe*.

    Args:
        path: directory to search recursively.
        exe: either a glob pattern for the file name (e.g. ``'*.doc'``) or a
            plain suffix (e.g. ``'.doc'``).

    The original compared the last four characters of the name with *exe*,
    which can never equal a five-character glob such as ``'*.doc'``.
    """
    # Local import: this snippet only imports ``os`` at the top.
    import fnmatch

    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            # Suffix matching stays supported; the emptiness guard stops an
            # empty pattern from matching every file.
            if fnmatch.fnmatch(name, exe) or (exe and name.endswith(exe)):
                print(os.path.join(dirpath, name))
# Run one search per include pattern.
# NOTE(review): the search root is excludes[0], i.e. the directory named in
# the exclude list -- confirm this is the intended starting point.
for x in includes:
    file_search(excludes[0], x)