使用python对文件进行批量处理

使用python对文件进行批量处理

2023年7月3日发(作者:)

使用python对文件进行批量处理。代码有点长,封装成类了,主要用于对文件进行批量处理:1、批量移动文件(符合某种后缀的);2、批量查找两个文件夹中重复的文件;3、批量同步两个文件夹的文件。对于2和3,我现在用duplicate这个软件,同步的话使用File Synchronizer这个软件,代码的话不怎么用了;4、批量移动和复制文件的时候会遇到一个问题,就是若存在相同文件名的情况,这个时候可以用在文件名后加“-1”、“-2”这种方式来解决,比之前用随机时间的方式要好,那个产生的文件名太长了;5、批量提取docx文档中的图片(如何批量提取doc中的呢?去excelhome论坛搜索vba转doc为docx的代码,批量转换即可)。

Sub doc2docx()
    ' Convert the .doc files picked in a file dialog to .docx, saving each
    ' converted copy next to its source file.
    ' NOTE(review): the object/method names (Application.FileDialog, Filters,
    ' Documents.Open, SaveAs) were stripped by the page extraction and are
    ' reconstructed here - confirm against the original macro.
    Dim myDialog As FileDialog
    Set myDialog = Application.FileDialog(msoFileDialogFilePicker)
    Dim oFile As Object
    Dim oFilePath As Variant

    With myDialog
        .Filters.Clear                                  ' remove every existing file filter
        .Filters.Add "所有 WORD2007 ⽂件", "*.doc", 1   ' only offer .doc files
        .AllowMultiSelect = True                        ' allow picking several files
        If .Show = -1 Then                              ' -1: the user confirmed the dialog
            For Each oFilePath In .SelectedItems
                ' Only touch real ".doc" names; the "*.doc" filter can also
                ' match ".docx" files, which must not be converted again.
                If LCase(Right(oFilePath, 4)) = ".doc" Then
                    Set oFile = Documents.Open(oFilePath)
                    ' Append "x" instead of Replace(..., "doc", "docx"), which
                    ' would also rewrite "doc" inside folder names.
                    ' FileFormat 16 is the value the original macro passes.
                    oFile.SaveAs FileName:=oFilePath & "x", FileFormat:=16
                    oFile.Close                         ' do not leave every document open
                End If
            Next
        End If
    End With
End Sub

from genericpath import exists
import os
import shutil
from os import path
from pathlib import Path
from hashlib import md5
from PIL import Image
import zipfile
from send2trash import send2trash

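# Revised by Stephen Shen @zju
# 2021-4-8 10:14:22
# /digressions/python/2020/04/13/#pathrenametarget  (host name lost in extraction)
# /project/Send2Trash/  (host name lost in extraction)
# Revised by Stephen Shen @zju
# 2021-03-15 09:24:10
# Notes on using the zipfile module:
# /ManyQian/p/  (host name and article id lost in extraction)
#
# shutil.copyfile("oldfile", "newfile")  # oldfile and newfile must both be files
# shutil.copy("oldfile", "newfile")      # oldfile must be a file; newfile may be a file or a target directory
# # Copy a directory:
# shutil.copytree("olddir", "newdir")    # both must be directories, and newdir must not exist yet
# # Rename a file (or directory):
# os.rename("oldname", "newname")        # the same call works for files and directories
# # Move a file (or directory):
# shutil.move("oldpos", "newpos")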

29

class PathBox():
    """Static helpers for batch file management.

    Covers moving/copying with collision-free names, duplicate removal by
    MD5, one-way directory sync, archive extraction and image resizing.

    NOTE(review): this listing was recovered from a web extraction that
    stripped most dotted names (``os.walk``, ``shutil.copyfile``,
    ``path.exists`` ...). The module and attribute names used below are
    reconstructed from the surviving fragments - confirm against the
    original script. ``Image`` (PIL) and ``send2trash`` come from the
    file-level imports.
    """

    def __init__(self):
        pass

    @staticmethod
    def batchExtractPicsFromDocs(srcDir, dstDir, zipDir):
        """Copy every ``.docx`` under *srcDir* into *zipDir* as ``.zip`` and unpack it.

        A docx file is a zip archive, so its pictures end up under
        ``<zipDir>/<stem>/word/media``. All three directories are created
        when missing. *dstDir* is only created and forwarded to
        ``extractZipFile``; collecting the pictures into it is left to the
        caller (e.g. ``batchMoveFilesToOneFolder(zipDir, dstDir, fexts=...)``).
        """
        src_dir = Path(srcDir)
        dst_dir = Path(dstDir)
        zip_dir = Path(zipDir)
        for folder in (dst_dir, src_dir, zip_dir):
            folder.mkdir(parents=True, exist_ok=True)

        for root, _dirs, files in os.walk(src_dir):
            for name in files:
                src_path = Path(root).joinpath(name)
                if src_path.suffix != '.docx':
                    print('{} is not docx'.format(str(src_path)))
                    continue
                zip_path = zip_dir.joinpath(src_path.stem + '.zip')
                if zip_path.exists():
                    print('{} is existed'.format(str(zip_path)))
                else:
                    PathBox.copyAsZip(src_path, zip_path)
                    print('{} is copied as zip file'.format(zip_path))

        PathBox.extractZipFile(zip_dir, dst_dir)

    @staticmethod
    def copyAsZip(srcpath, dstpath):
        """Copy the file *srcpath* to *dstpath* (content only, no metadata)."""
        shutil.copyfile(srcpath, dstpath)

    @staticmethod
    def extractPics(dstDir, zippath, zipDir):
        """Unpack *zippath* into *zipDir* and return the ``word/media`` path.

        Returns ``None`` when the archive cannot be opened or extracted.
        *dstDir* is accepted for interface compatibility and is not used.
        """
        try:
            with zipfile.ZipFile(zippath, 'r') as archive:
                print('{zippath} is extracted'.format(zippath=zippath))
                archive.extractall(zipDir)
        except (zipfile.BadZipFile, OSError):
            print('{zippath} cannot be extracted'.format(zippath=zippath))
            return None
        return Path(zipDir).joinpath('word/media')

    @staticmethod
    def getFileMd5(file_name):
        """Return the hex MD5 digest of *file_name*, read in 4096-byte chunks.

        :param file_name: path of the file to hash
        :return: hexadecimal digest string
        """
        digest = md5()
        with open(file_name, 'rb') as fobj:
            for chunk in iter(lambda: fobj.read(4096), b''):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def syncFiles(srcDir, dstDir):
        """Copy files from *srcDir* into *dstDir*, preserving relative paths.

        A file that already exists in *dstDir* is overwritten only when its
        size differs from the source; missing parent directories are created.
        NOTE(review): the comparison call was stripped in extraction; a size
        comparison (``os.path.getsize``) is the reconstruction that fits the
        surviving fragment - confirm.
        """
        src_dir = Path(srcDir)
        dst_dir = Path(dstDir)
        for root, _dirs, files in os.walk(src_dir):
            for name in files:
                src_path = Path(root).joinpath(name)
                dst_path = dst_dir.joinpath(src_path.relative_to(src_dir))
                if (dst_path.exists()
                        and os.path.getsize(src_path) == os.path.getsize(dst_path)):
                    print('{} is existed'.format(str(src_path)))
                    continue
                dst_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copyfile(src_path, dst_path)
                print('{} is copied'.format(str(src_path)))

    @staticmethod
    def extractZipFile(srcDir, dstDir):
        """Unpack every ``.zip``/``.rar`` under *srcDir* into ``<srcDir>/<stem>``.

        Archives that ``zipfile`` cannot read (which includes real RAR files)
        are reported and skipped. *dstDir* is accepted for interface
        compatibility and is not used.
        """
        src_dir = Path(srcDir)
        for root, _dirs, files in os.walk(src_dir):
            for name in files:
                file_path = Path(root).joinpath(name)
                if file_path.suffix not in ('.zip', '.rar'):
                    continue
                target_dir = src_dir.joinpath(file_path.stem)
                try:
                    with zipfile.ZipFile(str(file_path), 'r') as archive:
                        target_dir.mkdir(parents=True, exist_ok=True)
                        print('{} is extracted'.format(str(file_path)))
                        archive.extractall(target_dir)
                except (zipfile.BadZipFile, OSError):
                    print('{} cannot be extracted'.format(str(file_path)))

    @staticmethod
    def getImageMd5(img_path):
        """Return the hex MD5 digest of *img_path*, or ``None`` if it cannot be read."""
        try:
            # ``with`` closes the handle even when read() fails.
            with open(img_path, 'rb') as img:
                return md5(img.read()).hexdigest()
        except OSError:
            return None

    @staticmethod
    def batchRenameFileSuffix(srcDir):
        """Rename every ``.jpeg``/``.jpg`` file under *srcDir* to use ``.JPG``."""
        for root, _dirs, files in os.walk(srcDir):
            for name in files:
                srcpath = Path(os.path.join(root, name))
                if srcpath.suffix in ('.jpeg', '.jpg'):
                    srcpath.rename(srcpath.parent / (srcpath.stem + '.JPG'))
                    print('{} is renamed'.format(srcpath))

    @staticmethod
    def compareTwoDirsByCount(srcDir, dstDir):
        """Delete each sub-directory of *srcDir* whose file count matches *dstDir*.

        For every directory directly under *srcDir*, files are counted
        recursively there and in the same-named directory under *dstDir*;
        when the counts are equal the source sub-directory is removed with
        ``shutil.rmtree``. Only counts are compared, not names or content.
        """
        def count_files(top):
            return sum(len(files) for _root, _dirs, files in os.walk(top))

        for entry in os.listdir(srcDir):
            srcpath = os.path.join(srcDir, entry)
            if not os.path.isdir(srcpath):
                # rmtree only works on directories; skip plain files.
                continue
            dstpath = os.path.join(dstDir, entry)
            if count_files(srcpath) == count_files(dstpath):
                shutil.rmtree(srcpath)
                print('{} is removed'.format(srcpath))

    @staticmethod
    def batchRenameFileName(srcDir):
        """Rename each file under *srcDir* to ``<n><suffix>``.

        ``n`` is the smallest positive integer whose name is not already
        taken in the file's own directory.
        """
        for root, _dirs, files in os.walk(srcDir):
            root_path = Path(root)
            for name in files:
                file_path = root_path.joinpath(name)
                index = 1
                new_file_path = file_path.with_name(str(index) + file_path.suffix)
                while new_file_path.exists():
                    index += 1
                    new_file_path = file_path.with_name(str(index) + file_path.suffix)
                file_path.rename(new_file_path)
                print('{} is renamed'.format(str(file_path)))

    @staticmethod
    def tongji(srcDir):
        """Print the number of directory entries of every sub-directory of *srcDir*.

        ("tongji" is pinyin for "statistics".) The count comes from
        ``os.listdir`` and therefore includes nested directories.
        """
        top = Path(srcDir)
        for root, _dirs, _files in os.walk(srcDir):
            if Path(root) == top:
                continue
            count = len(os.listdir(root))
            print('{0} have total of {1} files'.format(root, count))

    @staticmethod
    def rmEmptyDirs(srcDir):
        """Remove every empty directory below *srcDir* (never *srcDir* itself).

        The tree is walked bottom-up so a directory that only contains empty
        directories is removed as well.
        """
        top = Path(srcDir)
        for root, _dirs, _files in os.walk(top, topdown=False):
            current = Path(root)
            if current == top:
                continue
            try:
                current.rmdir()
            except OSError:
                # rmdir refuses non-empty directories; those are kept.
                pass

    @staticmethod
    def batchResizePics(srcDir):
        """Write thumbnails of all pictures under *srcDir* into ``<srcDir>-resize``.

        The directory layout is mirrored. Pictures whose size is not already
        800x600 are shrunk with ``Image.thumbnail`` and saved in JPEG format
        under their original file name; pictures that already have that size
        are copied unchanged. Files that cannot be processed are reported
        and skipped.

        :return: the path of the output directory (``srcDir + "-resize"``)
        """
        srcDir = os.fspath(srcDir)
        dstDir = srcDir + "-resize"
        size = (800, 600)
        pic_exts = ('.jpg', '.jpeg', '.png')

        for root, _dirs, files in os.walk(srcDir):
            # relative_to maps only the leading srcDir part of the path;
            # str.replace would rewrite every occurrence of it.
            newroot = Path(dstDir).joinpath(Path(root).relative_to(srcDir))
            newroot.mkdir(parents=True, exist_ok=True)

            for file in files:
                extension = os.path.splitext(file)[1]
                if extension.lower() not in pic_exts:
                    continue
                oldfile = Path(root).joinpath(file)
                newfile = newroot.joinpath(file)
                try:
                    with Image.open(oldfile) as im:
                        needs_resize = im.size != size
                        if needs_resize:
                            im.thumbnail(size)
                            im.save(newfile, "jpeg")
                    if needs_resize:
                        print('{} is thumbnailed'.format(oldfile))
                    else:
                        shutil.copyfile(oldfile, newfile)
                except OSError as exc:
                    print('{} is skipped: {}'.format(oldfile, exc))
        return dstDir

    @staticmethod
    def batchMoveNonePicsToOneFolder(srcDir, dstDir='parent'):
        """Move every non-picture file under *srcDir* into one folder.

        :param dstDir: target folder; ``'parent'`` (default) means *srcDir*
        """
        pic_exts = ('.jpg', '.png', '.jpeg')
        dst_dir = Path(srcDir) if dstDir == 'parent' else Path(dstDir)
        for root, _dirs, files in os.walk(srcDir):
            for name in files:
                src_path = Path(root).joinpath(name)
                if src_path.suffix.lower() in pic_exts:
                    continue
                dst_path = dst_dir.joinpath(name)
                if src_path == dst_path:
                    continue  # already where it belongs
                PathBox.moveFile(src_path, dst_path)

    @staticmethod
    def excludeFilesByName(srcDir, dstDir='parent'):
        """Move files whose name does not start with ``IMG`` out of the tree.

        :param dstDir: target folder; ``'parent'`` (default) means *srcDir*,
            in which case files directly inside *srcDir* are left alone
        """
        src_dir = Path(srcDir)
        dst_dir = src_dir if dstDir == 'parent' else Path(dstDir)
        for root, _dirs, files in os.walk(src_dir):
            if dstDir == 'parent' and Path(root) == src_dir:
                continue
            for name in files:
                if name.startswith("IMG"):
                    continue
                PathBox.moveFile(Path(root).joinpath(name), dst_dir.joinpath(name))

    @staticmethod
    def batchMoveFilesToOneFolder(srcDir, dstDir='parent', fexts='all'):
        """Flatten *srcDir*: move its files (optionally by suffix) into one folder.

        :param dstDir: target folder; ``'parent'`` (default) or an empty
            value means *srcDir* itself
        :param fexts: ``'all'`` or a collection of suffixes such as
            ``['.jpg', '.png']``; only matching files are moved
        """
        src_dir = Path(srcDir)
        dst_dir = src_dir if (dstDir == 'parent' or not dstDir) else Path(dstDir)

        for root, _dirs, files in os.walk(src_dir):
            for name in files:
                src_path = Path(root).joinpath(name)
                if fexts != 'all' and src_path.suffix not in fexts:
                    continue
                dst_path = dst_dir.joinpath(name)
                if src_path == dst_path:
                    # Already in the target folder; moving it onto itself
                    # would only produce a pointless "_1" rename.
                    continue
                PathBox.moveFile(src_path, dst_path)

    @staticmethod
    def moveFile(src_path, dst_path):
        """Move *src_path* to *dst_path* without overwriting an existing file.

        When *dst_path* is taken, ``_1``, ``_2`` ... is appended to the stem
        until a free name is found.

        :return: the path the file was actually moved to
        """
        dst_path = Path(dst_path)
        new_dst_path = dst_path
        index = 1
        while new_dst_path.exists():
            new_dst_path = dst_path.with_name(
                dst_path.stem + '_' + str(index) + dst_path.suffix)
            index += 1
        # Move to the collision-free name, not to the original dst_path.
        shutil.move(str(src_path), str(new_dst_path))
        print('{} is moved'.format(src_path))
        return new_dst_path

    @staticmethod
    def copyFile(src_path, dst_path):
        """Copy *src_path* to *dst_path* without overwriting an existing file.

        Uses the same ``_1``, ``_2`` ... naming as ``moveFile``.

        :return: the path of the copy, or ``None`` when copying failed
        """
        dst_path = Path(dst_path)
        target = dst_path
        index = 1
        while target.exists():
            # Always derive from the original name so suffixes do not chain
            # into "name_1_2".
            target = dst_path.with_name(
                dst_path.stem + '_' + str(index) + dst_path.suffix)
            index += 1
        try:
            shutil.copyfile(src_path, target)
        except (OSError, shutil.Error) as exc:
            print('{} cannot be copied: {}'.format(src_path, exc))
            return None
        return target

    @staticmethod
    def compareDirsDeleteTheSameFile(srcDir, dstDir, mode='keep'):
        """Trash files in *srcDir* that also exist at the same relative path in *dstDir*.

        Only the relative path is compared, not the content.

        :param mode: ``'delete'`` sends the matching source files to the
            recycle bin; ``'keep'`` (default) changes nothing
        """
        if mode != 'delete':
            return
        src_dir = Path(srcDir)
        for root, _dirs, files in os.walk(srcDir):
            for name in files:
                src_path = Path(os.path.join(root, name))
                dst_path = Path(dstDir).joinpath(src_path.relative_to(src_dir))
                if not dst_path.exists():
                    continue
                try:
                    send2trash(str(src_path))
                    print('{} is removed'.format(src_path))
                except Exception:
                    # Best effort: one file that cannot be trashed must not
                    # stop the whole batch.
                    print('{} cannot be removed'.format(src_path))

    @staticmethod
    def batchRemoveTheSameFileByMD5(srcKeepDir, srcCompareDirs=None):
        """Send duplicate files (same MD5) to the recycle bin.

        *srcKeepDir* is scanned first, then each directory in
        *srcCompareDirs*; the first file seen with a given digest is kept and
        every later file with the same digest is trashed.

        :param srcCompareDirs: optional iterable of additional directories
        """
        seen = {}
        folders = [srcKeepDir]
        if srcCompareDirs:
            folders.extend(srcCompareDirs)

        for folder in folders:
            for root, _dirs, files in os.walk(Path(folder)):
                for name in files:
                    f_path = Path(root).joinpath(name)
                    file_md5 = PathBox.getFileMd5(f_path)
                    if not file_md5:
                        continue
                    if file_md5 not in seen:
                        seen[file_md5] = f_path
                    else:
                        send2trash(str(f_path))
                        print('{} is removed'.format(f_path))

434

if __name__ == '__main__':
    # Usage examples; uncomment the section you need.
    # NOTE(review): the Windows paths below lost their backslashes in the
    # page extraction (e.g. r'D:_soft' was presumably r'D:\_soft') - fix them
    # before running anything.

    # ---------------- extract pics from docx ----------------
    # srcDir = r'D:Civil32109012_⼯具包_参考资料_桥梁检测报告'
    # dstDir = r'D:Civilextract'
    # zipDir = r'D:Civilzip'
    # PathBox.batchExtractPicsFromDocs(srcDir, dstDir, zipDir)
    # PathBox.batchMoveFilesToOneFolder(zipDir, dstDir, fexts=['.jpg', '.png', '.emf', '.jpeg'])

    # ---------------- batch move files via fexts ----------------
    # PathBox.batchMoveFilesToOneFolder(srcDir, fexts=['.doc'])
    # srcDir = r'D:Civil32109012_⼯具包_softSmall'
    # PathBox.batchMoveFilesToOneFolder(srcDir)

    # ---------------- batch exclude the same file in dirs ----------------
    # Active example: duplicates inside srcKeepDir go to the recycle bin.
    srcKeepDir = r'D:_soft'
    # srcCompareDirs = [r'D:test']
    PathBox.batchRemoveTheSameFileByMD5(srcKeepDir)
    # PathBox.batchRemoveTheSameFileByMD5(srcKeepDir, srcCompareDirs)

    # ---------------- batch resize the images in dirs ----------------
    # srcDir = r'D:BaiduNetdiskDownload温州东瓯DAQIAO'
    # # dstDir = r'D:衢州报告-resize'
    # dstDir = PathBox.batchResizePics(srcDir)
    # PathBox.compareDirsDeleteTheSameFile(srcDir, dstDir, mode='delete')
    # PathBox.rmEmptyDirs(srcDir)

    # ---------------- batch sync the files between two dirs ----------------
    # srcDir = r'C:UsersAdministratorDocumentsdebugwolf'
    # dstDir = r'C:UsersAdministratorDocumentsdebug_待整理'
    # PathBox.syncFiles(srcDir, dstDir)
    # PathBox.compareDirsDeleteTheSameFile(srcDir, dstDir, mode='delete')
    # PathBox.rmEmptyDirs(srcDir)

    # ---------------- batch move files whose name does not start with IMG ----------------
    # srcDir = r'D:衢州报告'
    # PathBox.excludeFilesByName(srcDir)

发布者:admin,转载请注明出处:http://www.yc00.com/xiaochengxu/1688380791a129395.html

相关推荐

发表回复

评论列表(0条)

  • 暂无评论

联系我们

400-800-8888

在线咨询: QQ交谈

邮件:admin@example.com

工作时间:周一至周五,9:30-18:30,节假日休息

关注微信