批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程
[批处理文件精品]批处理版照片整理器[批处理文件精品]纯批处理备份&还原驱动在线第三方下载
返回列表 发帖

[原创] vbs 转换文件编码和换行符

可能会出现更新,见 https://www.bajins.com/Shell/WindowsVBScript.html#%E6%96%87%E4%BB%B6%E7%BC%96%E7%A0%81%E8%BD%AC%E6%8D%A2
  ' Usage: drag a file onto this .vbs (or double-click it and pick a file in the
  ' dialog), then enter the target character encoding.
  ' Command line: <script>.vbs <file> [<charset>]
  Call Run

  ' Entry point: converts one file to the requested encoding and normalises its
  ' line endings to CRLF.
  '
  ' Arguments (WScript.Arguments):
  '   0 - path of the file to convert; when missing, a file-picker dialog is shown.
  '   1 - target charset; when missing, the user is prompted (default "UTF-8").
  ' Accepted aliases: UTF-8 / UTF8, UTF-8-BOM / UTF8BOM, GB2312, UNICODE, ANSI.
  ' A charset typed at the prompt must be one of these; a charset given on the
  ' command line is passed through unchanged (apart from upper-casing and alias
  ' normalisation), as before.
  '
  ' Errors raised while reading or writing are deliberately NOT suppressed: a
  ' failed read must never be followed by a write, otherwise the file would be
  ' overwritten with empty content.
  Function Run()
      Dim fdpath, charset, hta, oldCharSet, readCharSet
      Dim fromPrompt, known, compact, text, stripBom

      If WScript.Arguments.Length >= 1 Then
          fdpath = WScript.Arguments(0)
      Else
          hta = """about:<input type=file id=f><script>f.click();" & _
          "new ActiveXObject('Scripting.FileSystemObject').GetStandardStream(1).WriteLine(f.value);" & _
          "close();resizeTo(0,0);</script>"""
          ' Open the file dialog and read the selected path from mshta's stdout.
          ' Error suppression is limited to this call: if the dialog closes
          ' without printing anything, ReadLine raises and fdpath stays empty.
          On Error Resume Next
          fdpath = CreateObject("WScript.Shell").Exec("mshta.exe " & hta ).StdOut.ReadLine
          On Error GoTo 0
      End If
      If fdpath = "" Then
          WScript.echo "必须输入文件路径"
          WScript.Quit
      End If

      fromPrompt = (WScript.Arguments.Length < 2)
      If fromPrompt Then
          charset = InputBox("字符编码,默认UTF-8","请输入字符编码","UTF-8")
      Else
          charset = WScript.Arguments(1)
      End If
      If charset = "" Then
          WScript.echo "必须输入字符编码"
          WScript.Quit
      End If

      ' Canonicalise the target name so that it can be compared directly with
      ' the upper-cased result of CheckCode.
      charset = UCase(charset)
      compact = Replace(charset, "-", "")
      known = True
      If compact = "UTF8" Then
          charset = "UTF-8"
      ElseIf compact = "UTF8BOM" Then
          charset = "UTF-8-BOM"
      ElseIf charset = "ANSI" Then
          ' ANSI is not an encoding by itself; it maps to the system code page.
          ' On Simplified Chinese systems that is 936, i.e. GB2312.
          charset = "GB2312"
      ElseIf charset <> "GB2312" And charset <> "UNICODE" Then
          known = False
      End If
      If fromPrompt And Not known Then
          WScript.echo "不支持的格式: " & charset
          WScript.Quit
      End If

      ' Only the single file at fdpath is processed; folders are not walked.
      oldCharSet = UCase(CheckCode(fdpath))
      WScript.echo "当前编码 "& oldCharSet &" 转换成编码" & charset
      If StrComp(charset, oldCharSet) = 0 Then
          MsgBox charset & " == " & oldCharSet,,"字符编码相同提示"
          WScript.Quit
      End If

      If oldCharSet = "UTF-8-BOM" And charset = "UTF-8" Then
          ' Same encoding apart from the marker: just drop the BOM bytes.
          UF8NoBOM fdpath
      Else
          ' "UTF-8-BOM" is only a label used by this script, not a charset
          ' name; read such files as UTF-8.
          If oldCharSet = "UTF-8-BOM" Then
              readCharSet = "UTF-8"
          Else
              readCharSet = oldCharSet
          End If
          text = ReadFile(fdpath, readCharSet)
          ' Collapse CRLF to LF first so existing CRLF pairs do not become CR CR LF.
          text = Replace(Replace(text, vbCrLf, vbLf), vbLf, vbCrLf)
          ' Decide before the call: the callee receives a copy "(charset)" so it
          ' cannot modify the variable this decision is based on.
          stripBom = (charset = "UTF-8")
          WriteToFile fdpath, text, (charset)
          ' ADODB.Stream emits a BOM for UTF-8 text. Remove it when plain UTF-8
          ' was requested, but only if the written file really starts with one.
          If stripBom Then
              If UCase(CheckCode(fdpath)) = "UTF-8-BOM" Then UF8NoBOM fdpath
          End If
      End If
      MsgBox "转换后编码:" & CheckCode(fdpath),,"字符编码转换结束提示"
  End Function
  ' Removes a leading UTF-8 byte order mark (EF BB BF) from the file at Path,
  ' rewriting the file in place.
  '
  ' Path - path of the file to rewrite.
  '
  ' The file is left untouched when it is shorter than three bytes or does not
  ' start with the BOM, so calling this on a BOM-less file cannot eat the first
  ' three bytes of real content.
  Function UF8NoBOM(Path)
      Dim srcStream, dstStream, head, hasBom

      Set srcStream = CreateObject("ADODB.Stream")
      srcStream.Type = 1    ' binary
      srcStream.Mode = 3    ' read/write
      srcStream.Open
      srcStream.LoadFromFile Path

      hasBom = False
      If srcStream.Size >= 3 Then
          head = srcStream.Read(3)
          hasBom = (AscB(MidB(head, 1, 1)) = &HEF) _
              And (AscB(MidB(head, 2, 1)) = &HBB) _
              And (AscB(MidB(head, 3, 1)) = &HBF)
      End If

      If hasBom Then
          ' Copy everything after the three BOM bytes into a fresh stream and
          ' overwrite the original file with it.
          srcStream.Position = 3
          Set dstStream = CreateObject("ADODB.Stream")
          dstStream.Type = 1
          dstStream.Mode = 3
          dstStream.Open
          srcStream.CopyTo dstStream
          dstStream.SaveToFile Path, 2    ' 2 = overwrite existing file
          dstStream.Close
          Set dstStream = Nothing
      End If

      srcStream.Close
      Set srcStream = Nothing
  End Function
  ' Writes Str to the file at Path using the given character set, overwriting
  ' any existing file.
  '
  ' Path    - destination file path.
  ' Str     - text to write.
  ' CharSet - target charset name. "UTF8", "UTF-8", "UTF8BOM" and "UTF-8-BOM"
  '           (any letter case) are all written with the stream charset "UTF-8".
  '
  ' Note: per the original author's remarks, ADODB.Stream puts a 3-byte BOM at
  ' the start of UTF-8 text output, so UTF-8 files written here carry a BOM.
  '
  ' The CharSet argument is never assigned to: VBScript passes arguments by
  ' reference by default, so rewriting it would silently change the caller's
  ' variable.
  Function WriteToFile(Path, Str, CharSet)
      Dim stm, streamCharset

      Select Case Replace(UCase(CharSet), "-", "")
          Case "UTF8", "UTF8BOM"
              streamCharset = "UTF-8"
          Case Else
              streamCharset = CharSet
      End Select

      Set stm = CreateObject("Adodb.Stream")
      stm.Type = 2    ' 1 = binary, 2 = text
      stm.Mode = 3    ' read/write
      stm.Charset = streamCharset
      stm.Open
      stm.WriteText Str
      stm.SaveToFile Path, 2    ' 2 = overwrite existing file
      stm.Close
      Set stm = Nothing
  End Function
  ' Reads the whole file at Path as text decoded with the given character set.
  '
  ' Path    - file to read.
  ' CharSet - charset name of the file. The labels "UTF-8-BOM" / "UTF8BOM"
  '           (as returned by CheckCode) and "UTF8" are read as "UTF-8",
  '           because they are labels of this script rather than charset names
  '           to hand to the stream.
  ' Returns the decoded file content as a string.
  Function ReadFile(Path, CharSet)
      Dim stm, streamCharset

      ' Work on a local copy; the CharSet argument itself is left unmodified.
      Select Case Replace(UCase(CharSet), "-", "")
          Case "UTF8", "UTF8BOM"
              streamCharset = "UTF-8"
          Case Else
              streamCharset = CharSet
      End Select

      Set stm = CreateObject("Adodb.Stream")
      stm.Type = 2    ' text
      stm.Mode = 3    ' read/write
      stm.Charset = streamCharset
      stm.Open
      stm.LoadFromFile Path
      ReadFile = stm.ReadText(-1)    ' -1 = read to end of stream
      stm.Close
      Set stm = Nothing
  End Function
  ' Detects the encoding of the file at Path.
  '
  ' Returns one of:
  '   "UTF-8-BOM" - file starts with EF BB BF
  '   "Unicode"   - file starts with FF FE
  '   "UTF-8"     - file is empty, or its bytes pass is_valid_utf8
  '   "GB2312"    - anything else (fallback)
  '
  ' The byte-order-mark tests run first because they only need the first three
  ' bytes; the full-content UTF-8 scan is done only when no BOM is present.
  ' Files shorter than the BOM are handled without touching missing bytes.
  Function CheckCode(Path)
      Dim slz, head, size, b1, b2, b3, Codes

      Set slz = CreateObject("Adodb.Stream")
      slz.Type = 1    ' binary
      slz.Mode = 3    ' read/write
      slz.Open
      slz.LoadFromFile Path
      size = slz.Size

      ' -1 marks "byte not present" so short files never match a BOM.
      b1 = -1 : b2 = -1 : b3 = -1
      If size > 0 Then
          head = slz.Read(3)
          b1 = AscB(MidB(head, 1, 1))
          If size >= 2 Then b2 = AscB(MidB(head, 2, 1))
          If size >= 3 Then b3 = AscB(MidB(head, 3, 1))
      End If
      slz.Close
      Set slz = Nothing

      If b1 = &HEF And b2 = &HBB And b3 = &HBF Then
          Codes = "UTF-8-BOM"
      ElseIf b1 = &HFF And b2 = &HFE Then
          Codes = "Unicode"
      ElseIf size = 0 Then
          Codes = "UTF-8"
      ElseIf is_valid_utf8(read(Path)) Then
          Codes = "UTF-8"
      Else
          Codes = "GB2312"
      End If
      CheckCode = Codes
  End Function
  ' Loads the file at path as raw bytes and returns them as a string in which
  ' every character's code equals the corresponding byte value (0-255).
  '
  ' path - file to read.
  ' Returns "" for an empty file.
  Function read(path)
      Dim ado, a(), i, n
      Set ado = CreateObject("ADODB.Stream")
      ado.Type = 1 : ado.Open    ' binary
      ado.LoadFromFile path
      n = ado.Size - 1
      If n < 0 Then
          ' Nothing to convert; release the stream before returning.
          ado.Close
          Set ado = Nothing
          read = ""
          Exit Function
      End If
      ReDim a(n)
      For i = 0 To n
          a(i) = ChrW(AscB(ado.Read(1)))
      Next
      ' Release the stream before returning.
      ado.Close
      Set ado = Nothing
      read = Join(a, "")
  End Function
  ' Checks whether a byte string is well-formed UTF-8 (works for files without
  ' a BOM).
  '
  ' input - string in which each character code is one byte value (0-255).
  '         Declared ByRef, as in the original, to avoid copying the string.
  ' Returns True when no malformed sequence is found, False otherwise.
  '
  ' The pattern lists every way a sequence can be malformed: a lead byte with
  ' too few continuation bytes (80-BF), a sequence followed by one continuation
  ' byte too many, a continuation byte at the start of the input, and bytes that
  ' can never occur in UTF-8 at all.
  Function is_valid_utf8(ByRef input)
      Dim s, re
      Set re = New Regexp
      ' Lead byte followed by too few continuation bytes.
      s = "[\xC0-\xDF]([^\x80-\xBF]|$)"
      s = s & "|[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)"
      s = s & "|[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)"
      s = s & "|[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)"
      s = s & "|[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)"
      s = s & "|[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)"
      ' Complete sequence followed by an extra continuation byte.
      s = s & "|[\x00-\x7F][\x80-\xBF]"
      s = s & "|[\xC0-\xDF].[\x80-\xBF]"
      s = s & "|[\xE0-\xEF]..[\x80-\xBF]"
      s = s & "|[\xF0-\xF7]...[\x80-\xBF]"
      s = s & "|[\xF8-\xFB]....[\x80-\xBF]"
      s = s & "|[\xFC-\xFD].....[\x80-\xBF]"
      s = s & "|[\xFE-\xFE]......[\x80-\xBF]"
      ' Continuation byte at the very start of the input.
      s = s & "|^[\x80-\xBF]"
      ' Octets C0, C1 and F5-FF never appear in valid UTF-8, whatever follows.
      s = s & "|[\xC0\xC1\xF5-\xFF]"
      re.Pattern = s
      is_valid_utf8 = (Not re.Test(input))
  End Function
复制代码

返回列表