|
|
https://demon.tw/programming/vbs-validate-utf8.html
这里用vbscript逐字节读取转换文件
http://www.bathome.net/thread-71209-1-1.html
这里3楼用jscript逐字节读取转换文件
https://demon.tw/programming/vbs-binary-file-another.html
这里采用了Microsoft.XMLDOM对象读写二进制
经测试，用 Microsoft.XMLDOM 对象读写二进制明显更高效一点。
- Dim t1, t2, oWMI, oSWbemDateTime, s
-
- ' Benchmark driver: echoes the result of code1 and code2 for one file,
- ' each followed by the WMI LocalDateTime stamps taken before and after the call.
- s = "F:\资料\电子书\诡刺.txt"
-
- Set oWMI = GetObject("Winmgmts:\\.\Root\CIMV2")
- Set oSWbemDateTime = CreateObject("WbemScripting.SWbemDateTime")
-
- ' --- code1 (charset passed explicitly) ---
- StampLocalDateTime t1
- wsh.Echo code1(s, "utf-8")
- StampLocalDateTime t2
- wsh.Echo t1 & vbCrLf & t2
-
- ' --- code2 ---
- StampLocalDateTime t1
- wsh.Echo code2(s)
- StampLocalDateTime t2
- wsh.Echo t1 & vbCrLf & t2
-
- ' Stores Win32_OperatingSystem.LocalDateTime into the caller's variable.
- ' The argument is ByRef, so it is left untouched if WMI yields no instance.
- Sub StampLocalDateTime(ByRef target)
-     Dim osInstance
-     For Each osInstance In oWMI.InstancesOf("Win32_OperatingSystem")
-         target = osInstance.LocalDateTime
-     Next
- End Sub
-
- Function code1(ByVal file, ByVal Code)
- ' Compares the bytes of a file, rendered as bin.hex text, with the bin.hex
- ' text of the binary data produced by a WIA.Vector.
- '   file - path passed to ADODB.Stream.LoadFromFile
- '   Code - charset name assigned to the stream before ReadText
- ' Returns True for a zero-length file; otherwise True only when the two hex
- ' strings are equal.
- ' NOTE(review): i is declared but never used, and the stream is never closed
- ' explicitly inside this function.
- Dim oVector, oNode, oStream, s, i
- code1 = True
- Set oVector = CreateObject("WIA.Vector")
- Set oNode = CreateObject("Microsoft.XMLDOM").CreateElement("b")
- ' With DataType "bin.hex" the node is used below to turn binary data
- ' (NodeTypedValue) into hex text (Text).
- oNode.DataType = "bin.hex"
- Set oStream = CreateObject("ADODB.Stream")
- oStream.Type = 1 'adTypeBinary
- oStream.Mode = 3 'adModeReadWrite
- oStream.Open()
- oStream.LoadFromFile file
- ' Empty file: leave with the True assigned above.
- If oStream.Size = 0 Then Exit Function
- oNode.NodeTypedValue = oStream.Read()
- ' s = hex text of the raw file bytes
- s = oNode.Text
- ' Rewind, then switch the stream to text mode using the caller's charset.
- oStream.Position = 0
- oStream.Type = 2 'adTypeText
- oStream.Charset = Code
- ' NOTE(review): t is not in the Dim list above and is never read afterwards.
- t = oStream.ReadText()
- ' NOTE(review): the vector is filled from s (the hex text), not from the
- ' decoded text t - confirm this is intended; as written the decoded text
- ' does not take part in the comparison.
- oVector.SetFromString s
- oNode.NodeTypedValue = oVector.BinaryData
- If s <> oNode.Text Then code1 = False
- End Function
-
- Function code2(ByVal file)
-     ' Checks whether a file is valid UTF-8 by decoding it with ADODB.Stream,
-     ' re-encoding the decoded text as UTF-8, and comparing the bytes (as
-     ' bin.hex text) with the original.
-     '   file - path of the file to check
-     ' Returns True when the file is empty or survives the round trip unchanged,
-     ' False otherwise.
-     '
-     ' The re-encoded stream starts with a UTF-8 BOM (EF BB BF). The original
-     ' always prepended "efbbbf" to the expected value, so a file that already
-     ' began with a BOM was reported as invalid. Both forms are accepted now:
-     ' round trip == original bytes (file had a BOM), or
-     ' round trip == BOM + original bytes (file had none).
-     Dim oNode, oStream, sOriginalHex, sRoundTripHex, sText
-     code2 = True
-     Set oNode = CreateObject("Microsoft.XMLDOM").CreateElement("b")
-     oNode.DataType = "bin.hex"
-     Set oStream = CreateObject("ADODB.Stream")
-     oStream.Type = 1 'adTypeBinary
-     oStream.Mode = 3 'adModeReadWrite
-     oStream.Open()
-     oStream.LoadFromFile file
-     If oStream.Size = 0 Then
-         ' Empty file counts as valid; release the stream before leaving.
-         oStream.Close()
-         Exit Function
-     End If
-     ' Hex text of the raw file bytes.
-     oNode.NodeTypedValue = oStream.Read()
-     sOriginalHex = oNode.Text
-     ' Type may only be changed at position 0, so rewind first.
-     oStream.Position = 0
-     oStream.Type = 2 'adTypeText
-     oStream.Charset = "utf-8"
-     sText = oStream.ReadText()
-     ' Truncate the stream and write the decoded text back as UTF-8.
-     oStream.Position = 0
-     oStream.SetEOS()
-     oStream.WriteText sText
-     ' Read the re-encoded bytes back as hex text.
-     oStream.Position = 0
-     oStream.Type = 1 'adTypeBinary
-     oNode.NodeTypedValue = oStream.Read()
-     sRoundTripHex = oNode.Text
-     oStream.Close()
-     If sRoundTripHex <> "efbbbf" & sOriginalHex And sRoundTripHex <> sOriginalHex Then
-         code2 = False
-     End If
- End Function
-
- Function code3(ByVal file)
-     ' Checks whether a file looks like valid UTF-8 by reading it byte by byte,
-     ' mapping every byte to the character with the same code (0-255), and
-     ' testing the resulting string against a regular expression that matches
-     ' malformed sequences.
-     '   file - path of the file to check
-     ' Returns True when the file is empty or the pattern finds no match,
-     ' False otherwise.
-     ' NOTE(review): the pattern still treats F8-FE as lead bytes of 5-7 byte
-     ' sequences and has no alternative for a lone FF lead byte - confirm
-     ' whether stricter (RFC 3629) validation is wanted.
-     Dim oStream, oRegExp, arr(), idx, sPattern
-     Set oStream = CreateObject("ADODB.Stream")
-     oStream.Type = 1 'adTypeBinary
-     oStream.Mode = 3 'adModeReadWrite
-     oStream.Open()
-     oStream.LoadFromFile file
-     If oStream.Size = 0 Then
-         ' Empty file counts as valid; close the stream on this path too.
-         oStream.Close()
-         code3 = True
-         Exit Function
-     End If
-     ' One array slot per byte; each byte becomes a single character.
-     ReDim arr(oStream.Size - 1)
-     For idx = 0 To oStream.Size - 1
-         arr(idx) = ChrW(AscB(oStream.Read(1)))
-     Next
-     oStream.Close()
-     ' First group: a lead byte followed by too few continuation bytes.
-     ' Second group: a continuation byte where none is expected.
-     ' Last alternative: a continuation byte at the very start.
-     sPattern = "[\xC0-\xDF]([^\x80-\xBF]|$)" & _
-         "|[\xE0-\xEF].{0,1}([^\x80-\xBF]|$)" & _
-         "|[\xF0-\xF7].{0,2}([^\x80-\xBF]|$)" & _
-         "|[\xF8-\xFB].{0,3}([^\x80-\xBF]|$)" & _
-         "|[\xFC-\xFD].{0,4}([^\x80-\xBF]|$)" & _
-         "|[\xFE-\xFE].{0,5}([^\x80-\xBF]|$)" & _
-         "|[\x00-\x7F][\x80-\xBF]" & _
-         "|[\xC0-\xDF].[\x80-\xBF]" & _
-         "|[\xE0-\xEF]..[\x80-\xBF]" & _
-         "|[\xF0-\xF7]...[\x80-\xBF]" & _
-         "|[\xF8-\xFB]....[\x80-\xBF]" & _
-         "|[\xFC-\xFD].....[\x80-\xBF]" & _
-         "|[\xFE-\xFE]......[\x80-\xBF]" & _
-         "|^[\x80-\xBF]"
-     Set oRegExp = New RegExp
-     oRegExp.MultiLine = False
-     oRegExp.Pattern = sPattern
-     ' Any match means a malformed sequence was found.
-     code3 = Not oRegExp.Test(Join(arr, ""))
- End Function
复制代码
code1采用WIA.Vector对象转换文本与二进制
code2采用ADODB.Stream对象转换文本与二进制
code3是用vbscript逐字节读取转换文件
测试1.23M的文件
code1 = 0.266秒
code2 = 0.243秒
code3 = 1.652秒
测试2.83M的文件
code1 = 1.082秒
code2 = 1.015秒
code3 = 3.749秒
测试4.87M的文件
code1 = 3.056秒
code2 = 2.903秒
估计要是有直接进行二进制数据比较的com对象或函数会更快一点 |
|