[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖
本帖最后由 flashercs 于 2019-9-3 19:46 编辑
  1. <#*,:&cls
  2. @echo off
  3. pushd "%~dp0"
  4. Powershell -NoProfile -ExecutionPolicy RemoteSigned -Command ". ([ScriptBlock]::Create((Get-Content -LiteralPath \"%~0\" -ReadCount 0 | Out-String ))) "
  5. popd
  6. pause
  7. exit /b
  8. #>
  9. # Batch/PowerShell hybrid: listing lines 1-8 are run by cmd.exe, which switches
  10. # to the script's own folder and re-runs this same file as a PowerShell script
  11. # block; PowerShell reads those lines as one <# ... #> block comment.
  12. #
  13. # Reads one word per line from $inputFile, requests the page of that name below
  14. # http://dict.cn/ and writes one tab-separated line per word to $outputFile:
  15. #   keyword <TAB> text of the <ul>/<li> items <TAB> sense:percent,sense:percent,...
  16. # A word whose page could not be fetched is written back unchanged.
  17. $VerbosePreference = "Continue"
  18. $inputFile = "单词列表.txt"
  19. $outputFile = "结果.txt"
  20. Get-Content -LiteralPath $inputFile -OutBuffer 10 | ForEach-Object -Begin {
  21.   # the output file is overwritten (append = $false) using [Encoding]::Default
  22.   $sw = New-Object -TypeName System.IO.StreamWriter -ArgumentList ("$pwd\$outputFile"), $false, ([System.Text.Encoding]::Default)
  23.   # webclient settings
  24.   $webclient = New-Object -TypeName System.Net.WebClient
  25.   $webclient.BaseAddress = 'http://dict.cn/'
  26.   $webclient.Encoding = [System.Text.Encoding]::UTF8
  27.   $webclient.Headers.Add("Accept", "text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8")
  28.   $webclient.Headers.Add("Accept-Encoding", "gzip")
  29.   $webclient.Headers.Add("Accept-Language", "en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3")
  30.   # xml parser
  31.   $xmldoc = New-Object -TypeName System.Xml.XmlDocument
  32.   # regexes: $recontent cuts out the <div class="word"> fragment up to the next
  33.   # <div class="section...">; $rejs matches <script> elements so they can be removed
  34.   $recontent = [regex]'(?si)<div\s+class="word"[^>]*>.*?(?=<div\s+class="section[^"]*"[^>]*>)'
  35.   $rejs = [regex]'(?si)<script[^>]*>.*?</script>'
  36.   # stringbuilder, reused for every output line
  37.   $strbuilder = New-Object -TypeName System.Text.StringBuilder
  38.   # JScript engine, used to evaluate the chart data as a JScript expression
  39.   Add-Type -AssemblyName Microsoft.Jscript
  40.   $vsaengine = [Microsoft.JScript.Vsa.VsaEngine]::CreateEngine()
  41.   Add-Type -AssemblyName System.Web
  42. } -Process {
  43.   # keep the word in its own variable: inside catch blocks $_ is the error record
  44.   $word = $_
  45.   Write-Verbose "Fetching $word ..."
  46.   # up to three attempts per word
  47.   for ($i = 2; $i -ge 0; $i--) {
  48.     try {
  49.       $readstream = $webclient.OpenRead($word)
  50.       Write-Verbose "Fetch $word success"
  51.       break
  52.     }
  53.     catch {
  54.       $_ | Out-String | Write-Host -ForegroundColor Red
  55.     }
  56.   }
  57.   if ($readstream) {
  58.     # forget the streams of the previous word so finally only touches this word's
  59.     $gzipstream = $null
  60.     $sr = $null
  61.     try {
  62.       # gunzip only when the server really compressed the response; an
  63.       # uncompressed answer would make GZipStream fail
  64.       if ($webclient.ResponseHeaders['Content-Encoding'] -match 'gzip') {
  65.         $gzipstream = New-Object -TypeName System.IO.Compression.GZipStream -ArgumentList $readstream, ([System.IO.Compression.CompressionMode]::Decompress)
  66.         $bodystream = $gzipstream
  67.       }
  68.       else {
  69.         $bodystream = $readstream
  70.       }
  71.       $sr = New-Object -TypeName System.IO.StreamReader -ArgumentList $bodystream, ([System.Text.Encoding]::UTF8)
  72. 
  73.       $match = $recontent.Match($sr.ReadToEnd())
  74.       if ($match.Success) {
  75.         Write-Verbose "Match $word success"
  76.         # remove scripts and escape <...> runs that consist only of characters
  77.         # outside \x00-\x7e before the fragment is parsed as XML
  78.         $xmldoc.LoadXml(($rejs.Replace($match.Value, '') -replace '<([^\x00-\x7e]+)>', '&lt;$1&gt;'))
  79.         $strbuilder.Length = 0
  80.         # word-cont
  81.         [void]$strbuilder.Append($xmldoc.SelectSingleNode('//h1[@class="keyword"]/text()').Value).Append("`t")
  82.         # dict-translation
  83.         [void]$strbuilder.Append( ($xmldoc.SelectNodes('//ul/li[position()<last()]') | ForEach-Object { $_.innerText }) -join " " ).Append("`t")
  84.         try {
  85.           # dict-chart
  86.           $strjson = [System.Web.HttpUtility]::UrlDecode($xmldoc.SelectSingleNode('//div[@id="dict-chart-basic"]/@data').Value)
  87.           $jsobj = [Microsoft.JScript.Eval]::JScriptEvaluate("($strjson)", $vsaengine )
  88.           $chartstart = $strbuilder.Length
  89.           foreach ($field in $jsobj) {
  90.             [void]$strbuilder.Append($jsobj.Item($field).Item('sense')).Append(':').Append($jsobj.Item($field).Item('percent')).Append(',')
  91.           }
  92.           # strip the trailing comma, but only if an entry was written; otherwise
  93.           # the tab that closes the previous column would be removed instead
  94.           if ($strbuilder.Length -gt $chartstart) {
  95.             [void]$strbuilder.Remove($strbuilder.Length - 1, 1)
  96.           }
  97.         }
  98.         catch {
  99.           # deliberately silent (best effort): if the chart data is missing or
  100.           # cannot be evaluated, the chart column is left as it is
  101.         }
  102.         # output result string, with any CR/LF characters removed
  103.         $sw.WriteLine(($strbuilder.ToString() -replace "[\r\n]"))
  104.       }
  105.       else {
  106.         Write-Verbose "Match $word failed"
  107.       }
  108.     }
  109.     catch {
  110.       $_ | Out-String | Write-Host -ForegroundColor Red
  111.     }
  112.     finally {
  113.       $readstream.Close()
  114.       # removing the variable makes the "if ($readstream)" test fail for the
  115.       # next word unless a new fetch succeeds
  116.       Remove-Variable readstream
  117.       if ($gzipstream) {
  118.         $gzipstream.Dispose()
  119.       }
  120.       if ($sr) {
  121.         $sr.Dispose()
  122.       }
  123.     }
  124.   }
  125.   else {
  126.     # every attempt failed: keep the bare word in the result file
  127.     $sw.WriteLine($word)
  128.   }
  129. } -End {
  130.   $sw.Dispose()
  131.   $webclient.Dispose()
  132. }
复制代码
微信:flashercs
QQ:49908356

TOP

回复 16# qixiaobin0715


    你用的是什么Windows系统?不太敢用htmldom,win7与win10对html的兼容性差别很大。
微信:flashercs
QQ:49908356

TOP

回复 20# qixiaobin0715


    已修改代码,请测试,支持win7和win10
微信:flashercs
QQ:49908356

TOP

回复 26# qixiaobin0715

本来就有错误提示啊,结果也忽略了,你的意思是怎么修改?
微信:flashercs
QQ:49908356

TOP

回复 29# qixiaobin0715


    已修改了,主要是没明白你的要求!
微信:flashercs
QQ:49908356

TOP

回复 32# qixiaobin0715


    修复了第一个问题。
第二个问题是因为你的记事本开启了自动换行……
微信:flashercs
QQ:49908356

TOP

回复 34# qixiaobin0715


   已修改
微信:flashercs
QQ:49908356

TOP

本帖最后由 flashercs 于 2019-11-9 11:08 编辑
  1. <#*,:&cls
  2. @echo off
  3. pushd "%~dp0"
  4. Powershell -NoProfile -ExecutionPolicy RemoteSigned -Command ". ([ScriptBlock]::Create((Get-Content -LiteralPath \"%~0\" -ReadCount 0 | Out-String ))) "
  5. popd
  6. pause
  7. exit /b
  8. #>
  9. # Batch/PowerShell hybrid: listing lines 1-8 are run by cmd.exe, which switches
  10. # to the script's own folder and re-runs this same file as a PowerShell script
  11. # block; PowerShell reads those lines as one <# ... #> block comment.
  12. #
  13. # Reads one word per line from $inputFile, requests the page of that name below
  14. # http://dict.cn/ and writes one tab-separated line per word to $outputFile:
  15. #   keyword <TAB> text of the <ul>/<li> items <TAB> sense:percent,... <TAB> example sentences
  16. # The example-sentence column uses a literal "<br>" as separator. A word whose
  17. # page could not be fetched is written back unchanged.
  18. $VerbosePreference = "Continue"
  19. $inputFile = "单词列表.txt"
  20. $outputFile = "结果.txt"
  21. Get-Content -LiteralPath $inputFile -OutBuffer 100 | ForEach-Object -Begin {
  22.   # the output file is overwritten (append = $false) using [Encoding]::Default
  23.   # and a 65536-byte buffer
  24.   $sw = New-Object -TypeName System.IO.StreamWriter -ArgumentList $outputFile, $false, ([System.Text.Encoding]::Default), 65536
  25.   # webclient settings
  26.   $webclient = New-Object -TypeName System.Net.WebClient
  27.   $webclient.BaseAddress = 'http://dict.cn/'
  28.   $webclient.Encoding = [System.Text.Encoding]::UTF8
  29.   $webclient.Headers.Add("Accept", "text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8")
  30.   $webclient.Headers.Add("Accept-Encoding", "gzip")
  31.   $webclient.Headers.Add("Accept-Language", "en-US, en; q=0.8, zh-Hans-CN; q=0.5, zh-Hans; q=0.3")
  32.   # xml parser
  33.   $xmldoc = New-Object -TypeName System.Xml.XmlDocument
  34.   # regexes: $remain cuts out the <div class="main"> fragment up to
  35.   # <div class="righter">; $rejs matches <script> elements so they can be removed;
  36.   # $reXMLEntities matches "&...;" entities, including ones whose ";" is missing
  37.   $remain = [regex]'(?si)<div\s+class="main"[^>]*>.*?(?=<div\s+class="righter")'
  38.   $rejs = [regex]'(?si)<script[^>]*>.*?</script>'
  39.   $reXMLEntities = [regex]'(?si)&[^;<]*(;|(?=<|$))'
  40.   # match evaluator for $reXMLEntities: appends the missing ";" when group 1 is
  41.   # empty, then HTML-decodes and re-encodes the entity (presumably so that
  42.   # HTML-only entities do not make LoadXml fail)
  43.   $evaluator = {
  44.     param($m)
  45.     $s = $m.Value;
  46.     if ($m.Groups[1].Value -eq '') {
  47.       $s += ';'
  48.     }
  49.     [System.Web.HttpUtility]::HtmlEncode([System.Web.HttpUtility]::HtmlDecode($s))
  50.   } -as [System.Text.RegularExpressions.MatchEvaluator]
  51.   # stringbuilder, reused for every output line
  52.   $strbuilder = New-Object -TypeName System.Text.StringBuilder
  53.   # JSON parser for the chart data
  54.   Add-Type -AssemblyName System.Web.Extensions
  55.   $JSON = New-Object -TypeName System.Web.Script.Serialization.JavascriptSerializer -ErrorAction Stop
  56.   Add-Type -AssemblyName System.Web
  57. } -Process {
  58.   # keep the word in its own variable: inside catch blocks $_ is the error record
  59.   $word = $_
  60.   Write-Verbose "Fetching $word ..."
  61.   # up to three attempts per word
  62.   for ($i = 2; $i -ge 0; $i--) {
  63.     try {
  64.       $readstream = $webclient.OpenRead($word)
  65.       Write-Verbose "Fetch $word success"
  66.       break
  67.     } catch {
  68.       $_ | Out-String | Write-Host -ForegroundColor Red
  69.     }
  70.   }
  71.   if ($readstream) {
  72.     # forget the streams of the previous word so finally only touches this word's
  73.     $gzipstream = $null
  74.     $sr = $null
  75.     try {
  76.       # gunzip only when the server really compressed the response; an
  77.       # uncompressed answer would make GZipStream fail
  78.       if ($webclient.ResponseHeaders['Content-Encoding'] -match 'gzip') {
  79.         $gzipstream = New-Object -TypeName System.IO.Compression.GZipStream -ArgumentList $readstream, ([System.IO.Compression.CompressionMode]::Decompress)
  80.         $bodystream = $gzipstream
  81.       } else {
  82.         $bodystream = $readstream
  83.       }
  84.       $sr = New-Object -TypeName System.IO.StreamReader -ArgumentList $bodystream, ([System.Text.Encoding]::UTF8)
  85.       $strhtml = $sr.ReadToEnd()
  86.       $match = $remain.Match($strhtml)
  87.       # div.main matched
  88.       if ($match.Success) {
  89.         Write-Verbose "Match $word success"
  90.         # convert html to xml: remove scripts and comments, escape <...> runs that
  91.         # consist only of characters outside \x00-\x7e, close <br>, fix entities
  92.         $strxml = $rejs.Replace($match.Value, '') -replace '(?s)<!--.*?-->' -replace '<([^\x00-\x7e]+)>', '&lt;$1&gt;' -replace '<br>', '<br/>'
  93.         $strxml = $reXMLEntities.Replace($strxml, $evaluator)
  94.         $xmldoc.LoadXml($strxml)
  95.         $strbuilder.Length = 0
  96.         $nodeWord = $xmldoc.DocumentElement.SelectSingleNode('div[@class="word"]')
  97.         # word-cont
  98.         [void]$strbuilder.Append($nodeWord.SelectSingleNode('.//h1[@class="keyword"]/text()').Value).Append("`t")
  99.         # dict-translation
  100.         [void]$strbuilder.Append( ($nodeWord.SelectNodes('.//ul/li[position()<last()]') | ForEach-Object { $_.innerText }) -join " " ).Append("`t")
  101.         # dict-chart
  102.         $nodeChartBasic = $nodeWord.SelectSingleNode('.//div[@id="dict-chart-basic"]/@data')
  103.         # chart basic exist
  104.         if ($nodeChartBasic) {
  105.           $strjson = [System.Uri]::UnescapeDataString($nodeChartBasic.Value)
  106.           $jsobj = $JSON.DeserializeObject($strjson)
  107.           $chartstart = $strbuilder.Length
  108.           foreach ($field in $jsobj.Keys) {
  109.             [void]$strbuilder.Append($jsobj.Item($field).Item('sense')).Append(':').Append($jsobj.Item($field).Item('percent')).Append(',')
  110.           }
  111.           # strip the trailing comma, but only if an entry was written; otherwise
  112.           # the tab that closes the previous column would be removed instead
  113.           if ($strbuilder.Length -gt $chartstart) {
  114.             [void]$strbuilder.Remove($strbuilder.Length - 1, 1)
  115.           }
  116.         }
  117.         # the chart column is always closed, even when it is empty
  118.         [void]$strbuilder.Append("`t")
  119. 
  120.         # example sentences
  121.         $nodeSent = $xmldoc.DocumentElement.SelectSingleNode('div[@class="section sent"]/h3[text()="例句"]/following-sibling::div')
  122.         if ($nodeSent) {
  123.           try {
  124.             $sentstart = $strbuilder.Length
  125.             $nodeSent.SelectNodes('ol') | ForEach-Object {
  126.               # the node before each <ol> is used as its heading, e.g.
  127.               # "used as an adjective (adj.)": all whitespace is removed, then a
  128.               # space is put back in front of "("
  129.               [void]$strbuilder.Append(($_.PreviousSibling.InnerText -replace '\s+' -replace '\(', ' $&')).Append('<br>')
  130.               # at most the first two <li> of each list, numbered from 1
  131.               $_.SelectNodes('li[position()<3]') | ForEach-Object -Begin { $index = 0 } -Process {
  132.                 $index++;
  133.                 [void]$strbuilder.Append("$index.$(($_.SelectNodes('text()')|ForEach-Object {$_.Value}) -join '<br>')").Append('<br>')
  134.               }
  135.             }
  136.             # strip the trailing "<br>", but only if a sentence block was written;
  137.             # otherwise the tab and the end of the previous column would be cut off
  138.             if ($strbuilder.Length -gt $sentstart) {
  139.               [void]$strbuilder.Remove($strbuilder.Length - 4, 4)
  140.             }
  141.           } catch {
  142.             $_ | Out-String | Write-Host -ForegroundColor Red
  143.           }
  144.         } else {
  145.           Write-Verbose "没有例句."
  146.         }
  147.         # output result string, with any CR/LF characters removed
  148.         $sw.WriteLine(($strbuilder.ToString() -replace "[\r\n]+"))
  149.       } else {
  150.         Write-Verbose "Match $word failed"
  151.       }
  152.     } catch {
  153.       $_ | out-string | Write-Host -ForegroundColor Red
  154.     } finally {
  155.       $readstream.Close()
  156.       # removing the variable makes the "if ($readstream)" test fail for the
  157.       # next word unless a new fetch succeeds
  158.       Remove-Variable readstream
  159.       if ($gzipstream) {
  160.         $gzipstream.Dispose()
  161.       }
  162.       if ($sr) {
  163.         $sr.Dispose()
  164.       }
  165.     }
  166.   } else {
  167.     # every attempt failed: keep the bare word in the result file
  168.     $sw.WriteLine($word)
  169.   }
  170. } -End {
  171.   $sw.Dispose()
  172.   $webclient.Dispose()
  173. }
复制代码
微信:flashercs
QQ:49908356

TOP

回复 40# qixiaobin0715


    改
微信:flashercs
QQ:49908356

TOP

回复 45# qixiaobin0715


    先前我对制表符理解有误,是需要4列数据吧?已修改。
微信:flashercs
QQ:49908356

TOP

返回列表