[新手上路]批处理新手入门导读[视频教程]批处理基础视频教程[视频教程]VBS基础视频教程[批处理精品]批处理版照片整理器
[批处理精品]纯批处理备份&还原驱动[批处理精品]CMD命令50条不能说的秘密[在线下载]第三方命令行工具[在线帮助]VBScript / JScript 在线参考
返回列表 发帖
不知这个效率怎样:处理 3 万行文本大约需要 17 秒。
  <#*,:&cls
  @echo off
  rem Batch/PowerShell hybrid launcher: cmd.exe runs this section, while
  rem PowerShell sees the whole section as one block comment and skips it.
  rem The full script path is captured before the directory change, so a
  rem relative or extension-less invocation still resolves to this file.
  rem It is handed to PowerShell through an environment variable, so dollar
  rem signs or backticks in the path are never interpreted as PowerShell code.
  setlocal
  set "HYBRID_SCRIPT=%~f0"
  pushd "%~dp0"
  Powershell -NoProfile -ExecutionPolicy RemoteSigned -Command ". ([ScriptBlock]::Create((Get-Content -LiteralPath $env:HYBRID_SCRIPT -ReadCount 0 | Out-String ))) "
  popd
  pause
  exit /b
  #>
  # Input text to analyse and the report file to write. Both are relative
  # names, so they are resolved against the current location.
  $FileList = '源文件.txt'
  $FileOut = '词频统计.txt'
  function Get-WordCount {
    <#
    .SYNOPSIS
    Counts every fixed-length substring found in the given sentences.
    .DESCRIPTION
    Each sentence is split into runs of characters in the range
    U+4E00..U+9FA5. With -IncludePunctuations the whole sentence is used as a
    single run instead, so substrings may contain any character. For every
    requested length a window of that length slides over each run, one
    character at a time, and the occurrences of each substring are counted.
    .PARAMETER Sentences
    Lines of text to analyse. Accepts pipeline input. Null or empty entries
    are skipped.
    .PARAMETER WordLengthList
    Substring lengths to count; each must be greater than zero. Repeated
    values are counted only once.
    .PARAMETER IncludePunctuations
    Treat each whole sentence as one run instead of extracting runs with the
    regular expression.
    .OUTPUTS
    Hashtable keyed by substring length (int). Each value is a
    Dictionary[string, int] mapping a substring to its occurrence count.
    #>
    [CmdletBinding()]
    param (
      [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
      [AllowEmptyCollection()]
      [AllowEmptyString()]
      [AllowNull()]
      [string[]]$Sentences,
      [Parameter(Mandatory = $false, Position = 1)]
      [ValidateNotNullOrEmpty()]
      [ValidateScript( { $_ -gt 0 })]
      [int[]]$WordLengthList = @(2, 3, 4, 5, 6, 7),
      [Parameter(Mandatory = $false, Position = 2)]
      [switch]$IncludePunctuations
    )

    begin {
      $dicWordCount = @{ }
      # Distinct lengths only: a repeated length would make Hashtable.Add fail
      # and would count every substring of that length more than once.
      $Lengths = New-Object "System.Collections.Generic.List[int]"
      foreach ($Length in $WordLengthList) {
        if (-not $dicWordCount.ContainsKey($Length)) {
          $dicWordCount.Add($Length, (New-Object "System.Collections.Generic.Dictionary[string, int]"))
          $Lengths.Add($Length)
        }
      }
      # $reWord = New-Object System.Text.RegularExpressions.Regex -ArgumentList @("\w+")
      $reWord = New-Object System.Text.RegularExpressions.Regex -ArgumentList @("[\u4E00-\u9FA5]+")
    }

    process {
      foreach ($Sentence in $Sentences) {
        # Nothing to count, and Regex.Matches rejects a null input.
        if ([string]::IsNullOrEmpty($Sentence)) { continue }

        if ($IncludePunctuations) {
          $WordList = @($Sentence)
        } else {
          # Walk the MatchCollection directly; a per-sentence pipeline is
          # noticeably slower on large inputs. @() keeps "no match" an empty
          # array so the loop below simply does not run.
          $WordList = @(foreach ($m in $reWord.Matches($Sentence)) { $m.Value })
        }

        foreach ($Word in $WordList) {
          foreach ($WordLength in $Lengths) {
            # Last start index at which a window of this length still fits;
            # negative when the run is shorter than the window.
            $PosTail = $Word.Length - $WordLength
            $dicCtr = $dicWordCount[$WordLength]
            for ($i = 0; $i -le $PosTail; $i++) {
              $dicCtr[$Word.Substring($i, $WordLength)]++
            }
          }
        }
      }
    }

    end {
      $dicWordCount
    }
  }
  # Build the substring statistics for the input file and write the report.
  if (-not (Test-Path -Path $FileList -PathType Leaf)) {
    # Without this guard Get-Content only reports an error, the run carries
    # on, and a blank report is still written as if it had succeeded.
    Write-Error "Input file not found: $FileList"
    return
  }
  $dicWordCount = Get-WordCount -Sentences (Get-Content -ReadCount 0 -Path $FileList)
  # One report line per substring, grouped by substring length in ascending
  # order. Each key/value pair is read once instead of re-indexing by key.
  $ReportLines = foreach ($Length in ($dicWordCount.Keys | Sort-Object)) {
    foreach ($Entry in $dicWordCount[$Length].GetEnumerator()) {
      '{0,-10} : {1}' -f $Entry.Key, $Entry.Value
    }
  }
  Set-Content -Value $ReportLines -LiteralPath $FileOut
复制代码
1

评分人数

微信:flashercs
QQ:49908356

TOP

返回列表