1. 程式人生 > >20170601xlVBA正則表達式提取體檢數據

20170601xlVBA正則表達式提取體檢數據

regexpr ade column app str loop cin code pen

Public Sub GetFirst()
    GetDataFromWord "初檢"
End Sub

Public Sub GetDataFromWord(ByVal SheetName As String)
    AppSettings
    ‘On Error GoTo ErrHandler
    Dim StartTime, UsedTime As Variant
    StartTime = VBA.Timer
    ‘Input code here

    Dim Wb As Workbook
    Dim Sht As Worksheet
    Dim oSht As Worksheet
    Dim Rng As Range
    Dim Arr As Variant

    Dim wdApp As Word.Application
    Dim wdDoc As Word.Document
    Dim wdRng As Word.Range


    ‘Const SHEET_NAME As String = "提取信息"
    Set Wb = Application.ThisWorkbook
    Set Sht = Wb.Worksheets(SheetName)

    Dim FilePath As String
    With Application.FileDialog(msoFileDialogFilePicker)
        .AllowMultiSelect = False
        .InitialFileName = Wb.Path
        .Title = "提取" & SheetName & "數據"
        .Filters.Clear
        .Filters.Add "Word文檔", "*.rtf*"
        If .Show = -1 Then
            FilePath = .SelectedItems(1)
        Else
            MsgBox "您沒有選中任何文件夾,本次匯總中斷!"
            Exit Sub
        End If
    End With

    Debug.Print FilePath



    Set wdApp = New Word.Application
    Set wdDoc = wdApp.Documents.Open(FilePath)
    Application.StatusBar = ">>>>>>>>Positioning & Replacing >>>>>>>>"
    PositioningClear wdDoc, 5    ‘定位刪除英文行 避免正則提取造成幹擾


    Application.StatusBar = ">>>>>>>>Regexpress Getting array >>>>>>>>"
    Arr = RegGetArray(wdDoc.Content.Text)    ‘正則從全文提取內容 存入數組
    wdDoc.Close False    ‘關閉doc
    wdApp.Quit    ‘退出app
    Set wdApp = Nothing
    Set wdDoc = Nothing


    With Sht
        .Cells.Clear
        .Range("A1:D1").Value = Array("大項", "小項", "D值", "E值")
        Set Rng = .Range("A2").Resize(UBound(Arr, 2), UBound(Arr))
        Rng.Value = Application.WorksheetFunction.Transpose(Arr)
        Sort2003 .UsedRange
    End With


    UsedTime = VBA.Timer - StartTime
    Debug.Print "UsedTime:" & Format(UsedTime, "0.000 Seconds")
    ‘MsgBox "UsedTime:" & Format(UsedTime, "0.000 Seconds"), vbOKOnly, "NextSeven  QQ "
ErrorExit:
    Set Wb = Nothing
    Set Sht = Nothing
    Set Rng = Nothing
    AppSettings False

    On Error Resume Next
    wdApp.Quit

    Exit Sub
ErrHandler:
    If Err.Number <> 0 Then
        MsgBox Err.Description & "!", vbCritical, "NextSeven QQ "
        Debug.Print Err.Description
        Err.Clear
        Resume ErrorExit
    End If
End Sub
Public Sub AppSettings(Optional IsStart As Boolean = True)
    If IsStart Then
        Application.ScreenUpdating = False
        Application.DisplayAlerts = False
        Application.Calculation = xlCalculationManual
        Application.StatusBar = ">>>>>>>>Macro Is Running>>>>>>>>"
    Else
        Application.ScreenUpdating = True
        Application.DisplayAlerts = True
        Application.Calculation = xlCalculationAutomatic
        Application.StatusBar = False
    End If
End Sub
Function RegGetArray(ByVal OrgText As String) As String()
    Dim Reg As Object, Mh As Object, OneMh As Object
    Dim Reg2 As Object

    Dim Arr() As String, Index As Long
    Dim Elm As String
    Set Reg = CreateObject("Vbscript.Regexp")
    Set Reg2 = CreateObject("Vbscript.Regexp")

    Reg2.Global = True


    With Reg
        ‘OrgText = Application.ActiveDocument.Content
        .MultiLine = True
        .Global = True
        .Ignorecase = False
        ‘可用
        ‘.Pattern = "(?:\s)?(\S*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
        .Pattern = "(?:\s+?)([一-龥;,,]*?)?\s? *" & "(?:[ ])([^ ][^\r\n\v]*?)\s+?(D=[\d\.]+)\s+(E=[\d\.]+)[\s]+?"
        Set Mh = .Execute(OrgText)
        Index = 0
        ReDim Arr(1 To 4, 1 To 1)
        For Each OneMh In Mh
            Index = Index + 1
            ReDim Preserve Arr(1 To 4, 1 To Index)
            If OneMh.submatches(0) <> "" Then Elm = OneMh.submatches(0)

            Reg2.Pattern = "[;,,]?(左視圖|前視圖|縱切面)+[;,,]?"
            Arr(1, Index) = Reg2.Replace(Elm, "")


            Reg2.Pattern = "[\s#G]"
            Arr(2, Index) = Reg2.Replace(OneMh.submatches(1), "")
            ‘Debug.Print OneMh.submatches(2)
            Arr(3, Index) = Split(OneMh.submatches(2), "=")(1)
            ‘Debug.Print OneMh.submatches(3)
            Arr(4, Index) = Split(OneMh.submatches(3), "=")(1)
        Next OneMh
    End With
    RegGetArray = Arr
    Set Reg = Nothing: Set Mh = Nothing
    Set Reg2 = Nothing
End Function

Public Sub PositioningClear(ByVal OpenDoc As Word.Document, ByVal Times As Long)
    Dim wdRng As Word.Range
    Dim lngStart As Long
    Dim lngEnd As Long
    Dim lngTime As Long
    For lngTime = 1 To Times
        lngEnd = OpenDoc.Content.End
        With OpenDoc.Content.Find
            .ClearFormatting
            .Replacement.ClearFormatting
            .Text = "ALIMENTARY SYSTEM"
            .Replacement.Text = ""
            If .Execute Then
                lngStart = .Parent.Start
                Set wdRng = OpenDoc.Range(lngStart, lngEnd)
            End If
        End With

        If Not wdRng Is Nothing Then
            With wdRng.Find
                .ClearFormatting
                .Replacement.ClearFormatting
                .Text = "[^l^13][A-Za-z0-9\- ,;:.]@[^l^13]"
                .MatchWildcards = True
                .Wrap = wdFindStop
                .Forward = True
                .Replacement.Text = "^l"
                ‘n = 0
                .Execute Replace:=wdReplaceAll
                ‘Do While .Execute
                ‘   n = n + 1
                ‘   Debug.Print n; "____________"; .Parent.Text
                ‘    If n > 1000 Then Exit Do
                ‘Loop
            End With
        End If
        Set wdRng = Nothing
    Next lngTime

End Sub

Sub Sort2003(ByVal RngWithTitle As Range, Optional SortColumnNo As Long = 1)
‘key1代表第一個排序的列的關鍵字
‘Order1表示第一字段的排序方式,賦值為xlAscending表示升序,改為xlDescending表示降序。
‘Header表示是否包含標題,賦值為xlYes表示標題不參與排序,賦值為xlNo表示標題也參數排序
‘MatchCase表示排序時是否區分大小寫,賦值為False表示不區分大小寫
‘Orientation表示排序方向,賦值為xlTopToBottom或者xlSortColumns表示按列排序,賦值為xlSortRows 表示排行排序
‘SortMethod用於限制對漢字排序時的排序方式,賦值為xlPinYin表示按拼音排序,賦值為xlStroke表示按筆劃排序
    With RngWithTitle
        .Sort Key1:=RngWithTitle.Cells(1, SortColumnNo), Order1:=xlAscending, Header:=xlYes, _
              MatchCase:=False, Orientation:=xlTopToBottom, SortMethod:=xlPinYin
    End With
End Sub

  

20170601xlVBA正則表達式提取體檢數據