1. 程式人生 > Go讀取論文並轉換為simhash

Go讀取論文並轉換為simhash

package main

import (
	"database/sql"
	_ "flag"
	"fmt"
	_ "io/ioutil"
	"log"
	"os"
	_ "path"
	_ "strings"
	"time"

	_ "baliance.com/gooxml/document"
	_ "github.com/go-sql-driver/mysql"
	"github.com/yanyiwu/gosimhash"
)


func main(){



    t1 := time.Now()

    Mylog(doc)
    if err != nil {
        Mylog(err)
    }

    db, err := sql.Open("mysql", "root:
[email protected]
(127.0.0.1:3306)/gzpg_crs_jsj?charset=utf8"); if err != nil { fmt.Println(err); } sql :="select s1.paper_id,s2.title_cn,s2.abstract_cn,s2.keyword_cn,s2.title_en,s2.abstract_en,s2.keyword_en,s1.s_content from sf_content s1,sf_paper s2 where s1.paper_id=s2.paper_id limit 10" rows, err := db.Query(sql) if err != nil { fmt.Println(err); } stmt, err := db.Prepare("INSERT sim_path SET paperid=?,simcode=?") if err != nil { fmt.Println(err); } var str string var code string //查詢多個 for rows.Next() { var paper_id int //論文id var title_cn string //中文題目 var abstract_cn string //中文摘要 var keyword_cn string //中文關鍵詞 var title_en string //英文題目 var abstract_en string //英文摘要 var keyword_en string //英文關鍵詞 var s_content string//全文內容 err = rows.Scan(&paper_id, &title_cn,&abstract_cn,&keyword_cn,&title_en,&abstract_en,&keyword_en,&s_content) str = fmt.Sprintf("%s\n 摘要:%s\n 關鍵詞:%s\n %s\n Abstract:%s\n Keywords:%s\n %s\n",title_cn,abstract_cn,keyword_cn,title_en,abstract_en,keyword_en,s_content) code=simhash(str) res, err := stmt.Exec(paper_id, code) if err != nil { fmt.Println(err); } id, err := res.LastInsertId() if err != nil { fmt.Println(err); } fmt.Print("%s成功%s \n",id,paper_id); } db.Close() elapsed := time.Since(t1) log.Println("時間花費位:\n" , elapsed) } func simhash(str string) (string) { hasher := gosimhash.New("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8") defer hasher.Free() fingerprint := hasher.MakeSimhash(str, 1) var code string var s string = "0000000000000000000000000000000000000000000000000000000000000000" bs := []byte(s) for i := 63; i >= 0; i-- { if (fingerprint&1)==1 { bs[i]='1' } else { bs[i]='0' } fingerprint >>=1 } code =string(bs) return code } func Mylog(v ...interface{}) { f, err := os.OpenFile("20181105go.log", os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err != nil { Mylog(err) } defer f.Close() logger := log.New(f, TAG, log.Ldate|log.Ltime|log.Lmicroseconds) logger.Println(v...) }