唐宇迪Seq2Seq程式碼+註釋（tensorflow1.2版本）

阿新 • • 發佈：2018-12-15

import pandas as pd
import re
import numpy as np
import tensorflow as tf
import time

# Load the review dataset. The path is a raw string so the backslashes are
# never interpreted as escape sequences.
filename = r'E:\DataSets\Reviews.csv\Reviews.csv'
reviews = pd.read_csv(filename)
# print(reviews.isnull().sum())
# Drop rows that contain missing values.
reviews = reviews.dropna()
# Drop the columns that are not needed. `axis` is passed by keyword because
# newer pandas releases no longer accept it as a positional argument.
reviews = reviews.drop(
    ['Id', 'ProductId', 'UserId', 'ProfileName', 'HelpfulnessNumerator', 'HelpfulnessDenominator', 'Score', 'Time'],
    axis=1)
# Re-number the index after rows were removed.
reviews = reviews.reset_index(drop=True)
# print(reviews.head())  # shows the first rows of the remaining columns
# Mapping from English contractions to their expanded forms.
contractions = {
    "ain't": "am not",
    "aren't": "are not",
    "can't": "can not",
    "can't've": "cannot have",
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he's": "he is",
    "how'd": "how did",
    "how'll": "how will",
    "how's": "how is",
    "i'd": "i would",
    "i'll": "i will",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'll": "it will",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "must've": "must have",
    "mustn't": "must not",
    "needn't": "need not",
    "oughtn't": "ought not",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "she'd": "she would",
    "she'll": "she will",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "that'd": "that would",
    "that's": "that is",
    "there'd": "there had",
    "there's": "there is",
    "they'd": "they would",
    "they've": "they have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'll": "we will",
    "we're": "we are",
    "we've": "we have",
    "weren't": "were not",
    "what'll": "what will",
    "what've": "what have",
    "what'd": "what did",
    "where's": "where is",
    "who'll": "who will",
    "who's": "who is",
    "won't": "will not",
    "wouldn't": "would not",
    "you'd": "you would",
    "you'll": "you will",
    "you're": "you are"
}

# Clean raw text: lower-case it, expand contractions, strip URLs, markup and
# punctuation, and optionally drop stop words, e.g. 'i want to rock you'.
_STOPWORDS_CACHE = {}


def _load_stopwords(path="Englishstopwords.txt"):
    """Return the stop words on the first line of `path` as a set (cached per path)."""
    if path not in _STOPWORDS_CACHE:
        with open(path, 'r') as handle:
            first_line = handle.readline()
        # The words are space separated; the final token is discarded,
        # exactly as the original slicing did.
        _STOPWORDS_CACHE[path] = set(first_line.split(" ")[0:-1])
    return _STOPWORDS_CACHE[path]


def clean_text(text, remove_stopwords=True):
    """Normalise `text` and return it as a single space-joined string.

    Args:
        text: the raw string to clean.
        remove_stopwords: when true, words listed in "Englishstopwords.txt"
            are removed. The file is only read in this case, and only once.

    Returns:
        The cleaned string.
    """
    text = text.lower()
    # Expand contractions word by word.
    text = " ".join(contractions.get(word, word) for word in text.split())
    # Strip URLs, leftover HTML and punctuation.
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&', '', text)
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br />', ' ', text)
    text = re.sub(r'\'', ' ', text)
    if remove_stopwords:
        stops = _load_stopwords()
        text = " ".join(w for w in text.split() if w not in stops)
    return text


# Cleaned corpora: each is a list of cleaned strings such as
# ['i want to rock you', 'you will win the championship', ...].
# Summaries keep their stop words; review texts have them removed.
clean_summaries = [clean_text(raw_summary, remove_stopwords=False) for raw_summary in reviews.Summary]
print("Summaries are completed")
clean_texts = [clean_text(raw_text, remove_stopwords=True) for raw_text in reviews.Text]
print("Texts are completed")

# Accumulate word frequencies into a dict such as {'many': 897, 'hate': 234, ...}.
def count_words(count_dict, text):
    """Add the frequency of every whitespace-separated word in `text` to `count_dict`.

    `text` is an iterable of sentence strings. `count_dict` is updated in
    place and nothing is returned.
    """
    for line in text:
        for token in line.split():
            count_dict[token] = count_dict.get(token, 0) + 1


# Word-frequency table over summaries and texts (one entry per distinct word).
word_counts = {}
count_words(word_counts, clean_summaries)
count_words(word_counts, clean_texts)
print("Size of Vocabulary :", len(word_counts))
embeddings_index = {}
# Load the pre-trained word vectors into a dict of the form
# {'a': vector, 'money': vector, ...}.
with open(r'E:\word2vecmodel\numberbatch-en-17.04b.txt', 'r',encoding='utf8') as f:
    for line in f:
        values = line.split(' ')
        word = values[0]
        embedding = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = embedding
missing_words = 0
threshold = 20  # words occurring fewer than 20 times are not used
for word, count in word_counts.items():
    # Use the same `>=` cut-off as the vocabulary construction so a word seen
    # exactly `threshold` times is counted consistently in both places.
    if count >= threshold and word not in embeddings_index:
        missing_words += 1  # frequent word without a pre-trained vector
missing_ratio = round(missing_words / len(word_counts), 4) * 100
print("Number of words missing from CN:", missing_words)
print("Percent of words that are missing from vocabulary: {}%".format(missing_ratio))
# Assign an integer id to every word that is frequent enough or has a
# pre-trained vector; ids are handed out in iteration order.
vocab_to_int = {}
for word, count in word_counts.items():
    if count < threshold and word not in embeddings_index:
        continue
    vocab_to_int[word] = len(vocab_to_int)
# Special tokens get the next free ids.
codes = ["<UNK>", "<PAD>", "<EOS>", "<GO>"]
for code in codes:
    vocab_to_int[code] = len(vocab_to_int)
# Inverse mapping: id -> word.
int_to_vocab = {index: word for word, index in vocab_to_int.items()}
usage_ratio = round(len(vocab_to_int) / len(word_counts), 4) * 100
print("Total number of unique words:", len(word_counts))
print("Number of words we will use:", len(vocab_to_int))
print("Percent of words we will use: {}%".format(usage_ratio))
embedding_dim = 300
nb_words = len(vocab_to_int)
# Build the embedding matrix of shape (nb_words, embedding_dim). Row i holds
# the vector of the word whose id is i. Words without a pre-trained vector
# get a random one, which is also stored back into embeddings_index.
word_embedding_matrix = np.zeros((nb_words, embedding_dim), dtype=np.float32)
for word, row in vocab_to_int.items():
    vector = embeddings_index.get(word)
    if vector is None:
        vector = np.array(np.random.uniform(-1.0, 1.0, embedding_dim))
        embeddings_index[word] = vector
    word_embedding_matrix[row] = vector

# Turn sentences into lists of word ids: [[1, 234, 7687, ...], [345, 908, ...], ...].
def convert_to_ints(text, word_count, unk_count, eos=False):
    """Convert each sentence in `text` to a list of ids from `vocab_to_int`.

    Words missing from `vocab_to_int` become the '<UNK>' id. When `eos` is
    true the '<EOS>' id is appended to every sentence.

    Returns:
        (ints, word_count, unk_count): the id lists, plus the incoming
        counters increased by the number of words seen and the number of
        words mapped to '<UNK>'.
    """
    ints = []
    for line in text:
        ids = []
        for token in line.split():
            word_count += 1
            token_id = vocab_to_int.get(token)
            if token_id is None:
                token_id = vocab_to_int['<UNK>']
                unk_count += 1
            ids.append(token_id)
        if eos:
            ids.append(vocab_to_int['<EOS>'])
        ints.append(ids)
    return ints, word_count, unk_count


# Running totals of words seen and words mapped to <UNK>, threaded through both calls.
word_count, unk_count = 0, 0
# int_summaries and int_texts are lists of id lists:
# [[1, 234, 7687, 23, ...], [345, 908, 2359, 11234, ...], ...].
# Only the texts get an <EOS> id appended.
int_summaries, word_count, unk_count = convert_to_ints(clean_summaries, word_count, unk_count, eos=False)
int_texts, word_count, unk_count = convert_to_ints(clean_texts, word_count, unk_count, eos=True)


def create_lengths(text):
    """Return a one-column DataFrame ('counts') holding len() of each item in `text`."""
    return pd.DataFrame([len(item) for item in text], columns=['counts'])


lengths_summaries = create_lengths(int_summaries)
lengths_texts = create_lengths(int_texts)
# Print the 90th, 95th and 99th percentile of the sequence lengths:
# first for the texts, then for the summaries.
for length_table in (lengths_texts, lengths_summaries):
    for percentile in (90, 95, 99):
        print(np.percentile(length_table.counts, percentile))

# Count the <UNK> ids in a sentence; used to filter the training set.
def unk_counter(setence):
    """Return how many entries of `setence` equal the '<UNK>' id in `vocab_to_int`.

    Always returns an int (0 when there is no '<UNK>'), so the result can be
    compared against a numeric limit.
    """
    unk_id = vocab_to_int['<UNK>']
    return sum(1 for word in setence if word == unk_id)


max_text_length = 84
max_summary_length = 13
min_length = 2
unk_text_limit = 1
unk_summary_limit = 0
# Keep the (summary, text) pairs that satisfy the length and <UNK> limits and
# order them by ascending text length. One filtering pass followed by a
# stable sort gives the same ordering as scanning the corpus once per length,
# without rescanning it for every length. Both <UNK> checks use `<=` so each
# limit is the maximum number of <UNK> ids allowed.
kept_pairs = [
    (summary_ints, text_ints)
    for summary_ints, text_ints in zip(int_summaries, int_texts)
    if (min_length <= len(summary_ints) <= max_summary_length
        and min_length < len(text_ints) < max_text_length
        and unk_counter(summary_ints) <= unk_summary_limit
        and unk_counter(text_ints) <= unk_text_limit)
]
kept_pairs.sort(key=lambda pair: len(pair[1]))
sorted_summaries = [summary_ints for summary_ints, _ in kept_pairs]
sorted_texts = [text_ints for _, text_ints in kept_pairs]
# sorted_texts / sorted_summaries now hold the pre-processed data in ascending text length.
# Define the placeholders that feed the model.
def model_inputs():
    """Create the input placeholders and return them as a 7-tuple.

    Returns:
        (input_data, targets, lr, keep_prob, summary_length,
        max_summary_length, text_length), where max_summary_length is the
        reduce_max of the summary_length placeholder.
    """
    # Rank-2 int32 inputs; presumably (batch_size, sequence length) -- TODO confirm.
    input_data = tf.placeholder(dtype=tf.int32, shape=[None, None], name='input')
    targets = tf.placeholder(dtype=tf.int32, shape=[None, None], name='targets')
    # Scalar-like float32 feeds for the learning rate and the dropout keep probability.
    lr = tf.placeholder(dtype=tf.float32, name='learning_rate')
    keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    # Rank-1 length vectors for summaries and texts.
    summary_length = tf.placeholder(dtype=tf.int32, shape=(None, ), name='summary_length')
    max_summary_length = tf.reduce_max(summary_length, name='max_dec_len')
    text_length = tf.placeholder(dtype=tf.int32, shape=(None, ), name='text_length')
    return (input_data, targets, lr, keep_prob,
            summary_length, max_summary_length, text_length)
# Prepend <GO> to every target sequence in the batch.
def process_encoding_input(target_data, vocab_to_int, batch_size):
    """Build the decoder input: drop the last column of `target_data` and prepend '<GO>'.

    `target_data` holds the summaries. The slice keeps the first `batch_size`
    rows and every column except the last.
    """
    without_last = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    go_column = tf.fill([batch_size, 1], vocab_to_int['<GO>'])
    return tf.concat([go_column, without_last], 1)
# Build the encoder.
def encoding_layer(rnn_size,sequence_length, num_layers, rnn_inputs, keep_prob):
    """Stack `num_layers` bidirectional LSTM layers over `rnn_inputs`.

    Each layer lives in its own 'encoder_<i>' variable scope and applies
    input dropout with `keep_prob`. The forward and backward outputs of a
    layer are concatenated on the last axis and fed to the next layer, so
    every layer contributes to the result.

    Returns:
        (enc_output, enc_state): the concatenated outputs and the state pair
        returned by the last layer's bidirectional_dynamic_rnn call.

    Raises:
        ValueError: if `num_layers` is smaller than 1.
    """
    if num_layers < 1:
        raise ValueError("num_layers must be at least 1")
    layer_inputs = rnn_inputs
    for layer in range(num_layers):
        with tf.variable_scope('encoder_{}'.format(layer)):
            cell_fw = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
            cell_fw = tf.nn.rnn_cell.DropoutWrapper(cell_fw,input_keep_prob=keep_prob)
            cell_bw = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
            cell_bw = tf.nn.rnn_cell.DropoutWrapper(cell_bw,input_keep_prob=keep_prob)
            enc_output, enc_state = tf.nn.bidirectional_dynamic_rnn(cell_fw,cell_bw,layer_inputs,sequence_length, dtype=tf.float32)
        # Join the forward/backward outputs and pass them on to the next layer.
        enc_output = tf.concat(enc_output,2)
        layer_inputs = enc_output
    return enc_output, enc_state
def training_decoding_layer(dec_embed_input, summary_length, dec_cell, initial_state, output_layer, vocab_size, max_summary_length):
    """Build the decoder used for training and return its decoded outputs.

    A TrainingHelper feeds the embedded target sequence to a BasicDecoder,
    which is unrolled with dynamic_decode for at most `max_summary_length`
    steps. `vocab_size` is accepted but not used here.

    NOTE(review): the result of dynamic_decode is unpacked as a 2-tuple;
    confirm this matches the installed TensorFlow release.
    """
    helper = tf.contrib.seq2seq.TrainingHelper(
        inputs=dec_embed_input,
        sequence_length=summary_length,
        time_major=False)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=dec_cell,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    training_logits, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_summary_length)
    return training_logits
def inference_decoding_layer(embeddings, start_token, end_token, dec_cell, initial_state, output_layer, max_summary_length, batch_size):
    """Build the decoder used for prediction and return its decoded outputs.

    Decoding starts from `start_token` (tiled to one entry per batch row)
    and uses a GreedyEmbeddingHelper with `end_token` as the stop token,
    for at most `max_summary_length` steps.

    NOTE(review): the result of dynamic_decode is unpacked as a 2-tuple;
    confirm this matches the installed TensorFlow release.
    """
    # One start token per sequence in the batch.
    start_tokens = tf.tile(
        tf.constant([start_token], dtype=tf.int32),
        [batch_size],
        name='start_token')
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embeddings, start_tokens, end_token)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell=dec_cell,
        helper=helper,
        initial_state=initial_state,
        output_layer=output_layer)
    inference_logits, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        output_time_major=False,
        impute_finished=True,
        maximum_iterations=max_summary_length)
    return inference_logits
# Decoder: builds the attention-wrapped cell and the training / inference decoders.
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, text_length, summary_length, max_summary_length, rnn_size,vocab_to_int, keep_prob,batch_size, num_layers):
    """Create the decoder cell with Bahdanau attention and both decoding paths.

    Returns:
        (training_logits, inference_logits) as produced by
        training_decoding_layer and inference_decoding_layer; both are built
        under the shared "decode" variable scope (the second with reuse=True).
    """
    # NOTE(review): `lstm` and `dec_cell` are rebound on every iteration, so
    # only the cell created in the final iteration reaches the attention
    # wrapper below; earlier layers are not stacked.
    for layer in range(num_layers):
        with tf.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.nn.rnn_cell.LSTMCell(rnn_size, initializer = tf.random_uniform_initializer(-0.1,0.1,seed = 2))
            dec_cell = tf.nn.rnn_cell.DropoutWrapper(lstm,input_keep_prob=keep_prob)
    output_layer = tf.layers.Dense(vocab_size, kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))# fully-connected projection to vocab_size outputs; original author's open question: whether vocab_size should be len(vocab_to_int)+1
    attn_mech = tf.contrib.seq2seq.BahdanauAttention(rnn_size, enc_output, text_length, normalize = False)# attention mechanism over the encoder outputs
    dec_cell = tf.contrib.seq2seq.DynamicAttentionWrapper(dec_cell, attn_mech, rnn_size)
    initial_state = tf.contrib.seq2seq.DynamicAttentionWrapperState(enc_state[0])# only the first element of enc_state seeds the decoder
    # Training and inference share weights: same scope, second one with reuse.
    with tf.variable_scope("decode"):
        training_logits = training_decoding_layer(dec_embed_input, summary_length, dec_cell, initial_state,output_layer, vocab_size, max_summary_length)
    with tf.variable_scope("decode",reuse= True):
        inference_logits = inference_decoding_layer(embeddings, vocab_to_int['<GO>'],vocab_to_int['<EOS>'],dec_cell,initial_state,output_layer,max_summary_length,batch_size)
    return training_logits, inference_logits
def seq2seq_model(input_data, target_data, keep_prob, text_length, summary_length, max_summary_length, vocab_size, rnn_size, num_layers, vocab_to_int,batch_size):
    """Wire the encoder and decoder together and return (training_logits, inference_logits).

    The module-level `word_embedding_matrix` is used as the embedding table
    for both the encoder input and the decoder input.
    """
    # The embedding table covers the whole vocabulary.
    embeddings = word_embedding_matrix
    # Encoder side.
    encoder_embedded = tf.nn.embedding_lookup(embeddings, input_data)
    enc_output, enc_state = encoding_layer(
        rnn_size, text_length, num_layers, encoder_embedded, keep_prob)
    # Decoder side: targets shifted right behind a <GO> token, then embedded.
    decoder_ids = process_encoding_input(target_data, vocab_to_int, batch_size)
    decoder_embedded = tf.nn.embedding_lookup(embeddings, decoder_ids)
    return decoding_layer(
        decoder_embedded, embeddings, enc_output, enc_state, vocab_size,
        text_length, summary_length, max_summary_length, rnn_size,
        vocab_to_int, keep_prob, batch_size, num_layers)
# Padding helper.
def pad_sentence_batch(sentence_batch):
    """Right-pad every sentence with the '<PAD>' id up to the longest sentence in the batch.

    Returns a new list of lists; the input sentences are not modified.
    """
    longest = max(len(sentence) for sentence in sentence_batch)
    padded = []
    for sentence in sentence_batch:
        filler = [vocab_to_int['<PAD>']] * (longest - len(sentence))
        padded.append(sentence + filler)
    return padded
def get_batches(summaries, texts, batch_size):
    """Yield padded batches of summaries and texts together with their lengths.

    Only full batches are produced (`len(texts) // batch_size` of them).

    Yields:
        (pad_summaries_batch, pad_texts_batch, pad_summaries_lengths,
        pad_texts_lengths). The two batches are numpy arrays of padded id
        rows. Each length list has one entry per row and holds the padded
        row length.
    """
    for batch_i in range(0,len(texts)//batch_size):
        start_i = batch_i*batch_size
        summaries_batch = summaries[start_i:start_i + batch_size]
        texts_batch = texts[start_i:start_i + batch_size]
        pad_summaries_batch = np.array(pad_sentence_batch(summaries_batch))
        pad_texts_batch = np.array(pad_sentence_batch(texts_batch))
        pad_summaries_lengths = [len(summary) for summary in pad_summaries_batch]
        # Text lengths go into their own list, not into the summaries' list.
        pad_texts_lengths = [len(text) for text in pad_texts_batch]
        yield  pad_summaries_batch, pad_texts_batch, pad_summaries_lengths,pad_texts_lengths
# Hyper-parameters.
epochs = 100
batch_size = 64
rnn_size = 256
num_layers = 2
learning_rate = 0.005
keep_probability = 0.75
train_graph = tf.Graph()
with train_graph.as_default():
    # model_inputs() returns seven values; the sixth is the tensor holding the
    # longest summary length in the batch. It gets its own name so the integer
    # hyper-parameter `max_summary_length` is not overwritten.
    input_data, targets, lr, keep_prob, summary_length, max_dec_length, text_length = model_inputs()
    # The input ids are reversed along the last axis before encoding.
    training_logits, inference_logits = seq2seq_model(tf.reverse(input_data,[-1]),targets, keep_prob,text_length,summary_length
                                                      ,max_dec_length,len(vocab_to_int),rnn_size,num_layers,vocab_to_int,batch_size)
    # Per-step outputs of the training decoder, used to compute the loss.
    training_logits = tf.identity(training_logits.rnn_output, 'logits')
    # Predicted word ids of the inference decoder.
    inference_logits = tf.identity(inference_logits.sample_id,name = 'predictions')
    # Mask with one row per sequence: 1.0 inside the sequence length, 0.0 after it.
    masks = tf.sequence_mask(summary_length,max_dec_length,dtype=tf.float32, name='masks')
    with tf.name_scope("optimization"):
        # Sequence loss weighted by the mask so positions past each length do not contribute.
        cost = tf.contrib.seq2seq.sequence_loss(training_logits,targets,masks)
        # Use the `lr` placeholder so the learning rate fed at run time
        # (and its decay) actually takes effect.
        optimizer = tf.train.AdamOptimizer(lr)
        gradients = optimizer.compute_gradients(cost)
        # Clamp every gradient element to [-5, 5] to guard against exploding gradients.
        capped_gradients = [(tf.clip_by_value(grad,-5.,5.),var) for grad,var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)
print("Graph is built")
# Train on a 5000-example slice of the length-sorted data.
start = 200000
end = start + 5000
sorted_summaries_short = sorted_summaries[start:end]
sorted_texts_short = sorted_texts[start:end]
learning_rate_decay = 0.95
min_learning_rate = 0.0005
display_step = 20  # print progress every this many batches
stop_early = 0  # consecutive loss checks without improvement
stop = 3  # stop training after this many such checks
per_epoch = 3  # loss checks per epoch
# Clamped to at least 1 so `batch_i % update_check` can never divide by zero.
update_check = max((len(sorted_texts_short)//batch_size//per_epoch)-1, 1)
update_loss = 0
batch_loss = 0
summary_update_loss = []
checkpoint = "best_model.ckpt"
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    # Build the Saver once instead of adding new save ops on every checkpoint.
    saver = tf.train.Saver()
    for epoch_i in range(1,epochs+1):
        update_loss = 0
        batch_loss = 0
        for batch_i,(summaries_batch, texts_batch, summaries_lengths,texts_lengths) in enumerate(get_batches(sorted_summaries_short,sorted_texts_short,batch_size)):
            start_time = time.time()
            _,loss = sess.run([train_op,cost],{input_data:texts_batch,targets:summaries_batch,lr:learning_rate,summary_length:summaries_lengths,text_length:texts_lengths,keep_prob:keep_probability})
            batch_loss += loss
            update_loss += loss
            end_time = time.time()
            batch_time = end_time - start_time
            if batch_i % display_step == 0 and batch_i >0:
                print('Epoch{:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}, Seconds:{:>4.2f}'.format(epoch_i,epochs,batch_i,
                                                                                               len(sorted_texts_short)//batch_size,
                                                                                               batch_loss/display_step,
                                                                                               batch_time*display_step))
                # Start a fresh window so the printed value is the average
                # over the last `display_step` batches only.
                batch_loss = 0
            if batch_i % update_check == 0 and batch_i>0:
                print("Average loss for this update:", round(update_loss/update_check,3))
                summary_update_loss.append(update_loss)
                # Save the model whenever this is the lowest update loss so far.
                if update_loss <= min(summary_update_loss):
                    print('New Record')
                    stop_early = 0
                    saver.save(sess, checkpoint)
                else:
                    print('No Improvement')
                    stop_early += 1
                    if stop_early == stop:
                        break
                update_loss = 0
        # Decay the learning rate after every epoch, but never below the minimum.
        learning_rate *= learning_rate_decay
        if learning_rate < min_learning_rate:
            learning_rate = min_learning_rate
        if stop_early == stop:
            print("Stopping Training")
            break
# Try the trained model out.
def text_to_seq(text):
    """Clean `text` and return its words as a list of ids ('<UNK>' id for unknown words)."""
    seq = []
    for word in clean_text(text).split():
        seq.append(vocab_to_int.get(word, vocab_to_int['<UNK>']))
    return seq
# Pick a random cleaned review and summarise it with the saved model.
random = np.random.randint(0,len(clean_texts))
input_sentence = clean_texts[random]
text = text_to_seq(clean_texts[random])
checkpoint = './best_model.ckpt'
loaded_graph = tf.Graph()
with tf.Session(graph = loaded_graph) as sess:
    loader = tf.train.import_meta_graph(checkpoint+'.meta')
    loader.restore(sess,checkpoint)
    input_data = loaded_graph.get_tensor_by_name('input:0')
    logits = loaded_graph.get_tensor_by_name('predictions:0')
    text_length = loaded_graph.get_tensor_by_name('text_length:0')
    summary_length = loaded_graph.get_tensor_by_name('summary_length:0')
    # Fetch the dropout placeholder by its own name; fetching 'input:0' again
    # would make the feed below overwrite the input ids with 1.0.
    keep_prob = loaded_graph.get_tensor_by_name('keep_prob:0')
    # The graph was built for a fixed batch size, so the single example is
    # repeated batch_size times: a list of id lists, with one length per row.
    answer_logits = sess.run(logits,{input_data:[text]*batch_size,
                                     summary_length:[np.random.randint(5,8)],
                                     text_length:[len(text)]*batch_size,
                                     keep_prob:1.0})[0]
pad = vocab_to_int["<PAD>"]
print('Original Text:', input_sentence)
print('\nText')
print('Word Ids: {}'.format([i for i in text]))
print('Input Words: {}'.format(" ".join([int_to_vocab[i] for i in text])))
print("\nSummary")
print('Word Ids: {}'.format([i for i in answer_logits if i != pad]))
print('Response Words: {}'.format(" ".join([int_to_vocab[i] for i in answer_logits if i != pad])))

唐宇迪Seq2Seq程式碼+註釋（tensorflow1.2版本）

import pandas as pd import re import numpy as np import tensorflow as tf import time filename = 'E:\DataSets\Reviews.csv\Reviews.csv' r

手動實現簡單的神經網路（唐宇迪神經網路課程筆記）

手動實現一個簡單的兩層神經網路，主要目的是體驗神經網路訓練的三個步驟：1.通過w、x求出loss函式，這一步稱為前向傳播； 2.用第1步求出的loss函式根據鏈式法則（求導）求解出各個w對loss函式的貢獻值，即為反向傳播； 3.根據第2步求出的w對loss函式的貢獻值來調節w，使得loss

唐宇迪博士實戰程式碼教學視訊課程全集，帶你一起資料分析、深度學習

唐宇迪，同濟大學計算機博士，專注於機器學習與計算機視覺領域，人工智慧與資料科學領域培訓專家，上海人工智慧協會核心主幹。參與多個國家級計算機視覺與資料探勘專案，主要研究面部識別與特徵構造，異常識別等領域。多年資料領域培訓經驗，具有豐富的教學講解經驗，出品多套機器學習與深度學習系

唐宇迪深度學習課程上篇——動手完成簡單的神經網絡

random 學習課程 div abs 特征值唐宇迪 arr 神經網絡簡單的 #coding:utf-8 ‘‘‘ Created on 2017年7月21日 @author: KLKJ ‘‘‘ import numpy as np def sigmoid(x,d

CP1621-唐宇迪-python數據分析與機器實戰

imageview 算法包括 container href blank gin wid 困難深度學習框架-Tensorflow案例實戰視頻課程隨筆背景：在很多時候，很多入門不久的朋友都會問我：我是從其他語言轉到程序開發的，有沒有一些基礎性的資料給我們學習學習呢，你的框

唐宇迪-機器學習/深度學習系列課程福利大發送！不單優惠還送機器學習必備實戰書籍！

機器學習深度學習人工智能決勝AI就在今天 Hi同學們，給大家推薦一本機器學習的入門佳品：機器學習實戰。這本書可以說是我看過最通俗易懂的機器學習書籍了，並沒有上來直接闡述一些看著就頭疼的各種數學公式，而是以實際案例為出發點一步步帶領大家完成各個算法的建模與練習，人工智能必備No.1! 福利

『七月直播』人工智能第一場——人工智能學習與發展路線規劃【唐宇迪老師】

優惠時間展開前景 ont 機器你是核心通過第一場——主題：人工智能學習與發展路線規劃7月19日（周四） 20：00~21：00>主講老師：唐宇迪同濟大學計算機博士，專註於機器學習與計算機視覺領域，深度學習領域一線實戰專家，善於實現包括人臉識別，物體識別，

專訪唐宇迪博士：我是如何邁入同濟大學校園的？淺談人工智慧，未來資料探勘和計算機視覺是風口

1.網上很多同學對老師您的簡歷非常好奇，在百度搜索上發現大家都很關心“唐宇迪是哪個學校畢業的”？關於您的學習經歷能簡單說下嗎？唐宇迪：幾年前第一次邁進同濟大學校園，攻讀博士學位，並加入了資料探勘專案組，以此真正開始了機器學習之旅。學習的過程有些枯燥在所難免，但是想著可以將演算法應用於實驗當中，

視覺slam十四講ch5 joinMap.cpp 程式碼註釋（筆記版）

1 #include <iostream> 2 #include <fstream> 3 using namespace std; 4 #include <opencv2/core/core.hpp> 5 #include

唐宇迪機器學習之離職預測

最近在看唐宇迪機器學習視訊，這個視訊我覺得很不錯，可是我資源有限，有的視訊沒有配套的資料、資料集或者是程式碼，但還是可以看視訊瞭解其中的一些知識點。專案介紹該專案是通過員工對公司的滿意程度、公司對員工的評估、員工薪資水平、員工崗位、員工工作時長等特徵來推斷員

唐宇迪深度學習框架Caffe系列-11

deploy.prototxt 這個檔案和訓練檔案 .prototxt 很相似，但是他是在模型生成後，測試模型使用的配置檔案內容上，除了資料層，其他層都是一模一樣的 caffe提供示例的地址：/home/apple/caffe/models/bvlc_reference_caffene

唐宇迪深度學習框架Caffe系列-10

繪製LOSS曲線安裝matplotlib庫(這個庫需要安裝python-tk) sudo apt-get install python-tk sudo pip install matplotlib import numpy as np import matplotl

隨機森林程式碼註釋（C++版本）

</pre><pre name="code"class="cpp">#include <iostream> #include <fstream> #include <sstream> #include "random_forest.h" usin

決策樹，decision的python程式碼和註釋（機器學習實戰）

Decision Tree的註釋：畫圖部分不給註釋了 from math import log import numpy def calcShannonEnt(dataSet): numEntries = len(dataSet) labelCounts =

k-近鄰演算法程式碼註釋（一）

from numpy import * import operator def createDataSet(): group = array([[1.0,1.1],[1.0,1.0]

鄒博機器學習演算法最新版( 吳恩達前輩、唐宇迪老師、張志華老師多家對比，入門最優 ) --- 獻給想要入門、或者想要進階的朋友

慌慌張張,匆匆忙忙,生活本來就是這樣很喜歡郝雲的《活著》這首歌，很生動的描述了現代年輕上班族的生活。時光飛逝，從開始接觸機器學習已經一年多了，現已成功從安卓移動端轉戰機器學習現在也如願從事機器學習的工作，雖初出茅廬，卻也拿到了比較滿意的25+ 想起當

程式碼註釋（code comments)

常見的程式碼tag： FIXME - should be corrected. HACK - a workaround. TODO - something to be done. UNDONE - a reversal or “roll back” of

Eclipse中自動添加註釋（作者，時間）

ava 進入 new java 模式 window templates nts 自動方法一：Eclipse中設置在創建新類時自動生成註釋 windows-->preference Java-->Code Style-->Code Templates co

面向對象設計原則迪米特法則（Law of Demeter）

ast 關系 ade 描述松耦合系統中介模式 dem 可能迪米特法則（Law of Demeter）又叫作最少知識原則（Least Knowledge Principle 簡寫LKP），英文簡寫為: LoD. 這是一種面向對象程序設計的指導原則，它描述了

python檔案操作典型程式碼實現（非常經典！）

1. 編寫一個程式，統計當前目錄下每個檔案型別的檔案數，程式實現如圖：實現程式碼： import os all_files = os.listdir(os.chdir("D:\\")) type_dict = dict() for each_file in all_files: if os.pa

唐宇迪Seq2Seq程式碼+註釋（tensorflow1.2版本）

相關推薦