Java 實現按行讀取檔案並且將行中的重複資料刪除
package com.gaden.delerepeat;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.TreeSet;
import com.gaden.Transilate.WriteFile;
public class DeleRepeat {
public static void main(String[] args ) throws Exception {
if(args.length==2){
String inputPath = args[0];
String outputPath = args[1];
String content = readFileDeleReapet(inputPath);
WriteFile.writeFile(outputPath,content);
}else{
System.out.println("Error args!");
}
}
public static String Delerepeat(String line){
String ls="";
line = line.replaceAll("[\\pP‘’“”]", ""); //把所有標點符號都去掉
line = line.replaceAll("\\s{2
String[] str = line.trim().split(" ");
TreeSet<String> set = new TreeSet<String>();
for (int i = 0; i < str.length; i++) {
set.add(str[i]);
}
str = (String[]) set.toArray(new String[0]);
for (int i = 0; i < str.length; i++) {
ls += str[i]+" ";
}
System.out.println(ls);
return ls;
}
public static String readFileDeleReapet(String filePathAndName) {
String fileContent = "";
try {
File f = new File(filePathAndName);
if (f.isFile() && f.exists()) {
InputStreamReader read = new InputStreamReader(
new FileInputStream(f), "UTF-8");
BufferedReader reader = new BufferedReader(read);
String line;
while ((line = reader.readLine()) != null) {
fileContent += Delerepeat(line)+"\r\n";
}
read.close();
}
} catch (Exception e) {
System.out.println("du wenjian cuo wu");
e.printStackTrace();
}
return fileContent;
}
}
匹配URL:
# Case-insensitive URL pattern: optional scheme, then a host (domain name,
# "localhost" or dotted IPv4), an optional ":port", and an optional path.
# The pattern is unanchored; use fullmatch() to require the whole string.
regex_luo = re.compile(
    # Optional scheme: http://, https://, ftp:// or ftps://.
    # This must be a group; written as a [...] character class it would match
    # a single optional character and every URL with a scheme would fail.
    r'(?:(?:http|ftp)s?://)?'
    # Domain name: dot-terminated labels followed by a top-level label ...
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'
    # ... or the literal host "localhost" ...
    r'localhost|'
    # ... or four dot-separated groups of 1-3 digits (IPv4 shape, not range-checked).
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
    # Optional port, introduced by a colon.
    r'(?::\d+)?'
    # Optional path starting with "/".
    # NOTE(review): inside the class, `/-~` is parsed as a range from '/' to
    # '~', not as three literal characters; kept unchanged -- confirm intent.
    r'(?:/[a-zA-Z0-9\&%_\./-~-]*)?', re.IGNORECASE)