Unable to parse the default media type registry

原创
2016/06/02 18:15
阅读数 1K

今天写了一段获取MIME类型的代码,对比了几种实现方式,其中包括org.apache.tika和net.sf.jmimemagic。

jdk版本是1.8.

1.pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- Coordinates of the sample project used to compare MIME-type detection approaches. -->
	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<!-- The next dependencies (joda-time through hamcrest-core) are not referenced by the MIME classes in this write-up. -->
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<!-- FileUtils imports org.springframework.mail.javamail.ConfigurableMimeFileTypeMap; presumably supplied by this artifact. -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika, used by FileUtils.getMimeByTika. -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic.
		     NOTE: per the write-up, adding this dependency as-is puts an old xercesImpl jar on the
		     classpath and Tika then fails with "Unable to parse the default media type registry". -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

2.FileUtils.java:

package mime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.FileNameMap;
import java.net.URLConnection;
import java.net.URLEncoder;

import javax.activation.MimetypesFileTypeMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * Collects several alternative ways of determining the MIME type of a file so
 * that their results can be compared side by side.
 *
 * <p>Each method takes a path (or file name) as a string and returns the MIME
 * type reported by one particular library or lookup table.
 */
public class FileUtils {

	/**
	 * Looks up the MIME type with {@link MimetypesFileTypeMap}.
	 *
	 * @param path path of the file to classify
	 * @return the content type reported by the map
	 */
	public static String getMimeTypeByFileTypeMap(String path) {
		MimetypesFileTypeMap mimetypesFileTypeMap = new MimetypesFileTypeMap();
		// The default map has no pdf entry, so a pdf would come back as
		// application/octet-stream; application/xml is missing as well.
		mimetypesFileTypeMap.addMimeTypes("application/pdf pdf");
		File f = new File(path);
		return mimetypesFileTypeMap.getContentType(f);
	}

	/**
	 * Resolves the MIME type from a small hard-coded extension table and falls
	 * back to {@link MimetypesFileTypeMap} for every other extension.
	 *
	 * @param path path or file name; everything from the last {@code '.'} on is
	 *             treated as the extension
	 * @return the MIME type; {@code application/octet-stream} when the path
	 *         contains no {@code '.'}
	 */
	public static String getMimeTypeByFileTypeMap2(String path) {
		int idx = path.lastIndexOf('.');
		if (idx == -1) {
			return "application/octet-stream";
		}
		// Locale.ROOT keeps the lookup independent of the default locale; under
		// a Turkish locale a plain toLowerCase() turns ".GIF" into ".gıf".
		String fileExtension = path.substring(idx).toLowerCase(java.util.Locale.ROOT);
		switch (fileExtension) {
		case ".html":
			return "text/html";
		case ".css":
			return "text/css";
		case ".js":
			return "application/javascript";
		case ".gif":
			return "image/gif";
		case ".png":
			return "image/png";
		case ".txt":
			return "text/plain";
		case ".xml":
			return "application/xml";
		case ".json":
			return "application/json";
		default:
			return new MimetypesFileTypeMap().getContentType(path);
		}
	}

	/**
	 * Looks up the MIME type with Spring's {@link ConfigurableMimeFileTypeMap}.
	 *
	 * @param path path or file name to classify
	 * @return the content type reported by the map
	 */
	public static String getMimeTypeBySpring(String path) {
		ConfigurableMimeFileTypeMap mimeMap = new ConfigurableMimeFileTypeMap();
		// This map has no application/xml entry.
		return mimeMap.getContentType(path);
	}

	/**
	 * Looks up the MIME type through the JDK's {@link FileNameMap}.
	 *
	 * @param fileUrl path or file name; it is URL-encoded as UTF-8 before the lookup
	 * @return the MIME type, {@code application/octet-stream} when the map
	 *         returns {@code null}, or an empty string if the encoding step fails
	 */
	public static String getMimeByFileNameMap(String fileUrl) {
		FileNameMap fileNameMap = URLConnection.getFileNameMap();
		try {
			String mimeType = fileNameMap
					.getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8"));
			if (mimeType == null) {
				mimeType = "application/octet-stream";
			}
			return mimeType;
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Parses the file with Apache Tika's {@link AutoDetectParser} and returns the
	 * content type found in the resulting metadata. The names of all metadata
	 * entries are printed to standard output along the way.
	 *
	 * @param fileUrl path of the file to open and parse
	 * @return the value of {@code Metadata.CONTENT_TYPE}, or {@code null} when
	 *         opening or parsing the file fails (the exception is printed)
	 */
	public static String getMimeByTika(String fileUrl) {
		String mimeType = null;

		ContentHandler contenthandler = new BodyContentHandler();

		Metadata metadata = new Metadata();
		metadata.add(Metadata.CONTENT_ENCODING, "utf-8");
		metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl);

		// Using "new DefaultParser()" here did not yield a MIME type.
		Parser parser = new AutoDetectParser();

		ParseContext context = new ParseContext();
		context.set(Parser.class, parser);

		// try-with-resources: the parser does not close the stream for us, so
		// without this the file handle leaked on every call.
		try (FileInputStream input = new FileInputStream(fileUrl)) {
			parser.parse(input, contenthandler, metadata, context);

			for (String name : metadata.names()) {
				System.out.println(name);
			}
			mimeType = metadata.get(Metadata.CONTENT_TYPE);
		} catch (IOException | TikaException | SAXException e) {
			e.printStackTrace();
		}
		return mimeType;
	}

	/**
	 * Determines the MIME type with jMimeMagic.
	 *
	 * @param fileUrl path of the file to inspect
	 * @return the MIME type of the match, or an empty string when jMimeMagic
	 *         throws (the exception is printed)
	 */
	public static String getMimeByJMimeMagic(String fileUrl) {
		try {
			MagicMatch match = Magic.getMagicMatch(new File(fileUrl), true);
			return match.getMimeType();
		} catch (MagicParseException | MagicMatchNotFoundException | MagicException e) {
			e.printStackTrace();
		}
		return "";
	}

}

3.MIMETest.java:

package mime;

/**
 * Console driver: prints the MIME type that each {@code FileUtils} strategy
 * reports for a single sample path.
 */
public class MIMETest {

	/**
	 * Writes one result line to standard output.
	 *
	 * @param prefix   leading text naming the strategy
	 * @param path     path that was classified
	 * @param mimeType type reported by the strategy
	 */
	private static void report(String prefix, String path, String mimeType) {
		System.out.println(prefix + path + " is " + mimeType);
	}

	public static void main(String[] args) {

		// File located in the source resources directory; to use it, build the
		// path with getPath(fileName). Another candidate: "funds.properties".
		String fileName = "createPerson.sql";

		// Absolute sample path. Other inputs that were tried:
		//   E:/test/process.txt, E:/test/02.jpg, E:/Anheng/receiver-design.pdf,
		//   E:/api/dom4j.chm, E:/eclipse/ajax/pom.xml, E:/test/person.json,
		//   E:/test/file.java, E:/test/static.ftl, E:/test/rest.jerseySpring.war,
		//   E:/test/upload/myeclipse.exe
		String path = "E:/test/upload/myeclipse.ini";

		String byFileTypeMap = FileUtils.getMimeTypeByFileTypeMap(path);
		report("getMimeTypeByFileTypeMap: Mime Type of ", path, byFileTypeMap);

		String byFileTypeMap2 = FileUtils.getMimeTypeByFileTypeMap2(path);
		report("getMimeTypeByFileTypeMap2: Mime Type of ", path, byFileTypeMap2);

		String bySpring = FileUtils.getMimeTypeBySpring(path);
		report("getMimeTypeBySpring: Mime Type of ", path, bySpring);

		String byFileNameMap = FileUtils.getMimeByFileNameMap(path);
		report("getMimeByFileNameMap: Mime Type of ", path, byFileNameMap);

		/*
		 * Observations recorded for Tika (it also validates the path) versus
		 * the four approaches above:
		 *  .properties -> text/x-java-properties; the others only give application/octet-stream
		 *  .sql        -> text/x-sql; the others only give application/octet-stream
		 *  .json       -> application/json; the others give application/octet-stream
		 *                 unless that type was added explicitly
		 *  .java       -> text/x-java-source; the first two give application/octet-stream,
		 *                 the last two give text/plain
		 *  .ftl        -> text/html; the others give application/octet-stream
		 *  .war        -> application/x-tika-java-web-archive; the others give application/octet-stream
		 *  .exe        -> application/x-dosexec; the others give application/octet-stream
		 *  .ini        -> text/x-ini; the others give application/octet-stream
		 */

		String byTika = FileUtils.getMimeByTika(path);
		report("getMimeByTika: Mime Type of ", path, byTika);

		String byJMimeMagic = FileUtils.getMimeByJMimeMagic(path);
		report("getMimeByJMimeMagic: Mime Type of ", path, byJMimeMagic);

	}

	/**
	 * Builds the path of a file under {@code src/main/resources} below the
	 * current working directory ({@code user.dir}), using the platform's file
	 * separator.
	 *
	 * @param fileName name of the file inside the resources directory
	 * @return the assembled path
	 */
	private static String getPath(String fileName) {
		StringBuilder location = new StringBuilder();
		location.append(System.getProperty("user.dir"));
		String separator = System.getProperty("file.separator");
		for (String segment : new String[] { "src", "main", "resources" }) {
			location.append(separator).append(segment);
		}
		return location.append(separator).append(fileName).toString();
	}
}

原本只测试Tika,即不加入jmimemagic的依赖时,测试正常,后来加入jmimemagic依赖,报错如下:

Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580)
	at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69)
	at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218)
	at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341)
	at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51)
	at mime.FileUtils.getMimeByTika(FileUtils.java:103)
	at mime.MIMETest.main(MIMETest.java:48)
Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170)
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577)
	... 6 more
Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing
	at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source)
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119)
	... 10 more

按照错误提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();处打断点,对比加入net.sf.jmimemagic依赖前后的异常原因,发现了下面一个现象:

加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的子类是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,该类在jdk自带jar包

rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如图:

而加net.sf.jmimemagic后,javax.xml.parsers.SAXParserFactory的子类变成了

xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,

如图:

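该类在setFeature()时抛出了异常。也就是说,类路径上同时存在两个SAXParserFactory的实现类(简单类名相同,但包名不同),加入依赖后实际加载的是xercesImpl中的旧版实现,而它不识别http://javax.xml.XMLConstants/feature/secure-processing这个特性,于是抛出SAXNotRecognizedException。故我们将xercesImpl-2.2.4.0.jar排除掉即可,修改后的pom.xml如下所示: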

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- Coordinates of the sample project used to compare MIME-type detection approaches. -->
	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<!-- The next dependencies (joda-time through hamcrest-core) are not referenced by the MIME classes in this write-up. -->
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<!-- FileUtils imports org.springframework.mail.javamail.ConfigurableMimeFileTypeMap; presumably supplied by this artifact. -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika, used by FileUtils.getMimeByTika. -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic. -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
			<!-- Keep the old xercesImpl jar off the classpath: its SAXParserFactory rejects the
			     secure-processing feature that Tika sets, which caused
			     "Unable to parse the default media type registry". -->
			<exclusions>
				<exclusion>
						<groupId>xerces</groupId>
						<artifactId>xercesImpl</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

至此,再运行,则各方法都不再抛异常。

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部