文档章节

Unable to parse the default media type registry

liuhuics10
 liuhuics10
发布于 2016/06/02 18:15
字数 1347
阅读 888
收藏 0

今天写了一段获取MIME类型的代码,对比用org.apache.tika和net.sf.jmimemagic。

jdk版本是1.8.

1.pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<!-- Original build file for the MIME-type comparison demo. With this dependency set,
		constructing Tika's AutoDetectParser fails with "Unable to parse the default media
		type registry" (see the stack trace later in the article). -->
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<!-- FileUtils.getMimeTypeBySpring imports
			org.springframework.mail.javamail.ConfigurableMimeFileTypeMap; this is the only
			Spring artifact declared here. -->
		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika, used by FileUtils.getMimeByTika. -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic. Adding it puts
			xercesImpl-2.2.4.0.jar on the classpath; its SAXParserFactoryImpl is then picked
			up instead of the JDK one and rejects the secure-processing feature that Tika
			sets. The corrected pom.xml at the end of the article excludes xercesImpl. -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

2.FileUtils.java:

package mime;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.FileNameMap;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.Locale;

import javax.activation.MimetypesFileTypeMap;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.springframework.mail.javamail.ConfigurableMimeFileTypeMap;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import net.sf.jmimemagic.Magic;
import net.sf.jmimemagic.MagicException;
import net.sf.jmimemagic.MagicMatch;
import net.sf.jmimemagic.MagicMatchNotFoundException;
import net.sf.jmimemagic.MagicParseException;

/**
 * Side-by-side implementations of MIME type lookup for a file, using
 * {@code javax.activation}, Spring, {@code java.net.URLConnection}, Apache Tika and
 * jMimeMagic, so that their results can be compared.
 */
public class FileUtils {

	/** Generic binary type returned when no more specific mapping is found. */
	private static final String OCTET_STREAM = "application/octet-stream";

	/**
	 * Looks up the MIME type through {@link MimetypesFileTypeMap}.
	 *
	 * @param path path of the file to inspect
	 * @return the content type reported by the map for that file
	 */
	public static String getMimeTypeByFileTypeMap(String path) {
		MimetypesFileTypeMap typeMap = new MimetypesFileTypeMap();
		// The default table has no pdf entry (a pdf would come back as
		// application/octet-stream) and no application/xml entry either; pdf is
		// registered explicitly here.
		typeMap.addMimeTypes("application/pdf pdf");
		return typeMap.getContentType(new File(path));
	}

	/**
	 * Resolves the MIME type from a small built-in extension table and falls back to
	 * {@link MimetypesFileTypeMap} for any other extension.
	 *
	 * @param path file name or path; only the text from the last {@code '.'} onwards
	 *             is examined
	 * @return the mapped type, or {@code application/octet-stream} when the path
	 *         contains no {@code '.'}
	 */
	public static String getMimeTypeByFileTypeMap2(String path) {
		int dot = path.lastIndexOf('.');
		if (dot == -1) {
			return OCTET_STREAM;
		}
		// Locale.ROOT keeps the comparison independent of the default locale; with a
		// Turkish default locale ".GIF".toLowerCase() would not yield ".gif".
		String fileExtension = path.substring(dot).toLowerCase(Locale.ROOT);
		switch (fileExtension) {
			case ".html":
				return "text/html";
			case ".css":
				return "text/css";
			case ".js":
				return "application/javascript";
			case ".gif":
				return "image/gif";
			case ".png":
				return "image/png";
			case ".txt":
				return "text/plain";
			case ".xml":
				return "application/xml";
			case ".json":
				return "application/json";
			default:
				return new MimetypesFileTypeMap().getContentType(path);
		}
	}

	/**
	 * Looks up the MIME type through Spring's {@link ConfigurableMimeFileTypeMap}.
	 *
	 * @param path file name or path
	 * @return the content type reported by the Spring map
	 */
	public static String getMimeTypeBySpring(String path) {
		// This map has no application/xml entry.
		return new ConfigurableMimeFileTypeMap().getContentType(path);
	}

	/**
	 * Looks up the MIME type through the JDK's {@link FileNameMap}.
	 *
	 * @param fileUrl file name or path; it is URL-encoded as UTF-8 before the lookup
	 * @return the mapped type, {@code application/octet-stream} when the map has no
	 *         entry, or an empty string if the encoding step fails
	 */
	public static String getMimeByFileNameMap(String fileUrl) {
		FileNameMap fileNameMap = URLConnection.getFileNameMap();
		try {
			String mimeType = fileNameMap
					.getContentTypeFor(URLEncoder.encode(fileUrl, "UTF-8"));
			return mimeType != null ? mimeType : OCTET_STREAM;
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
			return "";
		}
	}

	/**
	 * Detects the MIME type by parsing the file with Apache Tika's
	 * {@link AutoDetectParser}. The names of all metadata entries produced by the
	 * parse are printed to standard output.
	 *
	 * @param fileUrl path of the file to parse
	 * @return the {@code Content-Type} metadata value, or {@code null} if the file
	 *         could not be read or parsed
	 */
	public static String getMimeByTika(String fileUrl) {
		String mimeType = null;
		try {
			ContentHandler handler = new BodyContentHandler();

			Metadata metadata = new Metadata();
			metadata.add(Metadata.CONTENT_ENCODING, "utf-8");
			metadata.set(Metadata.RESOURCE_NAME_KEY, fileUrl);

			// DefaultParser could not determine the MIME type here, hence AutoDetectParser.
			Parser parser = new AutoDetectParser();

			ParseContext context = new ParseContext();
			context.set(Parser.class, parser);

			// parse() leaves closing the stream to the caller, so it is closed here
			// rather than leaking a file handle on every call.
			try (FileInputStream input = new FileInputStream(fileUrl)) {
				parser.parse(input, handler, metadata, context);
			}

			for (String name : metadata.names()) {
				System.out.println(name);
			}
			mimeType = metadata.get(Metadata.CONTENT_TYPE);
		} catch (IOException | TikaException | SAXException e) {
			e.printStackTrace();
		}
		return mimeType;
	}

	/**
	 * Detects the MIME type with jMimeMagic.
	 *
	 * @param fileUrl path of the file to inspect
	 * @return the MIME type of the match, or an empty string when jMimeMagic throws
	 */
	public static String getMimeByJMimeMagic(String fileUrl) {
		try {
			MagicMatch match = Magic.getMagicMatch(new File(fileUrl), true);
			return match.getMimeType();
		} catch (MagicParseException | MagicMatchNotFoundException | MagicException e) {
			e.printStackTrace();
		}
		return "";
	}

}

3.MIMETest.java:

package mime;

/**
 * Command-line demo that runs every {@code FileUtils} MIME lookup against one sample
 * path and prints each result.
 */
public class MIMETest {
	public static void main(String[] args) {

		// Resource under the src directory; to test it, use getPath(fileName) as the path.
		// Another candidate that was tried: "funds.properties"
		String fileName = "createPerson.sql";

		// Absolute path under test. Other inputs that were tried:
		//   E:/test/process.txt
		//   E:/test/02.jpg
		//   E:/Anheng/receiver-design.pdf
		//   E:/api/dom4j.chm
		//   E:/eclipse/ajax/pom.xml
		//   E:/test/person.json
		//   E:/test/file.java
		//   E:/test/static.ftl
		//   E:/test/rest.jerseySpring.war
		//   E:/test/upload/myeclipse.exe
		String path = "E:/test/upload/myeclipse.ini";

		String byFileTypeMap = FileUtils.getMimeTypeByFileTypeMap(path);
		System.out.println("getMimeTypeByFileTypeMap: Mime Type of " + path + " is " + byFileTypeMap);

		String byFileTypeMap2 = FileUtils.getMimeTypeByFileTypeMap2(path);
		System.out.println("getMimeTypeByFileTypeMap2: Mime Type of " + path + " is " + byFileTypeMap2);

		String bySpring = FileUtils.getMimeTypeBySpring(path);
		System.out.println("getMimeTypeBySpring: Mime Type of " + path + " is " + bySpring);

		String byFileNameMap = FileUtils.getMimeByFileNameMap(path);
		System.out.println("getMimeByFileNameMap: Mime Type of " + path + " is " + byFileNameMap);

		/*
		 * Observations for Tika compared with the four lookups above:
		 * - Tika checks that the path is valid.
		 * - .properties -> text/x-java-properties; the others only give application/octet-stream.
		 * - .sql -> text/x-sql; the others only give application/octet-stream.
		 * - .json -> application/json; the others give application/octet-stream unless
		 *   that type was added explicitly.
		 * - .java -> text/x-java-source; the first two give application/octet-stream,
		 *   the last two give text/plain.
		 * - .ftl -> text/html; the others give application/octet-stream.
		 * - .war -> application/x-tika-java-web-archive; the others give application/octet-stream.
		 * - .exe -> application/x-dosexec; the others give application/octet-stream.
		 * - .ini -> text/x-ini; the others give application/octet-stream.
		 */

		String byTika = FileUtils.getMimeByTika(path);
		System.out.println("getMimeByTika: Mime Type of " + path + " is " + byTika);

		String byJMimeMagic = FileUtils.getMimeByJMimeMagic(path);
		System.out.println("getMimeByJMimeMagic: Mime Type of " + path + " is " + byJMimeMagic);

	}

	/**
	 * Builds the path of a file under {@code src/main/resources} of the current
	 * working directory, using the platform file separator.
	 *
	 * @param fileName name of the resource file
	 * @return {@code <user.dir>/src/main/resources/<fileName>} with platform separators
	 */
	private static String getPath(String fileName) {
		String workingDir = System.getProperty("user.dir");
		String sep = System.getProperty("file.separator");
		StringBuilder location = new StringBuilder();
		location.append(workingDir);
		location.append(sep).append("src");
		location.append(sep).append("main");
		location.append(sep).append("resources");
		location.append(sep).append(fileName);
		return location.toString();
	}
}

原本只测试Tika,即不加入jmimemagic的依赖时,测试正常,后来加入jmimemagic依赖,报错如下:

Exception in thread "main" java.lang.RuntimeException: Unable to parse the default media type registry
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:580)
	at org.apache.tika.config.TikaConfig.getDefaultMimeTypes(TikaConfig.java:69)
	at org.apache.tika.config.TikaConfig.<init>(TikaConfig.java:218)
	at org.apache.tika.config.TikaConfig.getDefaultConfig(TikaConfig.java:341)
	at org.apache.tika.parser.AutoDetectParser.<init>(AutoDetectParser.java:51)
	at mime.FileUtils.getMimeByTika(FileUtils.java:103)
	at mime.MIMETest.main(MIMETest.java:48)
Caused by: org.apache.tika.mime.MimeTypeException: Invalid type configuration
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:126)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:64)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:93)
	at org.apache.tika.mime.MimeTypesFactory.create(MimeTypesFactory.java:170)
	at org.apache.tika.mime.MimeTypes.getDefaultMimeTypes(MimeTypes.java:577)
	... 6 more
Caused by: org.xml.sax.SAXNotRecognizedException: http://javax.xml.XMLConstants/feature/secure-processing
	at org.apache.xerces.parsers.AbstractSAXParser.setFeature(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.setFeatures(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserImpl.<init>(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.newSAXParserImpl(Unknown Source)
	at org.apache.xerces.jaxp.SAXParserFactoryImpl.setFeature(Unknown Source)
	at org.apache.tika.mime.MimeTypesReader.read(MimeTypesReader.java:119)
	... 10 more

按照错误提示,在FileUtils.java:103即getMimeByTika方法下的Parser parser = new AutoDetectParser();处打断点,在加net.sf.jmimemagic依赖前后对比异常原因,发现了下面一个现象:

加net.sf.jmimemagic前,javax.xml.parsers.SAXParserFactory的实现类是com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl,该类在jdk自带jar包

rt.jar-->com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl.class下,如图:

而加net.sf.jmimemagic后,javax.xml.parsers.SAXParserFactory的实现类变成了

xercesImpl-2.2.4.0.jar-->org.apache.xerces.jaxp.SAXParserFactoryImpl.class,

如图:

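该类在setFeature()时抛出了异常。也就是说,类路径上同时存在两个SAXParserFactory的实现(JDK自带的和xercesImpl-2.2.4.0.jar中的),运行时选用了xercesImpl中的实现,而它不识别Tika设置的secure-processing特性(http://javax.xml.XMLConstants/feature/secure-processing),因此抛出SAXNotRecognizedException。故我们将xercesImpl-2.2.4.0.jar排除掉即可,修改后的pom.xml如下所示: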

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<!-- Corrected build file for the MIME-type comparison demo: identical to the original
		except that xercesImpl is excluded from the jmimemagic dependency. -->
	<modelVersion>4.0.0</modelVersion>

	<groupId>hui</groupId>
	<artifactId>TestWithMaven</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>TestWithMaven</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>joda-time</groupId>
			<artifactId>joda-time</artifactId>
			<version>2.9.2</version>
		</dependency>
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
			<version>1.7.13</version>
		</dependency>
		<dependency>
			<groupId>org.apache.ibatis</groupId>
			<artifactId>ibatis-core</artifactId>
			<version>3.0</version>
		</dependency>
		<dependency>
			<groupId>org.mybatis</groupId>
			<artifactId>mybatis</artifactId>
			<version>3.4.0</version>
		</dependency>
		<dependency>
			<groupId>mysql</groupId>
			<artifactId>mysql-connector-java</artifactId>
			<version>5.1.38</version>
		</dependency>

		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
		</dependency>
		<dependency>
			<groupId>org.hamcrest</groupId>
			<artifactId>hamcrest-core</artifactId>
			<version>1.3</version>
		</dependency>

		<dependency>
			<groupId>org.springframework</groupId>
			<artifactId>spring-context-support</artifactId>
			<version>4.2.2.RELEASE</version>
		</dependency>
		<!-- Apache Tika, used by FileUtils.getMimeByTika. -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-core</artifactId>
			<version>1.13</version>
		</dependency>

		<!-- jMimeMagic, used by FileUtils.getMimeByJMimeMagic. xercesImpl is excluded so
			that its SAXParserFactoryImpl no longer replaces the JDK implementation; with
			it on the classpath Tika failed with SAXNotRecognizedException for the
			secure-processing feature. -->
		<dependency>
			<groupId>net.sf.jmimemagic</groupId>
			<artifactId>jmimemagic</artifactId>
			<version>0.1.4</version>
			<exclusions>
				<exclusion>
						<groupId>xerces</groupId>
						<artifactId>xercesImpl</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>xml-apis</groupId>
			<artifactId>xmlParserAPIs</artifactId>
			<version>2.0.2</version>
		</dependency>




	</dependencies>
</project>

至此,再运行,则各方法都不再抛异常。

© 著作权归作者所有

liuhuics10
粉丝 4
博文 97
码字总数 39010
作品 0
烟台
私信 提问
加载中

评论(0)

docker在centos7上安装私服镜像各种操蛋修复

centos6.5, docker 1.7.1 vi /etc/sysconfig/docker #修改 otherargs="--insecure-registry 192.168.1.110:5000" #重启服务 service docker restart centos7.2,docker 1.9.1; vi /usr/lib/s......

testwork
2016/05/09
997
0
Python argparse 模块参考手册

是 Python 标准库中用来解析命令行参数和选项的模块,其是为替代已经过时的 optparse 模块而生的,该模块在 Python2.7 中被引入。argparse模块的作用是用于解析命令行参数。 创建解析器 使用...

Konghy
2016/06/30
286
0
Spring 3.1.0.RC2 发布

Bug [SPR-6510] - JodaTime failing to parse date format with timezone name [SPR-7721] - formMultiSelect macro in spring.ftl causes freemarker.template.TemplateException [SPR-7881......

咖啡碼農
2011/11/29
1.9K
4
mvn archetype:create 命令报错

把错误全贴出来了,没看懂啥意思。 jdk版本1.7.0_75 ERROR] Failed to execute goal org.apache.maven.plugins:maven-archetype-plugin:2.4:create (default-cli) on project standalone-pom......

super-wen
2015/12/14
3.2K
1
根据图片路径转URI

private String getUri(String picpath){ Uri mUri = Uri.parse("content://media/external/images/media"); Uri mImageUri = null; @SuppressWarnings("deprecation") Cursor cursor = mana......

tomcater
2015/12/21
143
0

没有更多内容

加载失败,请刷新页面

加载更多

在两个日期之间查找对象MongoDB

我一直在围绕在mongodb中存储推文,每个对象看起来像这样: {"_id" : ObjectId("4c02c58de500fe1be1000005"),"contributors" : null,"text" : "Hello world","user" : { "following......

javail
15分钟前
35
0
《aelf经济和治理白皮书》重磅发布:为DAPP提供治理高效、价值驱动的生态环境

2020年2月17日,aelf正式发布《aelf经济和治理白皮书》,这是aelf继项目白皮书后,在aelf网络经济模型和治理模式方面的权威论述。 《aelf经济和治理白皮书》描述了aelf生态中各个角色及利益的...

AELF开发者社区
26分钟前
44
0
EditText的首字母大写

我正在开发一个小小的个人待办事项列表应用程序,到目前为止,一切都运行良好。 我想知道一个小怪癖。 每当我去添加一个新项目时,我都会看到一个带有EditText视图的Dialog。 当我选择EditT...

技术盛宴
30分钟前
30
0
战疫 | 高德工程师如何在3天上线“医护专车”

新冠状病毒肺炎疫情突袭,无数医护人员放弃与家人团聚,明知凶险,仍然奋战在一线。但因为武汉公交、地铁、网约车停运,医护人员上下班很难。白衣天使疾呼打车难。 (截图摘自《财经国家周刊...

amap_tech
37分钟前
41
0
img在IE中无法按比例显示

在IE浏览器中使用img标签当给img标签设置width:98%时,显示时还是会把img的原始高度显示出来 解决方式给父标签设置width,但width不能使用100%需要指定一个值 <div style="width:900px;"> ...

有理想的鸭子
38分钟前
45
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部