001/* 002 * Copyright © 2017 The Archives Unleashed Project 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package io.archivesunleashed.data; 018 019import java.io.DataInput; 020import java.io.DataOutput; 021import java.io.IOException; 022import org.apache.hadoop.io.Writable; 023import org.archive.io.ArchiveRecord; 024import org.archive.io.arc.ARCRecord; 025import org.archive.io.warc.WARCRecord; 026 027/** Implements Hadoop Writable for Archive Records. */ 028public class ArchiveRecordWritable implements Writable { 029 030 /** Archive Formats that can be used. {@link #UNKNOWN} {@link #ARC} {@link #WARC} */ 031 public enum ArchiveFormat { 032 /** UNKNOWN format. */ 033 UNKNOWN, 034 035 /** ARC format. */ 036 ARC, 037 038 /** WARC format. */ 039 WARC 040 } 041 042 /** Set default Record format to UNKNOWN. */ 043 private ArchiveFormat format = ArchiveFormat.UNKNOWN; 044 045 /** Initialize Archive Record to null. */ 046 private ArchiveRecord record = null; 047 048 /** Utility function. */ 049 public ArchiveRecordWritable() {} 050 051 /** 052 * Initialize Archive Record. 053 * 054 * @param r Archive Record 055 */ 056 public ArchiveRecordWritable(final ArchiveRecord r) { 057 this.record = r; 058 detectFormat(); 059 } 060 061 /** 062 * Set Archive Record. 063 * 064 * @param r Archive Record 065 */ 066 public final void setRecord(final ArchiveRecord r) { 067 this.record = r; 068 detectFormat(); 069 } 070 071 /** 072 * Get Archive Record. 073 * 074 * @return record Archive Record 075 */ 076 public final ArchiveRecord getRecord() { 077 return record; 078 } 079 080 /** Detect format of Archive Record. */ 081 public final void detectFormat() { 082 if (record instanceof ARCRecord) { 083 format = ArchiveFormat.ARC; 084 } else if (record instanceof WARCRecord) { 085 format = ArchiveFormat.WARC; 086 } else { 087 format = ArchiveFormat.UNKNOWN; 088 } 089 } 090 091 /** 092 * Get format of Archive Record. 093 * 094 * @return format of Archive Record 095 */ 096 public final ArchiveFormat getFormat() { 097 return format; 098 } 099 100 /** 101 * Set format of Archive Record. 102 * 103 * @param f format of Archive Record 104 */ 105 public final void setFormat(final ArchiveFormat f) { 106 this.format = f; 107 } 108 109 @Override 110 public final void readFields(final DataInput in) throws IOException { 111 int len = in.readInt(); 112 if (len == 0) { 113 this.record = null; 114 return; 115 } 116 117 byte[] bytes = new byte[len]; 118 in.readFully(bytes); 119 120 if (getFormat() == ArchiveFormat.ARC) { 121 this.record = ArcRecordUtils.fromBytes(bytes); 122 } else if (getFormat() == ArchiveFormat.WARC) { 123 this.record = WarcRecordUtils.fromBytes(bytes); 124 } else { 125 this.record = null; 126 } 127 } 128 129 @Override 130 public final void write(final DataOutput out) throws IOException { 131 if (record == null) { 132 out.writeInt(0); 133 } 134 byte[] bytes; 135 136 if (getFormat() == ArchiveFormat.ARC) { 137 bytes = ArcRecordUtils.toBytes((ARCRecord) record); 138 } else if (getFormat() == ArchiveFormat.WARC) { 139 bytes = WarcRecordUtils.toBytes((WARCRecord) record); 140 } else { 141 bytes = null; 142 } 143 144 out.writeInt(bytes.length); 145 out.write(bytes); 146 } 147}