/*
 * Copyright © 2017 The Archives Unleashed Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.archivesunleashed.data;

import org.apache.hadoop.io.Writable;
import org.archive.io.ArchiveRecord;
import org.archive.io.arc.ARCRecord;
import org.archive.io.warc.WARCRecord;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Implements Hadoop Writable for Archive Records.
 *
 * <p>The serialized form is a 4-byte length prefix followed by that many
 * payload bytes; a length of zero stands for "no record". The record format
 * is <em>not</em> part of the serialized form, so {@link #readFields} decodes
 * the payload according to whatever {@link #getFormat()} returns at the time
 * of the call.
 */
public class ArchiveRecordWritable implements Writable {

  /**
   * Archive Formats that can be used.
   * {@link #UNKNOWN}
   * {@link #ARC}
   * {@link #WARC}
   */
  public enum ArchiveFormat {
    /**
     * UNKNOWN format.
     */
    UNKNOWN,

    /**
     * ARC format.
     */
    ARC,

    /**
     * WARC format.
     */
    WARC
  }

  /**
   * Format of the wrapped record; defaults to UNKNOWN.
   */
  private ArchiveFormat format = ArchiveFormat.UNKNOWN;

  /**
   * The wrapped Archive Record; null when no record is held.
   */
  private ArchiveRecord record = null;

  /**
   * Creates an empty instance holding no record, with format UNKNOWN.
   */
  public ArchiveRecordWritable() {
  }

  /**
   * Creates an instance wrapping the given record and detects its format.
   *
   * @param r Archive Record
   */
  public ArchiveRecordWritable(final ArchiveRecord r) {
    this.record = r;
    detectFormat();
  }

  /**
   * Set Archive Record and re-detect its format.
   *
   * @param r Archive Record
   */
  public final void setRecord(final ArchiveRecord r) {
    this.record = r;
    detectFormat();
  }

  /**
   * Get Archive Record.
   *
   * @return record Archive Record, or null if none is held
   */
  public final ArchiveRecord getRecord() {
    return record;
  }

  /**
   * Detect format of Archive Record from its runtime type.
   *
   * <p>Sets the format to ARC for an {@link ARCRecord}, WARC for a
   * {@link WARCRecord}, and UNKNOWN for anything else (including null).
   */
  public final void detectFormat() {
    if (record instanceof ARCRecord) {
      format = ArchiveFormat.ARC;
    } else if (record instanceof WARCRecord) {
      format = ArchiveFormat.WARC;
    } else {
      format = ArchiveFormat.UNKNOWN;
    }
  }

  /**
   * Get format of Archive Record.
   *
   * @return format of Archive Record
   */
  public final ArchiveFormat getFormat() {
    return format;
  }

  /**
   * Set format of Archive Record.
   *
   * <p>This does not touch the held record; it only changes how
   * {@link #readFields} and {@link #write} interpret it.
   *
   * @param f format of Archive Record
   */
  public final void setFormat(final ArchiveFormat f) {
    this.format = f;
  }

  /**
   * Reads a length-prefixed payload and rebuilds the record from it.
   *
   * <p>The payload is decoded with {@code ArcRecordUtils.fromBytes} or
   * {@code WarcRecordUtils.fromBytes} depending on the current format. The
   * format is not read from the stream, so it must be set (for example via
   * {@link #setFormat}) before calling this method; with format UNKNOWN the
   * payload is consumed and the record is set to null.
   *
   * @param in source to read the serialized record from
   * @throws IOException if reading fails or the length prefix is negative
   */
  @Override
  public final void readFields(final DataInput in) throws IOException {
    final int len = in.readInt();
    if (len < 0) {
      // A negative prefix can only come from a corrupt or foreign stream;
      // report it as an I/O problem instead of failing on array allocation.
      throw new IOException("Invalid archive record length: " + len);
    }
    if (len == 0) {
      this.record = null;
      return;
    }

    // Always consume the payload so the stream stays positioned on the next
    // item, even when the bytes cannot be decoded.
    final byte[] bytes = new byte[len];
    in.readFully(bytes);

    if (getFormat() == ArchiveFormat.ARC) {
      this.record = ArcRecordUtils.fromBytes(bytes);
    } else if (getFormat() == ArchiveFormat.WARC) {
      this.record = WarcRecordUtils.fromBytes(bytes);
    } else {
      this.record = null;
    }
  }

  /**
   * Writes the record as a length prefix followed by its bytes.
   *
   * <p>A single zero length (and no payload) is written when there is no
   * record or when the format is UNKNOWN, which is what {@link #readFields}
   * decodes back to a null record.
   *
   * @param out sink to write the serialized record to
   * @throws IOException if writing fails
   */
  @Override
  public final void write(final DataOutput out) throws IOException {
    byte[] bytes = null;

    // Only attempt conversion when a record is actually present.
    if (record != null) {
      if (getFormat() == ArchiveFormat.ARC) {
        bytes = ArcRecordUtils.toBytes((ARCRecord) record);
      } else if (getFormat() == ArchiveFormat.WARC) {
        bytes = WarcRecordUtils.toBytes((WARCRecord) record);
      }
    }

    if (bytes == null) {
      // Nothing serializable: emit exactly one zero-length marker and stop,
      // so no second length prefix follows and null is never dereferenced.
      out.writeInt(0);
      return;
    }

    out.writeInt(bytes.length);
    out.write(bytes);
  }
}