/*
 * Archives Unleashed Toolkit (AUT):
 * An open-source platform for analyzing web archives.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * @deprecated as of 0.12.0 and will be replaced
 * with ArchiveRecordWritable in a future release.
 */

package io.archivesunleashed.io;

import io.archivesunleashed.data.ArcRecordUtils;
import io.archivesunleashed.data.WarcRecordUtils;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
import org.archive.io.ArchiveRecord;
import org.archive.io.arc.ARCRecord;
import org.archive.io.warc.WARCRecord;

/**
 * Implements Hadoop Writable for Generic Archive Records.
 *
 * <p>The serialized form is a 4-byte length followed by that many record
 * bytes; a length of zero stands for "no record". The archive format is
 * <em>not</em> part of the serialized form: {@link #readFields(DataInput)}
 * decodes the bytes according to the format this instance currently holds,
 * so the format must be set (see {@link #setFormat(ArchiveFormat)}) before
 * reading into a freshly constructed instance.
 */
public class GenericArchiveRecordWritable implements Writable {

  /**
   * Archive Formats that can be used.
   * {@link #UNKNOWN}
   * {@link #ARC}
   * {@link #WARC}
   */
  public enum ArchiveFormat {
    /**
     * UNKNOWN format.
     */
    UNKNOWN,

    /**
     * ARC format.
     */
    ARC,

    /**
     * WARC format.
     */
    WARC
  }

  /**
   * Format of the wrapped record; defaults to UNKNOWN.
   */
  private ArchiveFormat format = ArchiveFormat.UNKNOWN;

  /**
   * The wrapped Generic Archive Record; null when no record is held.
   */
  private ArchiveRecord record = null;

  /**
   * Creates an empty writable: no record and UNKNOWN format.
   */
  public GenericArchiveRecordWritable() {
  }

  /**
   * Initialize Generic Archive Record and detect its format.
   *
   * @param r Generic Archive Record
   */
  public GenericArchiveRecordWritable(final ArchiveRecord r) {
    this.record = r;
    detectFormat();
  }

  /**
   * Set Generic Archive Record and re-detect its format.
   *
   * @param r Generic Archive Record
   */
  public final void setRecord(final ArchiveRecord r) {
    this.record = r;
    detectFormat();
  }

  /**
   * Get Generic Archive Record.
   *
   * @return record Generic Archive Record, or null if none is held
   */
  public final ArchiveRecord getRecord() {
    return record;
  }

  /**
   * Detect format of Generic Archive Record from its runtime type.
   * A null record, or one that is neither an ARCRecord nor a WARCRecord,
   * yields UNKNOWN.
   */
  public final void detectFormat() {
    if (record instanceof ARCRecord) {
      format = ArchiveFormat.ARC;
    } else if (record instanceof WARCRecord) {
      format = ArchiveFormat.WARC;
    } else {
      format = ArchiveFormat.UNKNOWN;
    }
  }

  /**
   * Get format of Generic Archive Record.
   *
   * @return format of Generic Archive Record
   */
  public final ArchiveFormat getFormat() {
    return format;
  }

  /**
   * Set format of Generic Archive Record.
   *
   * @param f format of Generic Archive Record
   */
  public final void setFormat(final ArchiveFormat f) {
    this.format = f;
  }

  /**
   * Reads a length-prefixed record from the input.
   *
   * <p>A length of zero clears the record. Otherwise the bytes are always
   * consumed from the input and decoded according to the current format;
   * with an UNKNOWN format the bytes are discarded and the record is set
   * to null.
   *
   * @param in input to read the serialized record from
   * @throws IOException if reading fails or the length prefix is negative
   */
  @Override
  public final void readFields(final DataInput in) throws IOException {
    int len = in.readInt();
    if (len == 0) {
      this.record = null;
      return;
    }
    if (len < 0) {
      // A negative length can only come from a corrupt or foreign stream;
      // report it as an I/O problem instead of failing on array allocation.
      throw new IOException("Invalid archive record length: " + len);
    }

    byte[] bytes = new byte[len];
    in.readFully(bytes);

    if (getFormat() == ArchiveFormat.ARC) {
      this.record = ArcRecordUtils.fromBytes(bytes);
    } else if (getFormat() == ArchiveFormat.WARC) {
      this.record = WarcRecordUtils.fromBytes(bytes);
    } else {
      this.record = null;
    }
  }

  /**
   * Writes the record as a 4-byte length followed by the record bytes.
   * A null record is written as a length of zero with no payload.
   *
   * @param out output to write the serialized record to
   * @throws IOException if writing fails, or if a non-null record has an
   *     UNKNOWN format and therefore cannot be serialized
   */
  @Override
  public final void write(final DataOutput out) throws IOException {
    if (record == null) {
      out.writeInt(0);
      // Nothing more to write; continuing would dereference the null record.
      return;
    }

    final byte[] bytes;
    if (getFormat() == ArchiveFormat.ARC) {
      bytes = ArcRecordUtils.toBytes((ARCRecord) record);
    } else if (getFormat() == ArchiveFormat.WARC) {
      bytes = WarcRecordUtils.toBytes((WARCRecord) record);
    } else {
      // No serializer exists for this record; fail before touching the
      // output so the stream is not left with a partial entry.
      throw new IOException("Cannot serialize archive record of unknown format: "
          + record.getClass().getName());
    }

    out.writeInt(bytes.length);
    out.write(bytes);
  }
}