001/*
002 * Archives Unleashed Toolkit (AUT):
003 * An open-source platform for analyzing web archives.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package io.archivesunleashed.data;
019
020import org.apache.hadoop.io.Writable;
021import org.archive.io.ArchiveRecord;
022import org.archive.io.arc.ARCRecord;
023import org.archive.io.warc.WARCRecord;
024
025import java.io.DataInput;
026import java.io.DataOutput;
027import java.io.IOException;
028
029/**
030 * Implements Hadoop Writable for Archive Records.
031 */
032public class ArchiveRecordWritable implements Writable {
033
034  /**
035   * Archive Formats that can be used.
036   * {@link #UNKNOWN}
037   * {@link #ARC}
038   * {@link #WARC}
039   */
040  public enum ArchiveFormat {
041      /**
042       * UNKNOWN format.
043       */
044      UNKNOWN,
045
046      /**
047       * ARC format.
048       */
049      ARC,
050
051      /**
052       * WARC format.
053       */
054      WARC
055  }
056
057  /**
058   * Set default Record format to UNKNOWN.
059   */
060  private ArchiveFormat format = ArchiveFormat.UNKNOWN;
061
062  /**
063   * Initialize Archive Record to null.
064   */
065  private ArchiveRecord record = null;
066
067  /**
068   * Utility function.
069   */
070  public ArchiveRecordWritable() {
071  }
072
073  /**
074   * Initialize Archive Record.
075   *
076   * @param r Archive Record
077   */
078  public ArchiveRecordWritable(final ArchiveRecord r) {
079    this.record = r;
080    detectFormat();
081  }
082
083  /**
084   * Set Archive Record.
085   *
086   * @param r Archive Record
087   */
088  public final void setRecord(final ArchiveRecord r) {
089    this.record = r;
090    detectFormat();
091  }
092
093  /**
094   * Get Archive Record.
095   *
096   * @return record Archive Record
097   */
098  public final ArchiveRecord getRecord() {
099    return record;
100  }
101
102  /**
103   * Detect format of Archive Record.
104   */
105  public final void detectFormat() {
106    if (record instanceof ARCRecord) {
107      format = ArchiveFormat.ARC;
108    } else if (record instanceof WARCRecord)  {
109      format = ArchiveFormat.WARC;
110    } else {
111      format = ArchiveFormat.UNKNOWN;
112    }
113  }
114
115  /**
116   * Get format of Archive Record.
117   *
118   * @return format of Archive Record
119   */
120  public final ArchiveFormat getFormat() {
121    return format;
122  }
123
124  /**
125   * Set format of Archive Record.
126   *
127   * @param f format of Archive Record
128   */
129  public final void setFormat(final ArchiveFormat f) {
130    this.format = f;
131  }
132
133  @Override
134  public final void readFields(final DataInput in) throws IOException {
135    int len = in.readInt();
136    if (len == 0) {
137      this.record = null;
138      return;
139    }
140
141    byte[] bytes = new byte[len];
142    in.readFully(bytes);
143
144    if (getFormat() == ArchiveFormat.ARC) {
145      this.record = ArcRecordUtils.fromBytes(bytes);
146    } else if (getFormat() == ArchiveFormat.WARC) {
147      this.record = WarcRecordUtils.fromBytes(bytes);
148    } else {
149      this.record = null;
150    }
151  }
152
153  @Override
154  public final void write(final DataOutput out) throws IOException {
155    if (record == null) {
156      out.writeInt(0);
157    }
158    byte[] bytes;
159
160    if (getFormat() == ArchiveFormat.ARC) {
161      bytes = ArcRecordUtils.toBytes((ARCRecord) record);
162    } else if (getFormat() == ArchiveFormat.WARC) {
163      bytes = WarcRecordUtils.toBytes((WARCRecord) record);
164    } else {
165      bytes = null;
166    }
167
168    out.writeInt(bytes.length);
169    out.write(bytes);
170  }
171}