001/*
002 * Archives Unleashed Toolkit (AUT):
003 * An open-source platform for analyzing web archives.
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 *     http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package io.archivesunleashed.io;
019
020import io.archivesunleashed.data.ArcRecordUtils;
021import io.archivesunleashed.data.WarcRecordUtils;
022import java.io.DataInput;
023import java.io.DataOutput;
024import java.io.IOException;
025import org.apache.hadoop.io.Writable;
026import org.archive.io.ArchiveRecord;
027import org.archive.io.arc.ARCRecord;
028import org.archive.io.warc.WARCRecord;
029
030/**
031 * Implements Hadoop Writable for Archive Records.
032 */
033public class ArchiveRecordWritable implements Writable {
034
035  /**
036   * Archive Formats that can be used.
037   * {@link #UNKNOWN}
038   * {@link #ARC}
039   * {@link #WARC}
040   */
041  public enum ArchiveFormat {
042      /**
043       * UNKNOWN format.
044       */
045      UNKNOWN,
046
047      /**
048       * ARC format.
049       */
050      ARC,
051
052      /**
053       * WARC format.
054       */
055      WARC
056  }
057
058  /**
059   * Set default Record format to UNKNOWN.
060   */
061  private ArchiveFormat format = ArchiveFormat.UNKNOWN;
062
063  /**
064   * Initialize Archive Record to null.
065   */
066  private ArchiveRecord record = null;
067
068  /**
069   * Utility function.
070   */
071  public ArchiveRecordWritable() {
072  }
073
074  /**
075   * Initialize Archive Record.
076   *
077   * @param r Archive Record
078   */
079  public ArchiveRecordWritable(final ArchiveRecord r) {
080    this.record = r;
081    detectFormat();
082  }
083
084  /**
085   * Set Archive Record.
086   *
087   * @param r Archive Record
088   */
089  public final void setRecord(final ArchiveRecord r) {
090    this.record = r;
091    detectFormat();
092  }
093
094  /**
095   * Get Archive Record.
096   *
097   * @return record Archive Record
098   */
099  public final ArchiveRecord getRecord() {
100    return record;
101  }
102
103  /**
104   * Detect format of Archive Record.
105   */
106  public final void detectFormat() {
107    if (record instanceof ARCRecord) {
108      format = ArchiveFormat.ARC;
109    } else if (record instanceof WARCRecord)  {
110      format = ArchiveFormat.WARC;
111    } else {
112      format = ArchiveFormat.UNKNOWN;
113    }
114  }
115
116  /**
117   * Get format of Archive Record.
118   *
119   * @return format of Archive Record
120   */
121  public final ArchiveFormat getFormat() {
122    return format;
123  }
124
125  /**
126   * Set format of Archive Record.
127   *
128   * @param f format of Archive Record
129   */
130  public final void setFormat(final ArchiveFormat f) {
131    this.format = f;
132  }
133
134  @Override
135  public final void readFields(final DataInput in) throws IOException {
136    int len = in.readInt();
137    if (len == 0) {
138      this.record = null;
139      return;
140    }
141
142    byte[] bytes = new byte[len];
143    in.readFully(bytes);
144
145    if (getFormat() == ArchiveFormat.ARC) {
146      this.record = ArcRecordUtils.fromBytes(bytes);
147    } else if (getFormat() == ArchiveFormat.WARC) {
148      this.record = WarcRecordUtils.fromBytes(bytes);
149    } else {
150      this.record = null;
151    }
152  }
153
154  @Override
155  public final void write(final DataOutput out) throws IOException {
156    if (record == null) {
157      out.writeInt(0);
158    }
159    byte[] bytes;
160
161    if (getFormat() == ArchiveFormat.ARC) {
162      bytes = ArcRecordUtils.toBytes((ARCRecord) record);
163    } else if (getFormat() == ArchiveFormat.WARC) {
164      bytes = WarcRecordUtils.toBytes((WARCRecord) record);
165    } else {
166      bytes = null;
167    }
168
169    out.writeInt(bytes.length);
170    out.write(bytes);
171  }
172}