001/*
002 * Copyright © 2017 The Archives Unleashed Project
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package io.archivesunleashed.data;
018
019import org.apache.hadoop.io.Writable;
020import org.archive.io.ArchiveRecord;
021import org.archive.io.arc.ARCRecord;
022import org.archive.io.warc.WARCRecord;
023
024import java.io.DataInput;
025import java.io.DataOutput;
026import java.io.IOException;
027
028/**
029 * Implements Hadoop Writable for Archive Records.
030 */
031public class ArchiveRecordWritable implements Writable {
032
033  /**
034   * Archive Formats that can be used.
035   * {@link #UNKNOWN}
036   * {@link #ARC}
037   * {@link #WARC}
038   */
039  public enum ArchiveFormat {
040      /**
041       * UNKNOWN format.
042       */
043      UNKNOWN,
044
045      /**
046       * ARC format.
047       */
048      ARC,
049
050      /**
051       * WARC format.
052       */
053      WARC
054  }
055
056  /**
057   * Set default Record format to UNKNOWN.
058   */
059  private ArchiveFormat format = ArchiveFormat.UNKNOWN;
060
061  /**
062   * Initialize Archive Record to null.
063   */
064  private ArchiveRecord record = null;
065
066  /**
067   * Utility function.
068   */
069  public ArchiveRecordWritable() {
070  }
071
072  /**
073   * Initialize Archive Record.
074   *
075   * @param r Archive Record
076   */
077  public ArchiveRecordWritable(final ArchiveRecord r) {
078    this.record = r;
079    detectFormat();
080  }
081
082  /**
083   * Set Archive Record.
084   *
085   * @param r Archive Record
086   */
087  public final void setRecord(final ArchiveRecord r) {
088    this.record = r;
089    detectFormat();
090  }
091
092  /**
093   * Get Archive Record.
094   *
095   * @return record Archive Record
096   */
097  public final ArchiveRecord getRecord() {
098    return record;
099  }
100
101  /**
102   * Detect format of Archive Record.
103   */
104  public final void detectFormat() {
105    if (record instanceof ARCRecord) {
106      format = ArchiveFormat.ARC;
107    } else if (record instanceof WARCRecord)  {
108      format = ArchiveFormat.WARC;
109    } else {
110      format = ArchiveFormat.UNKNOWN;
111    }
112  }
113
114  /**
115   * Get format of Archive Record.
116   *
117   * @return format of Archive Record
118   */
119  public final ArchiveFormat getFormat() {
120    return format;
121  }
122
123  /**
124   * Set format of Archive Record.
125   *
126   * @param f format of Archive Record
127   */
128  public final void setFormat(final ArchiveFormat f) {
129    this.format = f;
130  }
131
132  @Override
133  public final void readFields(final DataInput in) throws IOException {
134    int len = in.readInt();
135    if (len == 0) {
136      this.record = null;
137      return;
138    }
139
140    byte[] bytes = new byte[len];
141    in.readFully(bytes);
142
143    if (getFormat() == ArchiveFormat.ARC) {
144      this.record = ArcRecordUtils.fromBytes(bytes);
145    } else if (getFormat() == ArchiveFormat.WARC) {
146      this.record = WarcRecordUtils.fromBytes(bytes);
147    } else {
148      this.record = null;
149    }
150  }
151
152  @Override
153  public final void write(final DataOutput out) throws IOException {
154    if (record == null) {
155      out.writeInt(0);
156    }
157    byte[] bytes;
158
159    if (getFormat() == ArchiveFormat.ARC) {
160      bytes = ArcRecordUtils.toBytes((ARCRecord) record);
161    } else if (getFormat() == ArchiveFormat.WARC) {
162      bytes = WarcRecordUtils.toBytes((WARCRecord) record);
163    } else {
164      bytes = null;
165    }
166
167    out.writeInt(bytes.length);
168    out.write(bytes);
169  }
170}