001/*
002 * Copyright © 2017 The Archives Unleashed Project
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package io.archivesunleashed.data;
018
019import java.io.DataInput;
020import java.io.DataOutput;
021import java.io.IOException;
022import org.apache.hadoop.io.Writable;
023import org.archive.io.ArchiveRecord;
024import org.archive.io.arc.ARCRecord;
025import org.archive.io.warc.WARCRecord;
026
027/** Implements Hadoop Writable for Archive Records. */
028public class ArchiveRecordWritable implements Writable {
029
030  /** Archive Formats that can be used. {@link #UNKNOWN} {@link #ARC} {@link #WARC} */
031  public enum ArchiveFormat {
032    /** UNKNOWN format. */
033    UNKNOWN,
034
035    /** ARC format. */
036    ARC,
037
038    /** WARC format. */
039    WARC
040  }
041
042  /** Set default Record format to UNKNOWN. */
043  private ArchiveFormat format = ArchiveFormat.UNKNOWN;
044
045  /** Initialize Archive Record to null. */
046  private ArchiveRecord record = null;
047
048  /** Utility function. */
049  public ArchiveRecordWritable() {}
050
051  /**
052   * Initialize Archive Record.
053   *
054   * @param r Archive Record
055   */
056  public ArchiveRecordWritable(final ArchiveRecord r) {
057    this.record = r;
058    detectFormat();
059  }
060
061  /**
062   * Set Archive Record.
063   *
064   * @param r Archive Record
065   */
066  public final void setRecord(final ArchiveRecord r) {
067    this.record = r;
068    detectFormat();
069  }
070
071  /**
072   * Get Archive Record.
073   *
074   * @return record Archive Record
075   */
076  public final ArchiveRecord getRecord() {
077    return record;
078  }
079
080  /** Detect format of Archive Record. */
081  public final void detectFormat() {
082    if (record instanceof ARCRecord) {
083      format = ArchiveFormat.ARC;
084    } else if (record instanceof WARCRecord) {
085      format = ArchiveFormat.WARC;
086    } else {
087      format = ArchiveFormat.UNKNOWN;
088    }
089  }
090
091  /**
092   * Get format of Archive Record.
093   *
094   * @return format of Archive Record
095   */
096  public final ArchiveFormat getFormat() {
097    return format;
098  }
099
100  /**
101   * Set format of Archive Record.
102   *
103   * @param f format of Archive Record
104   */
105  public final void setFormat(final ArchiveFormat f) {
106    this.format = f;
107  }
108
109  @Override
110  public final void readFields(final DataInput in) throws IOException {
111    int len = in.readInt();
112    if (len == 0) {
113      this.record = null;
114      return;
115    }
116
117    byte[] bytes = new byte[len];
118    in.readFully(bytes);
119
120    if (getFormat() == ArchiveFormat.ARC) {
121      this.record = ArcRecordUtils.fromBytes(bytes);
122    } else if (getFormat() == ArchiveFormat.WARC) {
123      this.record = WarcRecordUtils.fromBytes(bytes);
124    } else {
125      this.record = null;
126    }
127  }
128
129  @Override
130  public final void write(final DataOutput out) throws IOException {
131    if (record == null) {
132      out.writeInt(0);
133    }
134    byte[] bytes;
135
136    if (getFormat() == ArchiveFormat.ARC) {
137      bytes = ArcRecordUtils.toBytes((ARCRecord) record);
138    } else if (getFormat() == ArchiveFormat.WARC) {
139      bytes = WarcRecordUtils.toBytes((WARCRecord) record);
140    } else {
141      bytes = null;
142    }
143
144    out.writeInt(bytes.length);
145    out.write(bytes);
146  }
147}