Packages

c

io.archivesunleashed.df

DataFrameLoader

class DataFrameLoader extends AnyRef

DataFrame wrapper for PySpark implementation.

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. DataFrameLoader
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new DataFrameLoader(sc: SparkContext)

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def all(path: String): DataFrame

    Create a DataFrame with crawl_date, url, mime_type_web_server, mime_type_tika, content, bytes, http_status_code, and archive_filename.

  5. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  6. def audio(path: String): DataFrame

    Create a DataFrame with audio url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native() @HotSpotIntrinsicCandidate()
  8. def css(path: String): DataFrame
  9. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  10. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  11. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  12. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  13. def html(path: String): DataFrame
  14. def imagegraph(path: String): DataFrame
  15. def images(path: String): DataFrame

    Create a DataFrame with image url, filename, extension, mime_type_web_server, mime_type_tika, width, height, md5, sha1, and raw bytes.

  16. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  17. def js(path: String): DataFrame
  18. def json(path: String): DataFrame
  19. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  20. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  21. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  22. def pdfs(path: String): DataFrame

    Create a DataFrame with PDF url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  23. def plainText(path: String): DataFrame
  24. def presentationProgramFiles(path: String): DataFrame

    Create a DataFrame with presentation program file url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  25. def spreadsheets(path: String): DataFrame

    Create a DataFrame with spreadsheet url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  26. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  27. def toString(): String
    Definition Classes
    AnyRef → Any
  28. def videos(path: String): DataFrame

    Create a DataFrame with video url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  29. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  30. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  31. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  32. def webgraph(path: String): DataFrame

    Create a DataFrame with crawl_date, source, destination, and anchor.

  33. def webpages(path: String): DataFrame

    Create a DataFrame with crawl_date, url, mime_type_web_server, language, and content.

  34. def wordProcessorFiles(path: String): DataFrame

    Create a DataFrame with word processor file url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  35. def xml(path: String): DataFrame

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] ) @Deprecated @deprecated
    Deprecated

    See the corresponding Javadoc for java.lang.Object.finalize for more information.

Inherited from AnyRef

Inherited from Any

Ungrouped