Class

io.archivesunleashed.df

DataFrameLoader

Related Doc: package df

Permalink

class DataFrameLoader extends AnyRef

DataFrame wrapper for PySpark implementation.

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. DataFrameLoader
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new DataFrameLoader(sc: SparkContext)

    Permalink

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. def all(path: String): DataFrame

    Permalink

    Create a DataFrame with crawl_date, url, mime_type_web_server, content, and bytes.

  5. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  6. def audio(path: String): DataFrame

    Permalink

    Create a DataFrame with audio url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  7. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  10. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  11. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  12. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  13. def imagegraph(path: String): DataFrame

    Permalink
  14. def images(path: String): DataFrame

    Permalink

    Create a DataFrame with image url, filename, extension, mime_type_web_server, mime_type_tika, width, height, md5, sha1, and raw bytes.

  15. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  16. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  17. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  18. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  19. def pdfs(path: String): DataFrame

    Permalink

    Create a DataFrame with PDF url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  20. def presentationProgramFiles(path: String): DataFrame

    Permalink

    Create a DataFrame with presentation program url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  21. def spreadsheets(path: String): DataFrame

    Permalink

    Create a DataFrame with spreadsheet url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  22. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  23. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  24. def videos(path: String): DataFrame

    Permalink

    Create a DataFrame with video url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  25. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  26. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  27. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  28. def webgraph(path: String): DataFrame

    Permalink

    Create a DataFrame with crawl_date, source, destination, and anchor.

  29. def webpages(path: String): DataFrame

    Permalink

    Create a DataFrame with crawl_date, url, mime_type_web_server, and content.

  30. def wordProcessorFiles(path: String): DataFrame

    Permalink

    Create a DataFrame with word processor url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

Inherited from AnyRef

Inherited from Any

Ungrouped