Packages

c

io.archivesunleashed.df

DataFrameLoader

class DataFrameLoader extends AnyRef

DataFrame wrapper for PySpark implementation.

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. DataFrameLoader
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new DataFrameLoader(sc: SparkContext)

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def all(path: String): DataFrame

    Create a DataFrame with crawl_date, url, mime_type_web_server, mime_type_tika, content, bytes, http_status_code, and archive_filename.

  5. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  6. def audio(path: String): DataFrame

    Create a DataFrame with audio url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  7. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native() @HotSpotIntrinsicCandidate()
  8. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  10. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  11. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  12. def imagegraph(path: String): DataFrame
  13. def images(path: String): DataFrame

    Create a DataFrame with image url, filename, extension, mime_type_web_server, mime_type_tika, width, height, md5, sha1, and raw bytes.

  14. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  15. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  16. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  17. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  18. def pdfs(path: String): DataFrame

    Create a DataFrame with PDF url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  19. def presentationProgramFiles(path: String): DataFrame

    Create a DataFrame with presentation program file url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  20. def spreadsheets(path: String): DataFrame

    Create a DataFrame with spreadsheet url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  21. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  22. def toString(): String
    Definition Classes
    AnyRef → Any
  23. def videos(path: String): DataFrame

    Create a DataFrame with video url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

  24. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  25. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  26. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  27. def webgraph(path: String): DataFrame

    Create a DataFrame with crawl_date, source, destination, and anchor.

  28. def webpages(path: String): DataFrame

    Create a DataFrame with crawl_date, url, mime_type_web_server, language, and content.

  29. def wordProcessorFiles(path: String): DataFrame

    Create a DataFrame with word processor file url, filename, extension, mime_type_web_server, mime_type_tika, md5, sha1, and raw bytes.

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] ) @Deprecated @deprecated
    Deprecated

    (Since Java 9) See the corresponding Javadoc for more information.

Inherited from AnyRef

Inherited from Any

Ungrouped