f7h
f7h

Reputation: 69

stream a zip created on the fly with play 2.5 and akka stream with backpressure

I would like to stream a zip created on the fly (without holding it entirely in memory) with Play Framework 2.5, using Akka Streams with backpressure. Here is my code, which creates a small zip (16 KB) on the fly. When a client requests the URL associated with the action, the download does not start.

import java.util.zip.{ ZipEntry, ZipOutputStream, GZIPOutputStream }
import akka.stream.scaladsl._
import akka.util.ByteString
import play.api.mvc._
import scala.concurrent.duration._
import java.io.{ BufferedOutputStream, ByteArrayOutputStream }
import scala.concurrent.{ Promise, Future }
import akka.stream.OverflowStrategy
/**
 * Serves a zip archive that is generated while it is being downloaded.
 */
class ZipController extends Controller {

  /**
   * Streams a zip made of 101 small text entries as a chunked HTTP response.
   *
   * The zip is written to the `OutputStream` materialized by
   * `StreamConverters.asOutputStream()`. Those writes must NOT happen on the
   * thread that materializes the stream: nothing is consuming the source yet at
   * that point, so the writes would block and the response would never start.
   * They are therefore run asynchronously in a `Future`.
   *
   * @return an action whose body is the zip, sent with `application/zip` and an
   *         attachment disposition named `test.zip`
   */
  def getStreamedZip = Action {
    // Execution context for the blocking writer. A dedicated dispatcher for
    // blocking I/O is preferable in a real application.
    implicit val ec: scala.concurrent.ExecutionContext = scala.concurrent.ExecutionContext.global

    // Same bytes as the original char-by-char conversion (content is ASCII).
    val line = "This is the line:\n".getBytes(java.nio.charset.StandardCharsets.UTF_8)

    val source: Source[ByteString, Future[Unit]] =
      StreamConverters.asOutputStream().mapMaterializedValue { out =>
        Future {
          scala.concurrent.blocking {
            val zip = new ZipOutputStream(out)
            try {
              (0 to 100).foreach { i =>
                zip.putNextEntry(new ZipEntry("test-zip/README-" + i + ".txt"))
                zip.write(line)
                zip.closeEntry()
              }
            } finally {
              // Closing the zip writes the central directory and closes `out`,
              // which is what completes the source; do it even on failure.
              zip.close()
            }
          }
        }
      }

    Ok.chunked(source).withHeaders(
      "Content-Type" -> "application/zip",
      "Content-Disposition" -> "attachment; filename=test.zip"
    )
  }

}

Basically, I want to stream a 2 GB zip file from a server with 1 GB of memory. The zip will be composed of files of about 15 MB each. Is it possible to write the zip without loading each file entirely into memory? If, let's say, 3 clients download the zip at 1 MB/second, approximately how much memory will these downloads take? Thank you in advance.

Upvotes: 1

Views: 1898

Answers (3)

akauppi
akauppi

Reputation: 18066

Since Alpakka 3.0.4, akka-stream-alpakka-file supports zip archives: https://doc.akka.io/docs/alpakka/current/file.html#zip-archive

Upvotes: 0

cozyss
cozyss

Reputation: 1388

Somehow the above methods didn't work for me. Here's my code that works for zipping files on the fly and downloading them via the Play Framework.

import java.io.{BufferedOutputStream, ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{ZipEntry, ZipOutputStream}
import akka.stream.scaladsl.{StreamConverters}
import org.apache.commons.io.FileUtils
import play.api.mvc.{Action, Controller}

/**
 * Example controller: serves a single file, and a zip assembled from two files.
 */
class HomeController extends Controller {

  /** Sends `C:\Users\a.csv` to the client under the name `a.csv`. */
  def single() = Action {
    Ok.sendFile(
      content = new java.io.File("C:\\Users\\a.csv"),
      fileName = _ => "a.csv"
    )
  }

  /**
   * Sends the zip produced by [[fileByteData]] as a chunked response.
   *
   * The archive is fully built in memory before the first chunk is sent.
   */
  def zip() = Action {
    // fromInputStream takes a factory, so the zip is built when the stream is run.
    Ok.chunked(StreamConverters.fromInputStream(() => fileByteData())).withHeaders(
      CONTENT_TYPE -> "application/zip",
      CONTENT_DISPOSITION -> "attachment; filename = test.zip"
    )
  }

  /**
   * Zips `a.csv` and `b.csv` into an in-memory buffer.
   *
   * Each file is copied straight into the zip stream rather than first being
   * read into its own byte array, so only the compressed archive is held in
   * memory, not an additional full copy of every input file.
   *
   * @return a stream over the bytes of the finished zip archive
   * @throws java.io.IOException if a file cannot be read or the zip cannot be written
   */
  def fileByteData(): ByteArrayInputStream = {
    val fileList = List(
      new java.io.File("C:\\Users\\a.csv"),
      new java.io.File("C:\\Users\\b.csv")
    )

    val baos = new ByteArrayOutputStream()
    val zos = new ZipOutputStream(new BufferedOutputStream(baos))

    try {
      fileList.foreach { file =>
        zos.putNextEntry(new ZipEntry(file.toPath.getFileName.toString))
        java.nio.file.Files.copy(file.toPath, zos)
        zos.closeEntry()
      }
    } finally {
      // Closing flushes the buffered stream and writes the central directory.
      zos.close()
    }

    new ByteArrayInputStream(baos.toByteArray)
  }
}

The basic idea of zip() is to convert the files into a ByteArrayInputStream and use StreamConverters to send it as chunked data.

Upvotes: 0

WeaponsGrade
WeaponsGrade

Reputation: 878

Here is an implementation sourced at https://gist.github.com/kirked/412b5156f94419e71ce4a84ec1d54761

/* License: MIT */

import com.typesafe.scalalogging.slf4j.StrictLogging
import java.io.{ByteArrayOutputStream, InputStream, IOException}
import java.util.zip.{ZipEntry, ZipOutputStream}
import play.api.libs.iteratee.{Enumeratee, Enumerator}
import scala.concurrent.{Future, ExecutionContext}

/**
 * Play iteratee-based reactive zip-file generation.
 *
 * Sources are resolved one at a time, their contents are written through a
 * shared [[java.util.zip.ZipOutputStream]] into a small in-memory buffer, and
 * the buffer is drained into the resulting enumerator as byte-array chunks.
 */
object ZipEnumerator extends StrictLogging {

  /** Number of bytes requested from a source stream per `read` call. */
  private val ReadChunkSize = 4096

  /**
   * A source to zip.
   *
   * @param  filepath The zip-file path at which to store the data.
   * @param  stream   The data stream provider.
   */
  case class Source(filepath: String, stream: () => Future[Option[InputStream]])

  /**
   * Given sources, returns an Enumerator that feeds a zip-file of the source contents.
   *
   * Sources are resolved lazily, in iteration order. If a source's provider
   * yields `None`, resolution stops there and the remaining sources are not
   * added to the archive.
   *
   * @param sources the entries to add to the archive
   * @param ec      execution context used to resolve sources and produce chunks
   * @return an enumerator of zip-file chunks, ending with the central directory
   */
  def apply(sources: Iterable[Source])(implicit ec: ExecutionContext): Enumerator[Array[Byte]] = {
    val resolveSources: Enumerator[ResolvedSource] = Enumerator.unfoldM(sources) { remaining =>
      remaining.headOption match {
        // Nothing to compute here, so avoid scheduling a task just to return None.
        case None                                 => Future.successful(None)
        case Some(Source(filepath, futureStream)) =>
          futureStream().map { _.map(stream => (remaining.tail, ResolvedSource(filepath, stream))) }
      }
    }

    val buffer = new ZipBuffer(8192)

    // Emits exactly one final chunk: closing the zip stream writes the central
    // directory into the buffer; on the next pull the buffer is already closed.
    val writeCentralDirectory = Enumerator.generateM(Future {
      if (buffer.isClosed) None
      else {
        buffer.close()
        Some(buffer.bytes)
      }
    })

    resolveSources &> zipeach(buffer) andThen writeCentralDirectory
  }


  /**
   * Turns each resolved source into the sequence of zip-file chunks for its entry.
   *
   * The source stream is read in blocks of up to `ReadChunkSize` bytes (instead
   * of one byte per call) and closed once it is exhausted or fails.
   *
   * @param buffer the shared zip stream and its backing buffer
   */
  private def zipeach(buffer: ZipBuffer)(implicit ec: ExecutionContext): Enumeratee[ResolvedSource, Array[Byte]] = {
    Enumeratee.mapConcat[ResolvedSource] { source =>
      buffer.zipStream.putNextEntry(new ZipEntry(source.filepath))
      val chunk = new Array[Byte](ReadChunkSize)
      var done = false

      // Finishes the current zip entry and releases the source stream.
      def entryDone(): Unit = {
        done = true
        buffer.zipStream.closeEntry()
        source.stream.close()
      }

      // Lazily yields one drained buffer per step until the entry is finished.
      def restOfStream: Stream[Array[Byte]] = {
        if (done) Stream.empty
        else {
          while (!done && !buffer.full) {
            try {
              val count = source.stream.read(chunk)
              if (count == -1) entryDone()
              else buffer.zipStream.write(chunk, 0, count)
            }
            catch {
              // Best effort: log, end this entry early, and carry on with the archive.
              case e: IOException =>
                logger.error(s"reading/zipping stream [${source.filepath}]", e)
                entryDone()
            }
          }
          buffer.bytes #:: restOfStream
        }
      }

      restOfStream
    }
  }


  /** A source whose input stream has been obtained. */
  private case class ResolvedSource(filepath: String, stream: InputStream)


  /**
   * A zip output stream backed by a resettable in-memory buffer.
   *
   * @param capacity initial buffer size, and the size at which the buffer reports `full`
   */
  private class ZipBuffer(capacity: Int) {
    private val buf = new ByteArrayOutputStream(capacity)
    private var closed = false
    val zipStream = new ZipOutputStream(buf)

    /** Discards buffered bytes, then closes the zip stream; only the first call has an effect. */
    def close(): Unit = {
      if (!closed) {
        closed = true
        reset()
        zipStream.close()   // writes central directory
      }
    }

    def isClosed = closed

    /** Discards everything currently buffered. */
    def reset(): Unit = buf.reset()

    /** True once at least `capacity` bytes are buffered. */
    def full: Boolean = buf.size >= capacity

    /** Returns the buffered bytes and empties the buffer. */
    def bytes: Array[Byte] = {
      val result = buf.toByteArray
      reset()
      result
    }
  }
}

The usage looks something like this:

// Illustrative usage only: `s3` and `items` are placeholders supplied by the caller.
val s3 = ...
// One ZipEnumerator.Source per item. The thunk defers opening the stream until the
// enumerator reaches that entry; for this to type-check, `s3.getInputStream` must
// return Future[Option[InputStream]], matching Source's `stream` parameter.
val sources = items.map(item => ZipEnumerator.Source(item.filename, { () => s3.getInputStream(item.storagePath) }))
// The Writeable for the Array[Byte] chunks is passed explicitly here.
Ok.chunked(ZipEnumerator(sources))(play.api.http.Writeable.wBytes).withHeaders(
            CONTENT_TYPE -> "application/zip",
            // Plain `filename` plus a percent-encoded UTF-8 `filename*` variant of the name.
            CONTENT_DISPOSITION -> s"attachment; filename=MyFiles.zip; filename*=UTF-8''My%20Files.zip"
          )

Upvotes: 1

Related Questions