Table of Contents

Below are two example implementations that consume our export API endpoints.

.NET example

namespace Steaming_csharp
{
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;

    /// <summary>
    /// Console sample that requests an access token and then reads the vacancy and
    /// candidate export streams, printing a short summary of every archive received.
    /// </summary>
    class Program
    {
        // Base address of the API. Paths are appended to it directly, so it should end with '/'.
        const string urlbase = "";
        // Client credentials sent to the token endpoint.
        const string clientId = "";
        const string clientSecret = "";
        const string tokenUrl = urlbase + "api/token";

        /// <summary>
        /// Returns the named entry of <paramref name="zip"/>.
        /// </summary>
        /// <exception cref="InvalidDataException">The archive has no entry with that name.</exception>
        private static ZipArchiveEntry GetRequiredEntry(ZipArchive zip, string entryName)
        {
            // ZipArchive.GetEntry returns null for a missing entry; fail with a message
            // that names the entry instead of a NullReferenceException further down.
            ZipArchiveEntry entry = zip.GetEntry(entryName);
            if (entry == null)
            {
                throw new InvalidDataException($"The archive does not contain the expected entry '{entryName}'.");
            }
            return entry;
        }

        /// <summary>
        /// Reads the named entry of <paramref name="zip"/> as text and parses it as a JSON object.
        /// </summary>
        private static JObject ReadJsonEntry(ZipArchive zip, string entryName)
        {
            ZipArchiveEntry entry = GetRequiredEntry(zip, entryName);
            using (Stream entryStream = entry.Open())
            using (StreamReader reader = new StreamReader(entryStream))
            {
                return JsonConvert.DeserializeObject<JObject>(reader.ReadToEnd());
            }
        }

        /// <summary>
        /// Opens one exported vacancy archive and prints its offset and offer reference.
        /// </summary>
        /// <param name="zipContent">Raw bytes of a zip archive containing an "offerdetail" JSON entry.</param>
        static void ReadVacancyZip(byte[] zipContent)
        {
            using (ZipArchive zip = new ZipArchive(new MemoryStream(zipContent)))
            {
                JObject offerDetail = ReadJsonEntry(zip, "offerdetail");
                var offset = offerDetail["offset"];
                var offerReference = offerDetail["offerDetail"]["reference"];
                Console.WriteLine($"offset {offset} Offer reference : {offerReference}");
            }
        }

        /// <summary>
        /// Opens one exported candidate archive and prints its offset, candidate id,
        /// application count and the size of every attachment listed in the JSON.
        /// </summary>
        /// <param name="zipContent">Raw bytes of a zip archive containing an "applicantdetail" JSON entry.</param>
        static void ReadCandidateZip(byte[] zipContent)
        {
            using (ZipArchive zip = new ZipArchive(new MemoryStream(zipContent)))
            {
                JObject candidateDetail = ReadJsonEntry(zip, "applicantdetail");
                var offset = candidateDetail["offset"];
                var candidateId = candidateDetail["candidateDetail"]["id"];
                var applications = candidateDetail["applications"];
                var files = candidateDetail["attachments"];
                Console.WriteLine($"offset {offset} id {candidateId} application count {applications.Count()} files count {files.Count()}");
                foreach (var file in files)
                {
                    // Each attachment is stored in the archive under an entry named after its id.
                    var fileId = file["id"];
                    ZipArchiveEntry fileEntry = GetRequiredEntry(zip, fileId.ToString());
                    Console.WriteLine($"\t fileId {fileId} Size {fileEntry.Length}");
                }
            }
        }

        /// <summary>
        /// Requests an access token with the client-credentials grant.
        /// </summary>
        /// <exception cref="HttpRequestException">The token endpoint returned a non-success status.</exception>
        /// <exception cref="InvalidOperationException">The response has no "access_token" property.</exception>
        private static async Task<string> RequestTokenAsync(HttpClient client)
        {
            // FormUrlEncodedContent percent-encodes every value, so credentials containing
            // characters such as '&', '=' or '+' reach the server intact. It also sets the
            // application/x-www-form-urlencoded content type.
            var form = new Dictionary<string, string>
            {
                { "grant_type", "client_credentials" },
                { "client_id", clientId },
                { "client_secret", clientSecret },
            };

            using (HttpContent content = new FormUrlEncodedContent(form))
            using (HttpResponseMessage response = await client.PostAsync(tokenUrl, content))
            {
                response.EnsureSuccessStatusCode();
                var json = JsonConvert.DeserializeObject<JObject>(await response.Content.ReadAsStringAsync());
                var accessToken = json?["access_token"];
                if (accessToken == null)
                {
                    throw new InvalidOperationException("The token endpoint response does not contain an access_token.");
                }
                return accessToken.ToString();
            }
        }

        /// <summary>
        /// Entry point: obtains a token, runs two vacancy exports and two candidate exports,
        /// then waits for a line on standard input before exiting.
        /// </summary>
        static async Task Main(string[] args)
        {
            using (HttpClient client = new HttpClient())
            {
                string token = await RequestTokenAsync(client);

                await ExtractVacancies(client, token, urlbase + "api/exports/v1/vacancies?limit=50");
                await ExtractVacancies(client, token, urlbase + "api/exports/v1/vacancies?limit=50&offset=100");
                await ExtractCandidates(client, token, urlbase + "api/exports/v1/candidates?limit=50");
                await ExtractCandidates(client, token, urlbase + "api/exports/v1/candidates?limit=50&offset=100");
            }

            Console.ReadLine();
        }

        /// <summary>
        /// Sends an authenticated GET to <paramref name="streamingUrl"/> and passes every
        /// block read from the response body to <paramref name="handleFile"/>.
        /// </summary>
        /// <exception cref="HttpRequestException">The endpoint returned a non-success status.</exception>
        private static async Task ProcessExportAsync(HttpClient client, string token, string streamingUrl, Action<byte[]> handleFile)
        {
            using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, streamingUrl))
            {
                request.Headers.Authorization = new AuthenticationHeaderValue("bearer", token);

                // ResponseHeadersRead returns as soon as the headers arrive, so the body
                // is consumed progressively instead of being buffered first.
                using (HttpResponseMessage response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead))
                {
                    response.EnsureSuccessStatusCode();
                    using (Stream body = await response.Content.ReadAsStreamAsync())
                    {
                        var reader = new StreamingReader(body);
                        foreach (byte[] file in reader.GetFiles())
                        {
                            handleFile(file);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Reads the vacancy export at <paramref name="vacanciesStreamingUrl"/> and prints a summary of each archive.
        /// </summary>
        private static Task ExtractVacancies(HttpClient client, string token, string vacanciesStreamingUrl)
        {
            return ProcessExportAsync(client, token, vacanciesStreamingUrl, ReadVacancyZip);
        }

        /// <summary>
        /// Reads the candidate export at <paramref name="candidatesStreamingUrl"/> and prints a summary of each archive.
        /// </summary>
        private static Task ExtractCandidates(HttpClient client, string token, string candidatesStreamingUrl)
        {
            return ProcessExportAsync(client, token, candidatesStreamingUrl, ReadCandidateZip);
        }
    }

    /// <summary>
    /// Splits a stream into length-prefixed blocks: each block is a 4-byte size header
    /// followed by that many payload bytes.
    /// </summary>
    public class StreamingReader
    {
        private const int HeaderSize = 4;

        private readonly Stream _stream;

        // Scratch buffer used only for headers. Payloads always get their own array, so a
        // block handed to the caller is never overwritten by a later read.
        private readonly byte[] _header = new byte[HeaderSize];

        /// <summary>
        /// Creates a reader over <paramref name="stream"/>. The stream is not disposed by this class.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
        public StreamingReader(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }
            this._stream = stream;
        }

        /// <summary>
        /// Lazily yields the payload of every block in the stream, in order.
        /// Enumeration ends when the stream runs out of data; a block that is cut short
        /// (incomplete header or payload) is dropped rather than reported.
        /// </summary>
        /// <exception cref="InvalidDataException">A header announces a negative block size.</exception>
        public IEnumerable<byte[]> GetFiles()
        {
            while (TryFill(_header))
            {
                int blockSize = GetSizeFromHeader(_header);
                if (blockSize < 0)
                {
                    throw new InvalidDataException($"Invalid block size {blockSize} in stream header.");
                }

                byte[] block = new byte[blockSize];
                if (!TryFill(block))
                {
                    yield break;
                }
                yield return block;
            }
        }

        /// <summary>
        /// Decodes the 4-byte header as a little-endian signed 32-bit integer,
        /// independently of the endianness of the machine running the code.
        /// </summary>
        private int GetSizeFromHeader(byte[] header)
        {
            return header[0] | (header[1] << 8) | (header[2] << 16) | (header[3] << 24);
        }

        /// <summary>
        /// Reads from the stream until <paramref name="buffer"/> is full.
        /// </summary>
        /// <returns>True when the buffer was filled; false when the stream ended first.</returns>
        private bool TryFill(byte[] buffer)
        {
            int filled = 0;
            while (filled < buffer.Length)
            {
                // Stream.Read may return fewer bytes than requested; 0 means end of stream.
                int read = _stream.Read(buffer, filled, buffer.Length - filled);
                if (read == 0)
                {
                    return false;
                }
                filled += read;
            }
            return true;
        }
    }

}

Python example

import sys
import requests 
from zipfile import ZipFile
import io
import json
from collections import namedtuple

# Base address of the API. Paths are appended to it directly, so it should end with '/'.
urlbase = ''
# Client credentials sent to the token endpoint.
clientId = ''
clientSecret = ''

# Request an access token with the client-credentials grant.
tokenUrl = urlbase + 'api/token'
# Passing a dict lets requests percent-encode every value, so credentials that
# contain characters such as '&', '=' or '+' reach the server intact.
body = {
    'grant_type': 'client_credentials',
    'client_id': clientId,
    'client_secret': clientSecret,
}
response = requests.post(tokenUrl, data=body, headers={'Content-Type':'application/x-www-form-urlencoded'})
# Fail here with an explicit HTTP error instead of a KeyError on 'access_token' below.
response.raise_for_status()

data = response.json()
token = data['access_token']

# Authorization header sent with every export request.
headers = {'Authorization':'bearer ' + token}


class StreamingReader():
    """Splits an HTTP response body into length-prefixed blocks.

    Each block is a 4-byte little-endian size header followed by that many
    payload bytes. `response` only needs an `iter_content(chunk_size=...)`
    method that yields bytes.
    """

    def __init__(self, response):
        self._response = response
        # bytearray grows in place, so buffering a large block does not copy
        # everything received so far on every incoming network chunk.
        self._buffer = bytearray()
        self._responseIter = None

    def getFiles(self):
        """Yield the payload (bytes) of every block in the body, in order.

        Iteration stops when the body is exhausted; a block that is cut short
        (incomplete header or payload) is dropped rather than reported.
        """
        # Termination is driven by the content iterator itself instead of the
        # private `_content_consumed` flag, so a response whose body was already
        # loaded (no stream=True) is still processed.
        while True:
            try:
                header = self._getChunk(4)
                nextBlockSize = self._getSizeFromHeader(header)
                block = self._getChunk(nextBlockSize)
            except StopIteration:
                return
            yield block

    def _getChunk(self, size):
        """Return exactly `size` bytes from the body.

        Raises StopIteration when the body ends before `size` bytes are available.
        """
        if self._responseIter is None:
            self._responseIter = iter(self._response.iter_content(chunk_size=4096))
        while len(self._buffer) < size:
            self._buffer += next(self._responseIter)
        currentChunk = bytes(self._buffer[:size])
        del self._buffer[:size]
        return currentChunk

    def _getSizeFromHeader(self, header):
        """Decode the block size from a little-endian header."""
        return int.from_bytes(header, byteorder='little')

def readVacancyZip(bytearray, totalCount):
    """Print the offset and offer reference stored in one exported vacancy archive.

    bytearray  -- raw bytes of a zip archive containing an 'offerdetail' JSON entry
                  (the name shadows the builtin; kept so keyword callers still work)
    totalCount -- value printed after the offset, as '<offset>/<totalCount>'
    """
    with ZipFile(io.BytesIO(bytearray)) as archive:
        offerDetail = archive.read('offerdetail')
    # Plain dict access: only the fields that are printed need to exist, and JSON
    # keys that are not valid Python identifiers elsewhere in the document are harmless.
    data = json.loads(str(offerDetail, 'utf-8'))
    print(f"{data['offset']}/{totalCount} -> Offer reference : {data['offerDetail']['reference']}")

def readCandidateZip(bytearray, totalCount):
    """Print a summary of one exported candidate archive.

    Prints the offset, candidate id, application count and attachment count,
    then one line per attachment with its id and size in bytes.

    bytearray  -- raw bytes of a zip archive containing an 'applicantdetail' JSON entry
                  (the name shadows the builtin; kept so keyword callers still work)
    totalCount -- value printed after the offset, as '<offset>/<totalCount>'
    """
    with ZipFile(io.BytesIO(bytearray)) as archive:
        # Plain dict access: JSON keys that are not valid Python identifiers
        # elsewhere in the document do not break parsing.
        data = json.loads(str(archive.read('applicantdetail'), 'utf-8'))
        candidateId = data['candidateDetail']['id']
        applications = data['applications']
        files = data['attachments']
        print(f"{data['offset']}/{totalCount} candidate id {candidateId} application count {len(applications)} files count {len(files)}")
        for file in files:
            fileid = file['id']
            # Each attachment is stored under an entry named after its id; str() makes
            # the lookup work when the id is a JSON number.
            fileContent = archive.read(str(fileid))
            print(f'\t fileId {fileid} Size {len(fileContent)}')

def extractVacancies(vacanciesStreamingUrl):
    """Stream the vacancy export at the given URL and print a line per vacancy.

    The number after '/' in the Content-Range response header, minus one, is
    passed to readVacancyZip as the total shown next to each offset.
    Raises requests.HTTPError when the endpoint answers with an error status.
    """
    # The context manager closes the streamed response even if processing fails midway.
    with requests.get(vacanciesStreamingUrl, headers=headers, stream=True) as response:
        # Surface HTTP errors as such instead of a KeyError on the missing header.
        response.raise_for_status()
        contentRange = response.headers["Content-Range"]
        resultCount = int(contentRange[contentRange.index('/')+1:])-1
        reader = StreamingReader(response)
        for i in reader.getFiles():
            readVacancyZip(i, resultCount)

def extractCandidates(candidatesStreamingUrl):
    """Stream the candidate export at the given URL and print a summary per candidate.

    The number after '/' in the Content-Range response header, minus one, is
    passed to readCandidateZip as the total shown next to each offset.
    Raises requests.HTTPError when the endpoint answers with an error status.
    """
    # The context manager closes the streamed response even if processing fails midway.
    with requests.get(candidatesStreamingUrl, headers=headers, stream=True) as response:
        # Surface HTTP errors as such instead of a KeyError on the missing header.
        response.raise_for_status()
        contentRange = response.headers["Content-Range"]
        resultCount = int(contentRange[contentRange.index('/')+1:])-1
        reader = StreamingReader(response)
        for i in reader.getFiles():
            readCandidateZip(i, resultCount)

# Run the sample: two vacancy export requests, then two candidate export requests.
vacancyUrls = (
    urlbase + 'api/exports/v1/vacancies?limit=50',
    urlbase + 'api/exports/v1/vacancies?offset=100&limit=50',
)
for exportUrl in vacancyUrls:
    extractVacancies(exportUrl)

candidateUrls = (
    urlbase + 'api/exports/v1/candidates?limit=50',
    urlbase + 'api/exports/v1/candidates?offset=100&limit=50',
)
for exportUrl in candidateUrls:
    extractCandidates(exportUrl)